SOLR-1132: Added support for poly fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@893746 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2009-12-24 13:03:22 +00:00
parent 9a5a4ed7b4
commit 05237cf80f
36 changed files with 1817 additions and 341 deletions

View File

@ -74,6 +74,9 @@ New Features
* SOLR-1653: Add PatternReplaceCharFilter (koji) * SOLR-1653: Add PatternReplaceCharFilter (koji)
* SOLR-1131: FieldTypes can now output multiple Fields per Type and still be searched. This can be handy for hiding the details of a particular
implementation such as in the spatial case. (Chris Mattmann, shalin, noble, gsingers, yonik)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -28,6 +28,7 @@
<field name="popularity">6</field> <field name="popularity">6</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<field name="store">45.17614,-93.87341</field>
</doc> </doc>
<doc> <doc>
@ -42,6 +43,8 @@
<field name="price">350</field> <field name="price">350</field>
<field name="popularity">6</field> <field name="popularity">6</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
</doc> </doc>
</add> </add>

View File

@ -28,6 +28,8 @@
<field name="price">19.95</field> <field name="price">19.95</field>
<field name="popularity">1</field> <field name="popularity">1</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
<field name="manufacturedate_dt">2005-08-01T16:30:25Z</field> <field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
</doc> </doc>
@ -42,6 +44,8 @@
<field name="price">11.50</field> <field name="price">11.50</field>
<field name="popularity">1</field> <field name="popularity">1</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2006-02-14T23:55:59Z</field> <field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
</doc> </doc>

View File

@ -32,5 +32,7 @@
<field name="price">399.00</field> <field name="price">399.00</field>
<field name="popularity">10</field> <field name="popularity">10</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2005-10-12T08:00:00Z</field> <field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
</doc></add> </doc></add>

View File

@ -26,6 +26,8 @@
<field name="price">185</field> <field name="price">185</field>
<field name="popularity">5</field> <field name="popularity">5</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
</doc> </doc>
@ -38,6 +40,8 @@
<field name="price">74.99</field> <field name="price">74.99</field>
<field name="popularity">7</field> <field name="popularity">7</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
</doc> </doc>
@ -51,6 +55,8 @@
<!-- note: price & popularity is missing on this one --> <!-- note: price & popularity is missing on this one -->
<field name="popularity">0</field> <field name="popularity">0</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
</doc> </doc>

View File

@ -27,5 +27,7 @@
<field name="price">2199</field> <field name="price">2199</field>
<field name="popularity">6</field> <field name="popularity">6</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
</doc></add> </doc></add>

View File

@ -26,5 +26,7 @@
<field name="price">279.95</field> <field name="price">279.95</field>
<field name="popularity">6</field> <field name="popularity">6</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
</doc></add> </doc></add>

View File

@ -35,5 +35,7 @@
<field name="price">179.99</field> <field name="price">179.99</field>
<field name="popularity">6</field> <field name="popularity">6</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
</doc></add> </doc></add>

View File

@ -31,4 +31,6 @@
<field name="popularity">7</field> <field name="popularity">7</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
</doc></add> </doc></add>

View File

@ -30,6 +30,7 @@
<field name="weight">16</field> <field name="weight">16</field>
<field name="price">479.95</field> <field name="price">479.95</field>
<field name="popularity">7</field> <field name="popularity">7</field>
<field name="store">40.7143,-74.006</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
</doc> </doc>
@ -50,5 +51,7 @@
<field name="popularity">7</field> <field name="popularity">7</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field> <field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
<!-- NYC store -->
<field name="store">40.7143,-74.006</field>
</doc> </doc>
</add> </add>

View File

@ -396,6 +396,11 @@
any data added to them will be ignored outright. --> any data added to them will be ignored outright. -->
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
<!--
A PointType is a Poly Field. It can either declare a subFieldType or a subFieldSuffix
-->
<fieldType name="location" class="solr.PointType" dimension="2" subFieldType="double"/>
</types> </types>
@ -440,6 +445,8 @@
<field name="popularity" type="int" indexed="true" stored="true" /> <field name="popularity" type="int" indexed="true" stored="true" />
<field name="inStock" type="boolean" indexed="true" stored="true" /> <field name="inStock" type="boolean" indexed="true" stored="true" />
<field name="store" type="location" indexed="true" stored="true"/>
<!-- Common metadata fields, named specifically to match up with <!-- Common metadata fields, named specifically to match up with
SolrCell metadata when parsing rich documents such as Word, PDF. SolrCell metadata when parsing rich documents such as Word, PDF.

View File

@ -38,7 +38,7 @@ public class SolrException extends RuntimeException {
SERVER_ERROR( 500 ), SERVER_ERROR( 500 ),
SERVICE_UNAVAILABLE( 503 ), SERVICE_UNAVAILABLE( 503 ),
UNKNOWN(0); UNKNOWN(0);
final int code; public final int code;
private ErrorCode( int c ) private ErrorCode( int c )
{ {

View File

@ -0,0 +1,138 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.index.Term;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.MapSolrParams;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
/**
 * A CoordinateFieldType is the base class for {@link org.apache.solr.schema.FieldType}s that have semantics
 * related to items in a coordinate system.
 * <br/>
 * Implementations delegate their work to a sub {@link org.apache.solr.schema.FieldType}, specified by
 * either the {@link #SUB_FIELD_SUFFIX} or the {@link #SUB_FIELD_TYPE} attribute (the latter takes
 * precedence if both are defined).
 * <br/>
 * Example:
 * <pre>&lt;fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/&gt;
 * </pre>
 * In theory, classes deriving from this should be able to do things like represent a point, a polygon, a line, etc.
 * <br/>
 * NOTE: There can only be one sub Field Type.
 *
 */
public abstract class CoordinateFieldType extends FieldType implements SchemaAware {
  /**
   * The dimension of the coordinate system
   */
  protected int dimension;
  /**
   * The single sub {@link FieldType} all coordinate components delegate to.
   * Resolved in {@link #init} when {@link #SUB_FIELD_TYPE} is given, otherwise
   * in {@link #inform} when {@link #SUB_FIELD_SUFFIX} is given.
   */
  protected FieldType subType;
  public static final String SUB_FIELD_SUFFIX = "subFieldSuffix";
  public static final String SUB_FIELD_TYPE = "subFieldType";
  // Need to keep this around between init() and inform(), since dynamic fields
  // aren't created until just before inform() is called.
  private String suffix;
  // Field property bit mask (indexed/stored/...) of the dynamic field backing the sub type;
  // populated in inform().
  protected int dynFieldProps;

  /** @return the dimension of the coordinate system (e.g. 2 for an x,y point) */
  public int getDimension() {
    return dimension;
  }

  /** @return the sub {@link FieldType} that the actual per-dimension work is delegated to */
  public FieldType getSubType() {
    return subType;
  }

  /**
   * Reads and consumes the {@link #SUB_FIELD_TYPE} or {@link #SUB_FIELD_SUFFIX}
   * attribute from the field type declaration. Exactly one of the two must be
   * present; {@link #SUB_FIELD_TYPE} wins if both are.
   *
   * @throws SolrException (SERVER_ERROR) if neither attribute is specified
   */
  @Override
  protected void init(IndexSchema schema, Map<String, String> args) {
    //it's not a first class citizen for the IndexSchema
    SolrParams p = new MapSolrParams(args);
    String subFT = p.get(SUB_FIELD_TYPE);
    String subSuffix = p.get(SUB_FIELD_SUFFIX);
    if (subFT != null) {
      // Remove the attribute we consumed — presumably so later arg processing
      // doesn't treat it as an unknown attribute; TODO confirm against super.init
      args.remove(SUB_FIELD_TYPE);
      subType = schema.getFieldTypeByName(subFT.trim());
    } else if (subSuffix != null) {
      args.remove(SUB_FIELD_SUFFIX);
      // Can't resolve the type yet: the dynamic field this suffix names isn't
      // registered until later in schema loading — resolution happens in inform().
      suffix = subSuffix;
    } else {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The field type: " + typeName
              + " must specify the " +
              SUB_FIELD_TYPE + " attribute or the " + SUB_FIELD_SUFFIX + " attribute.");
    }
    super.init(schema, args);
  }

  /**
   * {@link SchemaAware} callback, invoked by the {@link IndexSchema} after all
   * fields and dynamic fields have been registered. Completes resolution of
   * {@link #subType} and captures {@link #dynFieldProps}.
   *
   * @throws SolrException (SERVER_ERROR) if neither a suffix nor a sub type was
   *         configured (should have been caught in {@link #init} already)
   */
  public void inform(IndexSchema schema) {
    //Can't do this until here b/c the Dynamic Fields are not initialized until here.
    if (suffix != null) {
      SchemaField sf = schema.getField(suffix);
      subType = sf.getType();//this means it is already registered
      dynFieldProps = sf.getProperties();
    }
    else if (subType != null) {
      // subFieldType path: register a "*" + POLY_FIELD_SEPARATOR + typeName
      // dynamic field prototype to back the generated sub fields.
      SchemaField proto = registerPolyFieldDynamicPrototype(schema, subType);
      dynFieldProps = proto.getProperties();
    } else {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The field type: " + typeName
              + " must specify the " +
              SUB_FIELD_TYPE + " attribute or the " + SUB_FIELD_SUFFIX + " attribute.");
    }
  }

  /**
   * Helper method for creating a dynamic field SchemaField prototype. Returns a {@link org.apache.solr.schema.SchemaField} with
   * the {@link org.apache.solr.schema.FieldType} given and a name of "*" + {@link org.apache.solr.schema.FieldType#POLY_FIELD_SEPARATOR} + {@link org.apache.solr.schema.FieldType#typeName}
   * and props of indexed=true, stored=false.
   * @param schema the IndexSchema
   * @param type The {@link org.apache.solr.schema.FieldType} of the prototype.
   * @return The {@link org.apache.solr.schema.SchemaField}
   */
  static SchemaField registerPolyFieldDynamicPrototype(IndexSchema schema, FieldType type) {
    String name = "*" + FieldType.POLY_FIELD_SEPARATOR + type.typeName;
    Map<String, String> props = new HashMap<String, String>();
    //Just set these, delegate everything else to the field type
    props.put("indexed", "true");
    props.put("stored", "false");
    int p = SchemaField.calcProps(name, type, props);
    SchemaField proto = SchemaField.create(name,
            type, p, null);
    schema.registerDynamicField(proto);
    return proto;
  }

  /**
   * Always throws {@link UnsupportedOperationException}; coordinate field types
   * must override this to support searching against the field as a whole.
   *
   * @throws UnsupportedOperationException always
   */
  public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
    throw new UnsupportedOperationException();
  }
}

View File

@ -20,14 +20,14 @@ package org.apache.solr.schema;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.index.Term;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.OrdFieldSource; import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.search.Sorting; import org.apache.solr.search.Sorting;
@ -36,11 +36,17 @@ import org.apache.solr.request.XMLWriter;
import org.apache.solr.request.TextResponseWriter; import org.apache.solr.request.TextResponseWriter;
import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.MapSolrParams;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.Map; import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.io.Reader; import java.io.Reader;
import java.io.IOException; import java.io.IOException;
@ -52,6 +58,14 @@ import java.io.IOException;
public abstract class FieldType extends FieldProperties { public abstract class FieldType extends FieldProperties {
public static final Logger log = LoggerFactory.getLogger(FieldType.class); public static final Logger log = LoggerFactory.getLogger(FieldType.class);
/**
* The default poly field separator.
*
* @see #createFields(SchemaField, String, float)
* @see #isPolyField()
*/
public static final String POLY_FIELD_SEPARATOR = "___";
/** The name of the type (not the name of the field) */ /** The name of the type (not the name of the field) */
protected String typeName; protected String typeName;
/** additional arguments specified in the field type declaration */ /** additional arguments specified in the field type declaration */
@ -62,6 +76,7 @@ public abstract class FieldType extends FieldProperties {
protected int falseProperties; protected int falseProperties;
int properties; int properties;
/** Returns true if fields of this type should be tokenized */ /** Returns true if fields of this type should be tokenized */
public boolean isTokenized() { public boolean isTokenized() {
return (properties & TOKENIZED) != 0; return (properties & TOKENIZED) != 0;
@ -72,6 +87,18 @@ public abstract class FieldType extends FieldProperties {
return (properties & MULTIVALUED) != 0; return (properties & MULTIVALUED) != 0;
} }
/**
* A "polyField" is a FieldType that can produce more than one Field per FieldType, via the {@link #createFields(org.apache.solr.schema.SchemaField, String, float)} method. This is useful
* when hiding the implementation details of a field from the Solr end user. For instance, a spatial point may be represented by three different field types, all of which may produce 1 or more
* fields.
* @return true if the {@link #createFields(org.apache.solr.schema.SchemaField, String, float)} method may return more than one field
*/
public boolean isPolyField(){
return false;
}
/** Returns true if a single field value of this type has multiple logical values /** Returns true if a single field value of this type has multiple logical values
* for the purposes of faceting, sorting, etc. Text fields normally return * for the purposes of faceting, sorting, etc. Text fields normally return
* true since each token/word is a logical value. * true since each token/word is a logical value.
@ -85,7 +112,8 @@ public abstract class FieldType extends FieldProperties {
* Common boolean properties have already been handled. * Common boolean properties have already been handled.
* *
*/ */
protected void init(IndexSchema schema, Map<String,String> args) { protected void init(IndexSchema schema, Map<String, String> args) {
} }
protected String getArg(String n, Map<String,String> args) { protected String getArg(String n, Map<String,String> args) {
@ -191,8 +219,15 @@ public abstract class FieldType extends FieldProperties {
* :TODO: clean up and clarify this explanation. * :TODO: clean up and clarify this explanation.
* *
* @see #toInternal * @see #toInternal
*
*
*/ */
public Field createField(SchemaField field, String externalVal, float boost) { public Field createField(SchemaField field, String externalVal, float boost) {
if (!field.indexed() && !field.stored()) {
if (log.isTraceEnabled())
log.trace("Ignoring unindexed/unstored field: " + field);
return null;
}
String val; String val;
try { try {
val = toInternal(externalVal); val = toInternal(externalVal);
@ -200,23 +235,123 @@ public abstract class FieldType extends FieldProperties {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error while creating field '" + field + "' from value '" + externalVal + "'", e, false); throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error while creating field '" + field + "' from value '" + externalVal + "'", e, false);
} }
if (val==null) return null; if (val==null) return null;
if (!field.indexed() && !field.stored()) {
if (log.isTraceEnabled()) return createField(field.getName(), val, getFieldStore(field, val),
log.trace("Ignoring unindexed/unstored field: " + field); getFieldIndex(field, val), getFieldTermVec(field, val), field.omitNorms(),
return null; field.omitTf(), boost);
}
/**
* Create multiple fields from a single field and multiple values. Fields are named as SchemaField.getName() + "_" + i + {@link #POLY_FIELD_SEPARATOR} + the sub type name, where
* i starts at 0.
* <p/>
* If the field is stored, then an extra field gets created that contains the storageVal. It is this field that also
*
* @param field The {@link org.apache.solr.schema.SchemaField}
* @param props The properties to use
* @param delegatedType An optional type to use. If null, then field.getType() is used. Useful for poly fields.
* @param storageVal If the field stores, then this value will be used for the stored field
* @param boost The boost to apply to all fields
* @param externalVals The values to use
* @return The fields
*/
protected Fieldable[] createFields(SchemaField field, int props,
FieldType delegatedType, String storageVal,
float boost, String ... externalVals) {
int n = field.indexed() ? externalVals.length : 0;
n += field.stored() ? 1 : 0;
if (delegatedType == null) { //if the type isn't being overriden, then just use the base one
delegatedType = field.getType();
} }
Field[] results = new Field[n];
//Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO, true, true
if (externalVals.length > 0) {
if (field.indexed()) {
String name = field.getName() + "_";
String suffix = POLY_FIELD_SEPARATOR + delegatedType.typeName;
Field f = new Field(field.getName(), int len = name.length();
StringBuilder bldr = new StringBuilder(len + 3 + suffix.length());//should be enough buffer to handle most values of j.
bldr.append(name);
for (int j = 0; j < externalVals.length; j++) {
//SchemaField is final, as is name, so we need to recreate each time
//put the counter before the separator, b/c dynamic fields can't be asterisks on both the front and the end of the String
bldr.append(j).append(suffix);
SchemaField sf = SchemaField.create(bldr.toString(),
delegatedType, props, null);
//schema.getDynamicField(name + "_" + j + POLY_FIELD_SEPARATOR + delegatedType.typeName);
/**/
//new SchemaField(name, ft, p, defaultValue )
//QUESTION: should we allow for vectors, etc? Not sure that it makes sense
results[j] = delegatedType.createField(sf, externalVals[j], boost);
bldr.setLength(len);//cut the builder back to just the length of the prefix, but keep the capacity
}
}
Field.TermVector fieldTermVec = getFieldTermVec(field, storageVal);
if (field.stored() || fieldTermVec.equals(Field.TermVector.YES)
|| fieldTermVec.equals(Field.TermVector.WITH_OFFSETS)
|| fieldTermVec.equals(Field.TermVector.WITH_POSITIONS)
|| fieldTermVec.equals(Field.TermVector.WITH_POSITIONS_OFFSETS)
) {
//QUESTION: should we allow for vectors, etc? Not sure that it makes sense
results[results.length - 1] = createField(field.getName(), storageVal, getFieldStore(field, storageVal),
Field.Index.NO,
fieldTermVec, field.omitNorms(), field.omitTf(), boost);
}
}
return results;
}
/**
* Create the field from native Lucene parts. Mostly intended for use by FieldTypes outputting multiple
* Fields per SchemaField
* @param name The name of the field
* @param val The _internal_ value to index
* @param storage {@link org.apache.lucene.document.Field.Store}
* @param index {@link org.apache.lucene.document.Field.Index}
* @param vec {@link org.apache.lucene.document.Field.TermVector}
* @param omitNorms true if norms should be omitted
* @param omitTFPos true if term freq and position should be omitted.
* @param boost The boost value
* @return the {@link org.apache.lucene.document.Field}.
*/
protected Field createField(String name, String val, Field.Store storage, Field.Index index,
Field.TermVector vec, boolean omitNorms, boolean omitTFPos, float boost){
Field f = new Field(name,
val, val,
getFieldStore(field, val), storage,
getFieldIndex(field, val), index,
getFieldTermVec(field, val)); vec);
f.setOmitNorms(field.omitNorms()); f.setOmitNorms(omitNorms);
f.setOmitTermFreqAndPositions(field.omitTf()); f.setOmitTermFreqAndPositions(omitTFPos);
f.setBoost(boost); f.setBoost(boost);
return f; return f;
} }
/**
* Given a {@link org.apache.solr.schema.SchemaField}, create one or more {@link org.apache.lucene.document.Field} instances
* @param field the {@link org.apache.solr.schema.SchemaField}
* @param externalVal The value to add to the field
* @param boost The boost to apply
* @return The {@link org.apache.lucene.document.Field} instances
*
* @see #createField(SchemaField, String, float)
* @see #isPolyField()
*/
public Fieldable[] createFields(SchemaField field, String externalVal, float boost) {
Field f = createField( field, externalVal, boost);
if( f != null ) {
return new Field[] { f };
}
return null;
}
/* Helpers for field construction */ /* Helpers for field construction */
protected Field.TermVector getFieldTermVec(SchemaField field, protected Field.TermVector getFieldTermVec(SchemaField field,
String internalVal) { String internalVal) {
@ -452,4 +587,36 @@ public abstract class FieldType extends FieldProperties {
minInclusive, maxInclusive); minInclusive, maxInclusive);
} }
/**
* Returns a Query instance for doing searches against a field.
* @param parser The {@link org.apache.solr.search.QParser} calling the method
* @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
* @param externalVal The String representation of the value to search
* @return The {@link org.apache.lucene.search.Query} instance. This implementation returns a {@link org.apache.lucene.search.TermQuery} but overriding queries may not
*
*/
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
return new TermQuery(new Term(field.getName(), toInternal(externalVal)));
}
/**
* Return a collection of all the Fields in the index where the {@link org.apache.solr.schema.SchemaField}
* @param polyField The instance of the {@link org.apache.solr.schema.SchemaField} to find the actual field names from
* @return The {@link java.util.Collection} of names of the actual fields that are a poly field.
*
*
*/
/*protected Collection<String> getPolyFieldNames(SchemaField polyField){
if (polyField.isPolyField()) {
if (polyField != null) {
//we need the names of all the fields. Do this lazily and then cache?
}
} //TODO: Should we throw an exception here in an else clause?
return Collections.emptyList();
}*/
} }

View File

@ -65,6 +65,30 @@ public final class IndexSchema {
private float version; private float version;
private final SolrResourceLoader loader; private final SolrResourceLoader loader;
private final HashMap<String, SchemaField> fields = new HashMap<String,SchemaField>();
private final HashMap<String, FieldType> fieldTypes = new HashMap<String,FieldType>();
private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
private DynamicField[] dynamicFields;
private Analyzer analyzer;
private Analyzer queryAnalyzer;
private String defaultSearchFieldName=null;
private String queryParserDefaultOperator = "OR";
private final Map<String, List<CopyField>> copyFieldsMap = new HashMap<String, List<CopyField>>();
private DynamicCopy[] dynamicCopyFields;
/**
* keys are all fields copied to, count is num of copyField
* directives that target them.
*/
private Map<SchemaField, Integer> copyFieldTargetCounts
= new HashMap<SchemaField, Integer>();
/** /**
* Constructs a schema using the specified file name using the normal * Constructs a schema using the specified file name using the normal
* Config path directory searching rules. * Config path directory searching rules.
@ -156,10 +180,7 @@ public final class IndexSchema {
@Deprecated @Deprecated
public String getName() { return name; } public String getName() { return name; }
private final HashMap<String, SchemaField> fields = new HashMap<String,SchemaField>(); ;
private final HashMap<String, FieldType> fieldTypes = new HashMap<String,FieldType>();
private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
/** /**
* Provides direct access to the Map containing all explicit * Provides direct access to the Map containing all explicit
@ -218,7 +239,7 @@ public final class IndexSchema {
*/ */
public SimilarityFactory getSimilarityFactory() { return similarityFactory; } public SimilarityFactory getSimilarityFactory() { return similarityFactory; }
private Analyzer analyzer;
/** /**
* Returns the Analyzer used when indexing documents for this index * Returns the Analyzer used when indexing documents for this index
@ -230,7 +251,7 @@ public final class IndexSchema {
*/ */
public Analyzer getAnalyzer() { return analyzer; } public Analyzer getAnalyzer() { return analyzer; }
private Analyzer queryAnalyzer;
/** /**
* Returns the Analyzer used when searching this index * Returns the Analyzer used when searching this index
@ -242,8 +263,7 @@ public final class IndexSchema {
*/ */
public Analyzer getQueryAnalyzer() { return queryAnalyzer; } public Analyzer getQueryAnalyzer() { return queryAnalyzer; }
private String defaultSearchFieldName=null;
private String queryParserDefaultOperator = "OR";
/** /**
* A SolrQueryParser linked to this IndexSchema for field datatype * A SolrQueryParser linked to this IndexSchema for field datatype
@ -399,7 +419,7 @@ public final class IndexSchema {
Config schemaConf = new Config(loader, "schema", is, "/schema/"); Config schemaConf = new Config(loader, "schema", is, "/schema/");
Document document = schemaConf.getDocument(); Document document = schemaConf.getDocument();
final XPath xpath = schemaConf.getXPath(); final XPath xpath = schemaConf.getXPath();
final List<SchemaAware> schemaAware = new ArrayList<SchemaAware>();
Node nd = (Node) xpath.evaluate("/schema/@name", document, XPathConstants.NODE); Node nd = (Node) xpath.evaluate("/schema/@name", document, XPathConstants.NODE);
if (nd==null) { if (nd==null) {
log.warn("schema has no name!"); log.warn("schema has no name!");
@ -434,6 +454,9 @@ public final class IndexSchema {
ft.setAnalyzer(analyzer); ft.setAnalyzer(analyzer);
ft.setQueryAnalyzer(queryAnalyzer); ft.setQueryAnalyzer(queryAnalyzer);
} }
if (ft instanceof SchemaAware){
schemaAware.add((SchemaAware) ft);
}
return ft; return ft;
} }
@ -494,7 +517,6 @@ public final class IndexSchema {
SolrException.logOnce(log,null,t); SolrException.logOnce(log,null,t);
SolrConfig.severeErrors.add( t ); SolrConfig.severeErrors.add( t );
} }
log.debug("field defined: " + f); log.debug("field defined: " + f);
if( f.getDefaultValue() != null ) { if( f.getDefaultValue() != null ) {
log.debug(name+" contains default value: " + f.getDefaultValue()); log.debug(name+" contains default value: " + f.getDefaultValue());
@ -506,23 +528,7 @@ public final class IndexSchema {
} }
} else if (node.getNodeName().equals("dynamicField")) { } else if (node.getNodeName().equals("dynamicField")) {
// make sure nothing else has the same path // make sure nothing else has the same path
boolean dup = false; addDynamicField(dFields, f);
for( DynamicField df : dFields ) {
if( df.regex.equals( f.name ) ) {
String msg = "[schema.xml] Duplicate DynamicField definition for '"
+ f.getName() + "' ignoring: "+f.toString();
Throwable t = new SolrException( SolrException.ErrorCode.SERVER_ERROR, msg );
SolrException.logOnce(log,null,t);
SolrConfig.severeErrors.add( t );
dup = true;
break;
}
}
if( !dup ) {
dFields.add(new DynamicField(f));
log.debug("dynamic field defined: " + f);
}
} else { } else {
// we should never get here // we should never get here
throw new RuntimeException("Unknown field type"); throw new RuntimeException("Unknown field type");
@ -534,6 +540,7 @@ public final class IndexSchema {
// in DocumentBuilder.getDoc() // in DocumentBuilder.getDoc()
requiredFields.addAll(getFieldsWithDefaultValue()); requiredFields.addAll(getFieldsWithDefaultValue());
// OK, now sort the dynamic fields largest to smallest size so we don't get // OK, now sort the dynamic fields largest to smallest size so we don't get
// any false matches. We want to act like a compiler tool and try and match // any false matches. We want to act like a compiler tool and try and match
// the largest string possible. // the largest string possible.
@ -568,6 +575,9 @@ public final class IndexSchema {
} }
}; };
} }
if (similarityFactory instanceof SchemaAware){
schemaAware.add((SchemaAware) similarityFactory);
}
log.debug("using similarity factory" + similarityFactory.getClass().getName()); log.debug("using similarity factory" + similarityFactory.getClass().getName());
} }
@ -652,7 +662,10 @@ public final class IndexSchema {
entry.getValue()+")"); entry.getValue()+")");
} }
} }
//Run the callbacks on SchemaAware now that everything else is done
for (SchemaAware aware : schemaAware) {
aware.inform(this);
}
} catch (SolrException e) { } catch (SolrException e) {
SolrConfig.severeErrors.add( e ); SolrConfig.severeErrors.add( e );
throw e; throw e;
@ -664,6 +677,56 @@ public final class IndexSchema {
// create the field analyzers // create the field analyzers
refreshAnalyzers(); refreshAnalyzers();
}
private void addDynamicField(List<DynamicField> dFields, SchemaField f) {
boolean dup = isDuplicateDynField(dFields, f);
if( !dup ) {
addDynamicFieldNoDupCheck(dFields, f);
} else {
String msg = "[schema.xml] Duplicate DynamicField definition for '"
+ f.getName() + "' ignoring: " + f.toString();
Throwable t = new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
SolrException.logOnce(log, null, t);
SolrConfig.severeErrors.add(t);
}
}
/**
* Register one or more new Dynamic Field with the Schema.
* @param f The {@link org.apache.solr.schema.SchemaField}
*/
public void registerDynamicField(SchemaField ... f) {
List<DynamicField> dynFields = new ArrayList<DynamicField>(Arrays.asList(dynamicFields));
for (SchemaField field : f) {
if (isDuplicateDynField(dynFields, field) == false) {
log.debug("dynamic field creation for schema field: " + field.getName());
addDynamicFieldNoDupCheck(dynFields, field);
} else {
log.debug("dynamic field creation avoided: dynamic field: [" + field.getName() + "] " +
"already defined in the schema!");
}
}
Collections.sort(dynFields);
dynamicFields = (DynamicField[]) dynFields.toArray(new DynamicField[dynFields.size()]);
}
private void addDynamicFieldNoDupCheck(List<DynamicField> dFields, SchemaField f) {
dFields.add(new DynamicField(f));
log.debug("dynamic field defined: " + f);
}
private boolean isDuplicateDynField(List<DynamicField> dFields, SchemaField f) {
boolean dup = false;
for( DynamicField df : dFields ) {
if( df.regex.equals( f.name ) ) {
dup = true;
break;
}
}
return dup;
} }
public void registerCopyField( String source, String dest ) public void registerCopyField( String source, String dest )
@ -987,7 +1050,7 @@ public final class IndexSchema {
} }
} }
private DynamicField[] dynamicFields;
public SchemaField[] getDynamicFieldPrototypes() { public SchemaField[] getDynamicFieldPrototypes() {
SchemaField[] df = new SchemaField[dynamicFields.length]; SchemaField[] df = new SchemaField[dynamicFields.length];
for (int i=0;i<dynamicFields.length;i++) { for (int i=0;i<dynamicFields.length;i++) {
@ -1044,12 +1107,14 @@ public final class IndexSchema {
* Returns the SchemaField that should be used for the specified field name, or * Returns the SchemaField that should be used for the specified field name, or
* null if none exists. * null if none exists.
* *
* @param fieldName may be an explicitly defined field, or a name that * @param fieldName may be an explicitly defined field, a PolyField, or a name that
* matches a dynamic field. * matches a dynamic field.
* @see #getFieldType * @see #getFieldType
* @see #getField(String)
* @return The {@link org.apache.solr.schema.SchemaField}
*/ */
public SchemaField getFieldOrNull(String fieldName) { public SchemaField getFieldOrNull(String fieldName) {
SchemaField f = fields.get(fieldName); SchemaField f = fields.get(fieldName);
if (f != null) return f; if (f != null) return f;
for (DynamicField df : dynamicFields) { for (DynamicField df : dynamicFields) {
@ -1062,18 +1127,17 @@ public final class IndexSchema {
/** /**
* Returns the SchemaField that should be used for the specified field name * Returns the SchemaField that should be used for the specified field name
* *
* @param fieldName may be an explicitly defined field, or a name that * @param fieldName may be an explicitly defined field, a PolyField type, or a name that
* matches a dynamic field. * matches a dynamic field.
* @throws SolrException if no such field exists * @throws SolrException if no such field exists
* @see #getFieldType * @see #getFieldType
* @see #getFieldOrNull(String)
* @return The {@link SchemaField}
*/ */
public SchemaField getField(String fieldName) { public SchemaField getField(String fieldName) {
SchemaField f = fields.get(fieldName); SchemaField f = getFieldOrNull(fieldName);
if (f != null) return f; if (f != null) return f;
for (DynamicField df : dynamicFields) {
if (df.matches(fieldName)) return df.makeSchemaField(fieldName);
}
// Hmmm, default field could also be implemented with a dynamic field of "*". // Hmmm, default field could also be implemented with a dynamic field of "*".
// It would have to be special-cased and only used if nothing else matched. // It would have to be special-cased and only used if nothing else matched.
@ -1104,6 +1168,16 @@ public final class IndexSchema {
return getDynamicFieldType(fieldName); return getDynamicFieldType(fieldName);
} }
/**
* Given the name of a {@link org.apache.solr.schema.FieldType} (not to be confused with {@link #getFieldType(String)} which
* takes in the name of a field), return the {@link org.apache.solr.schema.FieldType}.
* @param fieldTypeName The name of the {@link org.apache.solr.schema.FieldType}
* @return The {@link org.apache.solr.schema.FieldType} or null.
*/
public FieldType getFieldTypeByName(String fieldTypeName){
return fieldTypes.get(fieldTypeName);
}
/** /**
* Returns the FieldType for the specified field name. * Returns the FieldType for the specified field name.
* *
@ -1149,15 +1223,22 @@ public final class IndexSchema {
return null; return null;
}; };
private final Map<String, List<CopyField>> copyFieldsMap = new HashMap<String, List<CopyField>>();
private DynamicCopy[] dynamicCopyFields;
/** /**
* keys are all fields copied to, count is num of copyField *
* directives that target them. * @param fieldName The name of the field
* @return the {@link FieldType} or a {@link org.apache.solr.common.SolrException} if the field is not a poly field.
*/ */
private Map<SchemaField, Integer> copyFieldTargetCounts public FieldType getPolyFieldType(String fieldName){
= new HashMap<SchemaField, Integer>(); SchemaField f = fields.get(fieldName);
if (f != null && f.isPolyField()) return f.getType();
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"undefined field or not a poly field "+fieldName);
}
public FieldType getPolyFieldTypeNoEx(String fieldName){
SchemaField f = fields.get(fieldName);
if (f != null && f.isPolyField()) return f.getType();
return null;
}
/** /**
* Get all copy fields, both the static and the dynamic ones. * Get all copy fields, both the static and the dynamic ones.

View File

@ -0,0 +1,276 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.SortField;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.TextResponseWriter;
import org.apache.solr.request.XMLWriter;
import org.apache.solr.search.MultiValueSource;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.distance.DistanceUtils;
import java.io.IOException;
import java.util.Map;
/**
* A point type that indexes a point in an n-dimensional space as separate fields and uses
* range queries for bounding box calculations.
* <p/>
* <p/>
* NOTE: There can only be one sub type
*/
public class PointType extends CoordinateFieldType {
/**
* 2 dimensional by default
*/
public static final int DEFAULT_DIMENSION = 2;
public static final String DIMENSION = "dimension";
protected IndexSchema schema; // needed for retrieving SchemaFields
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
SolrParams p = new MapSolrParams(args);
dimension = p.getInt(DIMENSION, DEFAULT_DIMENSION);
if (dimension < 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"The dimension must be > 0: " + dimension);
}
args.remove(DIMENSION);
this.schema = schema;
super.init(schema, args);
}
@Override
public boolean isPolyField() {
return true; // really only true if the field is indexed
}
@Override
public Fieldable[] createFields(SchemaField field, String externalVal, float boost) {
String[] point = DistanceUtils.parsePoint(null, externalVal, dimension);
return createFields(field, dynFieldProps, subType, externalVal, boost, point);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
return new PointTypeValueSource(field, dimension, subType, parser);
}
//It never makes sense to create a single field, so make it impossible to happen
@Override
public Field createField(SchemaField field, String externalVal, float boost) {
throw new UnsupportedOperationException("PointType uses multiple fields. field=" + field.getName());
}
@Override
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
xmlWriter.writeStr(name, f.stringValue());
}
@Override
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeStr(name, f.stringValue(), false);
}
@Override
public SortField getSortField(SchemaField field, boolean top) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sorting not suported on DualPointType " + field.getName());
}
@Override
/**
* Care should be taken in calling this with higher order dimensions for performance reasons.
*/
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
//Query could look like: [x1,y1 TO x2,y2] for 2 dimension, but could look like: [x1,y1,z1 TO x2,y2,z2], and can be extrapolated to n-dimensions
//thus, this query essentially creates a box, cube, etc.
String[] p1 = DistanceUtils.parsePoint(null, part1, dimension);
String[] p2 = DistanceUtils.parsePoint(null, part2, dimension);
BooleanQuery result = new BooleanQuery(true);
String name = field.getName() + "_";
String suffix = POLY_FIELD_SEPARATOR + subType.typeName;
int len = name.length();
StringBuilder bldr = new StringBuilder(len + 3 + suffix.length());//should be enough buffer to handle most values of j.
bldr.append(name);
for (int i = 0; i < dimension; i++) {
bldr.append(i).append(suffix);
SchemaField subSF = schema.getField(bldr.toString());
// points must currently be ordered... should we support specifying any two opposite corner points?
/*new TermRangeQuery(
field.getName() + i + POLY_FIELD_SEPARATOR + subType.typeName,
subType.toInternal(p1[i]),
subType.toInternal(p2[i]),
minInclusive, maxInclusive);*/
result.add(subType.getRangeQuery(parser, subSF, p1[i], p2[i], minInclusive, maxInclusive), BooleanClause.Occur.MUST);
bldr.setLength(len);
}
return result;
}
@Override
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
Query result = null;
String[] p1 = DistanceUtils.parsePoint(null, externalVal, dimension);
//TODO: should we assert that p1.length == dimension?
BooleanQuery bq = new BooleanQuery(true);
String name = field.getName() + "_";
String suffix = POLY_FIELD_SEPARATOR + subType.typeName;
int len = name.length();
StringBuilder bldr = new StringBuilder(len + 3 + suffix.length());//should be enough buffer to handle most values of j.
bldr.append(name);
for (int i = 0; i < dimension; i++) {
bldr.append(i).append(suffix);
SchemaField sf1 = schema.getField(bldr.toString());
Query tq = subType.getFieldQuery(parser, sf1, p1[i]);
//new TermQuery(new Term(bldr.toString(), subType.toInternal(p1[i])));
bq.add(tq, BooleanClause.Occur.MUST);
bldr.setLength(len);
}
result = bq;
return result;
}
class PointTypeValueSource extends MultiValueSource {
protected SchemaField field;
protected FieldType subType;
protected int dimension;
private QParser parser;
public PointTypeValueSource(SchemaField field, int dimension, FieldType subType, QParser parser) {
this.field = field;
this.dimension = dimension;
this.subType = subType;
this.parser = parser;
}
@Override
public void createWeight(Map context, Searcher searcher) throws IOException {
String name = field.getName();
String suffix = FieldType.POLY_FIELD_SEPARATOR + subType.typeName;
int len = name.length();
StringBuilder bldr = new StringBuilder(len + 3 + suffix.length());//should be enough buffer to handle most values of j.
bldr.append(name);
for (int i = 0; i < dimension; i++) {
bldr.append(i).append(suffix);
SchemaField sf = schema.getField(bldr.toString());
subType.getValueSource(sf, parser).createWeight(context, searcher);
bldr.setLength(len);
}
}
public int dimension() {
return dimension;
}
@Override
public DocValues getValues(Map context, IndexReader reader) throws IOException {
final DocValues[] valsArr1 = new DocValues[dimension];
String name = field.getName();
String suffix = FieldType.POLY_FIELD_SEPARATOR + subType.typeName;
int len = name.length();
StringBuilder bldr = new StringBuilder(len + 3 + suffix.length());//should be enough buffer to handle most values of j.
bldr.append(name);
for (int i = 0; i < dimension; i++) {
bldr.append(i).append(suffix);
SchemaField sf = schema.getField(bldr.toString());
valsArr1[i] = subType.getValueSource(sf, parser).getValues(context, reader);
bldr.setLength(len);
}
return new DocValues() {
//TODO: not sure how to handle the other types at this moment
@Override
public void doubleVal(int doc, double[] vals) {
//TODO: check whether vals.length == dimension or assume its handled elsewhere?
for (int i = 0; i < dimension; i++) {
vals[i] = valsArr1[i].doubleVal(doc);
}
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder("point(");
boolean firstTime = true;
for (DocValues docValues : valsArr1) {
if (firstTime == false) {
sb.append(",");
} else {
firstTime = true;
}
sb.append(docValues.toString(doc));
}
sb.append(")");
return sb.toString();
}
};
}
public String description() {
StringBuilder sb = new StringBuilder();
sb.append("point(");
sb.append("fld=").append(field.name).append(", subType=").append(subType.typeName)
.append(", dimension=").append(dimension).append(')');
return sb.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof PointTypeValueSource)) return false;
PointTypeValueSource that = (PointTypeValueSource) o;
if (dimension != that.dimension) return false;
if (!field.equals(that.field)) return false;
if (!subType.equals(that.subType)) return false;
return true;
}
@Override
public int hashCode() {
int result = field.hashCode();
result = 31 * result + subType.hashCode();
result = 31 * result + dimension;
return result;
}
}
}

View File

@ -0,0 +1,39 @@
package org.apache.solr.schema;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An interface that can be extended to provide a callback mechanism for
* informing an {@link IndexSchema} instance of changes to it, dynamically
* performed at runtime.
*
* @since SOLR-1131
*
**/
public interface SchemaAware {
/**
* Informs the {@link IndexSchema} provided by the <code>schema</code>
* parameter of an event (e.g., a new {@link FieldType} was added, etc.
*
* @param schema
* The {@link IndexSchema} instance that inform of the update to.
*
* @since SOLR-1131
*/
public void inform(IndexSchema schema);
}

View File

@ -90,10 +90,24 @@ public final class SchemaField extends FieldProperties {
boolean isTokenized() { return (properties & TOKENIZED)!=0; } boolean isTokenized() { return (properties & TOKENIZED)!=0; }
boolean isBinary() { return (properties & BINARY)!=0; } boolean isBinary() { return (properties & BINARY)!=0; }
public Field createField(String val, float boost) { public Field createField(String val, float boost) {
return type.createField(this,val,boost); return type.createField(this,val,boost);
} }
public Fieldable[] createFields(String val, float boost) {
return type.createFields(this,val,boost);
}
/**
* If true, then use {@link #createFields(String, float)}, else use {@link #createField} to save an extra allocation
* @return true if this field is a poly field
*/
public boolean isPolyField(){
return type.isPolyField();
}
@Override @Override
public String toString() { public String toString() {
return name + "{type="+type.getTypeName() return name + "{type="+type.getTypeName()
@ -119,6 +133,29 @@ public final class SchemaField extends FieldProperties {
static SchemaField create(String name, FieldType ft, Map<String,String> props) { static SchemaField create(String name, FieldType ft, Map<String,String> props) {
String defaultValue = null;
if( props.containsKey( "default" ) ) {
defaultValue = (String)props.get( "default" );
}
return new SchemaField(name, ft, calcProps(name, ft, props), defaultValue );
}
/**
* Create a SchemaField w/ the props specified. Does not support a default value.
* @param name The name of the SchemaField
* @param ft The {@link org.apache.solr.schema.FieldType} of the field
* @param props The props. See {@link #calcProps(String, org.apache.solr.schema.FieldType, java.util.Map)}
* @param defValue The default Value for the field
* @return The SchemaField
*
* @see #create(String, FieldType, java.util.Map)
*/
static SchemaField create(String name, FieldType ft, int props, String defValue){
return new SchemaField(name, ft, props, defValue);
}
static int calcProps(String name, FieldType ft, Map<String, String> props) {
int trueProps = parseProperties(props,true); int trueProps = parseProperties(props,true);
int falseProps = parseProperties(props,false); int falseProps = parseProperties(props,false);
@ -166,12 +203,7 @@ public final class SchemaField extends FieldProperties {
p &= ~falseProps; p &= ~falseProps;
p |= trueProps; p |= trueProps;
return p;
String defaultValue = null;
if( props.containsKey( "default" ) ) {
defaultValue = (String)props.get( "default" );
}
return new SchemaField(name, ft, p, defaultValue );
} }
public String getDefaultValue() { public String getDefaultValue() {

View File

@ -18,12 +18,28 @@
package org.apache.solr.schema; package org.apache.solr.schema;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.request.XMLWriter; import org.apache.solr.request.XMLWriter;
import org.apache.solr.request.TextResponseWriter; import org.apache.solr.request.TextResponseWriter;
import org.apache.solr.search.QParser;
import java.util.Map; import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
/** <code>TextField</code> is the basic type for configurable text analysis. /** <code>TextField</code> is the basic type for configurable text analysis.
* Analyzers for field types using this implementation should be defined in the schema. * Analyzers for field types using this implementation should be defined in the schema.
@ -48,4 +64,190 @@ public class TextField extends CompressableField {
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
writer.writeStr(name, f.stringValue(), true); writer.writeStr(name, f.stringValue(), true);
} }
@Override
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
return parseFieldQuery(parser, getQueryAnalyzer(), field.getName(), externalVal);
}
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
int phraseSlop = 0;
boolean enablePositionIncrements = true;
// most of the following code is taken from the Lucene QueryParser
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
TokenStream source;
try {
source = analyzer.reusableTokenStream(field, new StringReader(queryText));
source.reset();
} catch (IOException e) {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
TermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
boolean success = false;
try {
buffer.reset();
success = true;
} catch (IOException e) {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(TermAttribute.class)) {
termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
}
}
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
boolean hasMoreTokens = false;
if (termAtt != null) {
try {
hasMoreTokens = buffer.incrementToken();
while (hasMoreTokens) {
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
} else {
severalTokensAtSamePosition = true;
}
hasMoreTokens = buffer.incrementToken();
}
} catch (IOException e) {
// ignore
}
}
try {
// rewind the buffer stream
buffer.reset();
// close original stream - all tokens buffered
source.close();
}
catch (IOException e) {
// ignore
}
if (numTokens == 0)
return null;
else if (numTokens == 1) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
// return newTermQuery(new Term(field, term));
return new TermQuery(new Term(field, term));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
// BooleanQuery q = newBooleanQuery(true);
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
// Query currentQuery = newTermQuery(new Term(field, term));
Query currentQuery = new TermQuery(new Term(field, term));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
// MultiPhraseQuery mpq = newMultiPhraseQuery();
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
} else {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new Term(field, term));
}
if (enablePositionIncrements) {
mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
} else {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
}
return mpq;
}
}
else {
// PhraseQuery pq = newPhraseQuery();
PhraseQuery pq = new PhraseQuery();
pq.setSlop(phraseSlop);
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (enablePositionIncrements) {
position += positionIncrement;
pq.add(new Term(field, term),position);
} else {
pq.add(new Term(field, term));
}
}
return pq;
}
}
}
} }

View File

@ -28,6 +28,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TextField; import org.apache.solr.schema.TextField;
import org.apache.solr.schema.SchemaField;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -52,99 +53,9 @@ public class FieldQParserPlugin extends QParserPlugin {
public Query parse() throws ParseException { public Query parse() throws ParseException {
String field = localParams.get(QueryParsing.F); String field = localParams.get(QueryParsing.F);
String queryText = localParams.get(QueryParsing.V); String queryText = localParams.get(QueryParsing.V);
FieldType ft = req.getSchema().getFieldType(field); SchemaField sf = req.getSchema().getField(field);
if (!(ft instanceof TextField)) { FieldType ft = sf.getType();
String internal = ft.toInternal(queryText); return ft.getFieldQuery(this, sf, queryText);
return new TermQuery(new Term(field, internal));
}
int phraseSlop = 0;
Analyzer analyzer = req.getSchema().getQueryAnalyzer();
// most of the following code is taken from the Lucene QueryParser
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
TokenStream source = null;
try {
source = analyzer.reusableTokenStream(field, new StringReader(queryText));
source.reset();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
ArrayList<Token> lst = new ArrayList<Token>();
Token t;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
while (true) {
try {
t = source.next();
}
catch (IOException e) {
t = null;
}
if (t == null)
break;
lst.add(t);
if (t.getPositionIncrement() != 0)
positionCount += t.getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
source.close();
}
catch (IOException e) {
// ignore
}
if (lst.size() == 0)
return null;
else if (lst.size() == 1) {
t = lst.get(0);
return new TermQuery(new Term(field, new String(t.termBuffer(), 0, t.termLength())));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery(true);
for (int i = 0; i < lst.size(); i++) {
t = (org.apache.lucene.analysis.Token) lst.get(i);
TermQuery currentQuery = new TermQuery(
new Term(field, new String(t.termBuffer(), 0, t.termLength())));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
}
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
ArrayList multiTerms = new ArrayList();
for (int i = 0; i < lst.size(); i++) {
t = (org.apache.lucene.analysis.Token) lst.get(i);
if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, new String(t.termBuffer(), 0, t.termLength())));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
}
}
else {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < lst.size(); i++) {
Token token = lst.get(i);
q.add(new Term(field, new String(token.termBuffer(), 0, token.termLength())));
}
return q;
}
}
} }
}; };
} }

View File

@ -0,0 +1,29 @@
package org.apache.solr.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.search.function.ValueSource;
/**
* A {@link ValueSource} that abstractly represents {@link ValueSource}s for
* poly fields, and other things.
**/
public abstract class MultiValueSource extends ValueSource {
public abstract int dimension();
}

View File

@ -34,6 +34,7 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField; import org.apache.solr.schema.TrieField;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
// TODO: implement the analysis of simple fields with // TODO: implement the analysis of simple fields with
// FieldType.toInternal() instead of going through the // FieldType.toInternal() instead of going through the
@ -145,6 +146,12 @@ public class SolrQueryParser extends QueryParser {
return parser.subQuery(queryText, null).getQuery(); return parser.subQuery(queryText, null).getQuery();
} }
} }
//Intercept poly fields, as they get expanded by default to an OR clause of
SchemaField sf = schema.getField(field);
//TODO: is there anyway to avoid this instance of check?
if (sf != null&& !(sf.getType() instanceof TextField)){//we have a poly field, deal with it specially by delegating to the FieldType
return sf.getType().getFieldQuery(parser, sf, queryText);
}
// default to a normal field query // default to a normal field query
return super.getFieldQuery(field, queryText); return super.getFieldQuery(field, queryText);

View File

@ -0,0 +1,164 @@
package org.apache.solr.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Searcher;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* Converts individual ValueSource instances to leverage the DocValues *Val functions that work with multiple values,
* i.e. {@link org.apache.solr.search.function.DocValues#doubleVal(int, double[])}
*/
//Not crazy about the name, but...
public class ToMultiValueSource extends MultiValueSource {
  protected List<ValueSource> sources;

  /**
   * Wraps the given single-valued sources so they can be consumed as one
   * multi-dimensional source; the resulting dimension equals the number of
   * sources supplied.
   */
  public ToMultiValueSource(List<ValueSource> sources) {
    this.sources = sources;
  }

  /** @return the wrapped single-valued sources, in order. */
  public List<ValueSource> getSources() {
    return sources;
  }

  /** One dimension per wrapped source. */
  public int dimension() {
    return sources.size();
  }

  @Override
  public DocValues getValues(Map context, IndexReader reader) throws IOException {
    final int dim = sources.size();
    final DocValues[] docVals = new DocValues[dim];
    for (int idx = 0; idx < dim; idx++) {
      docVals[idx] = sources.get(idx).getValues(context, reader);
    }

    // Each array-flavored *Val call fans out to the wrapped sources, one slot apiece.
    return new DocValues() {
      @Override
      public void byteVal(int doc, byte[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].byteVal(doc);
        }
      }

      @Override
      public void shortVal(int doc, short[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].shortVal(doc);
        }
      }

      @Override
      public void floatVal(int doc, float[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].floatVal(doc);
        }
      }

      @Override
      public void intVal(int doc, int[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].intVal(doc);
        }
      }

      @Override
      public void longVal(int doc, long[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].longVal(doc);
        }
      }

      @Override
      public void doubleVal(int doc, double[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].doubleVal(doc);
        }
      }

      @Override
      public void strVal(int doc, String[] vals) {
        for (int idx = 0; idx < docVals.length; idx++) {
          vals[idx] = docVals[idx].strVal(doc);
        }
      }

      @Override
      public String toString(int doc) {
        StringBuilder buf = new StringBuilder("toMultiVS(");
        String sep = "";
        for (DocValues dv : docVals) {
          buf.append(sep).append(dv.toString(doc));
          sep = ",";
        }
        return buf.append(')').toString();
      }
    };
  }

  @Override
  public void createWeight(Map context, Searcher searcher) throws IOException {
    for (ValueSource src : sources) {
      src.createWeight(context, searcher);
    }
  }

  @Override
  public String description() {
    StringBuilder buf = new StringBuilder("toMultiVS(");
    String sep = "";
    for (ValueSource src : sources) {
      buf.append(sep).append(src);
      sep = ",";
    }
    return buf.append(')').toString();
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) return true;
    if (!(o instanceof ToMultiValueSource)) return false;
    return sources.equals(((ToMultiValueSource) o).sources);
  }

  @Override
  public int hashCode() {
    return sources.hashCode();
  }
}

View File

@ -41,6 +41,7 @@ import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Collections;
/** /**
* A factory that parses user queries to generate ValueSource instances. * A factory that parses user queries to generate ValueSource instances.
@ -202,6 +203,11 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
}; };
} }
}); });
addParser("toMultiVS", new ValueSourceParser(){
public ValueSource parse(FunctionQParser fp) throws ParseException{
return new ToMultiValueSource(fp.parseValueSourceList());
}
});
addParser("query", new ValueSourceParser() { addParser("query", new ValueSourceParser() {
// boost(query($q),rating) // boost(query($q),rating)
public ValueSource parse(FunctionQParser fp) throws ParseException { public ValueSource parse(FunctionQParser fp) throws ParseException {
@ -224,22 +230,47 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("hsin", new ValueSourceParser() { addParser("hsin", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException { public ValueSource parse(FunctionQParser fp) throws ParseException {
ValueSource x1 = fp.parseValueSource();
ValueSource y1 = fp.parseValueSource();
ValueSource x2 = fp.parseValueSource();
ValueSource y2 = fp.parseValueSource();
double radius = fp.parseDouble(); double radius = fp.parseDouble();
MultiValueSource pv1;
MultiValueSource pv2;
return new HaversineFunction(x1, y1, x2, y2, radius); ValueSource one = fp.parseValueSource();
ValueSource two = fp.parseValueSource();
if (fp.hasMoreArguments()) {
List<ValueSource> s1 = new ArrayList<ValueSource>();
s1.add(one);
s1.add(two);
pv1 = new ToMultiValueSource(s1);
ValueSource x2 = fp.parseValueSource();
ValueSource y2 = fp.parseValueSource();
List<ValueSource> s2 = new ArrayList<ValueSource>();
s2.add(x2);
s2.add(y2);
pv2 = new ToMultiValueSource(s2);
} else {
//check to see if we have multiValue source
if (one instanceof MultiValueSource && two instanceof MultiValueSource){
pv1 = (MultiValueSource) one;
pv2 = (MultiValueSource) two;
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Input must either be 2 MultiValueSources, or there must be 4 ValueSources");
}
}
boolean convert = false;
if (fp.hasMoreArguments()){
convert = Boolean.parseBoolean(fp.parseArg());
}
return new HaversineFunction(pv1, pv2, radius, convert);
} }
}); });
addParser("ghhsin", new ValueSourceParser() { addParser("ghhsin", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException { public ValueSource parse(FunctionQParser fp) throws ParseException {
double radius = fp.parseDouble();
ValueSource gh1 = fp.parseValueSource(); ValueSource gh1 = fp.parseValueSource();
ValueSource gh2 = fp.parseValueSource(); ValueSource gh2 = fp.parseValueSource();
double radius = fp.parseDouble();
return new GeohashHaversineFunction(gh1, gh2, radius); return new GeohashHaversineFunction(gh1, gh2, radius);
} }
@ -393,15 +424,9 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("sqedist", new ValueSourceParser() { addParser("sqedist", new ValueSourceParser() {
public ValueSource parse(FunctionQParser fp) throws ParseException { public ValueSource parse(FunctionQParser fp) throws ParseException {
List<ValueSource> sources = fp.parseValueSourceList(); List<ValueSource> sources = fp.parseValueSourceList();
if (sources.size() % 2 != 0) { MVResult mvr = getMultiValueSources(sources);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal number of sources. There must be an even number of sources");
} return new SquaredEuclideanFunction(mvr.mv1, mvr.mv2);
int dim = sources.size() / 2;
List<ValueSource> sources1 = new ArrayList<ValueSource>(dim);
List<ValueSource> sources2 = new ArrayList<ValueSource>(dim);
//Get dim value sources for the first vector
splitSources(dim, sources, sources1, sources2);
return new SquaredEuclideanFunction(sources1, sources2);
} }
}); });
@ -409,14 +434,8 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
public ValueSource parse(FunctionQParser fp) throws ParseException { public ValueSource parse(FunctionQParser fp) throws ParseException {
float power = fp.parseFloat(); float power = fp.parseFloat();
List<ValueSource> sources = fp.parseValueSourceList(); List<ValueSource> sources = fp.parseValueSourceList();
if (sources.size() % 2 != 0) { MVResult mvr = getMultiValueSources(sources);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal number of sources. There must be an even number of sources"); return new VectorDistanceFunction(power, mvr.mv1, mvr.mv2);
}
int dim = sources.size() / 2;
List<ValueSource> sources1 = new ArrayList<ValueSource>(dim);
List<ValueSource> sources2 = new ArrayList<ValueSource>(dim);
splitSources(dim, sources, sources1, sources2);
return new VectorDistanceFunction(power, sources1, sources2);
} }
}); });
addParser("ms", new DateValueSourceParser()); addParser("ms", new DateValueSourceParser());
@ -445,6 +464,44 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
} }
} }
/**
 * Normalize a flat, even-length list of {@link ValueSource}s into a pair of
 * {@link MultiValueSource}s (one per vector).  Three shapes are accepted:
 * exactly two MultiValueSources; exactly two plain ValueSources (each wrapped
 * as a 1-dimensional MultiValueSource); or 2*N plain ValueSources, which are
 * split into two N-dimensional vectors.
 *
 * @param sources the parsed argument list; must have an even size
 * @return the two MultiValueSources, never null
 * @throws SolrException if the list size is odd, or if only one of two
 *         sources is a MultiValueSource
 */
private static MVResult getMultiValueSources(List<ValueSource> sources) {
  MVResult mvr = new MVResult();
  if (sources.size() % 2 != 0) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal number of sources. There must be an even number of sources");
  }
  if (sources.size() == 2) {
    //check to see if these are MultiValueSource
    boolean s1MV = sources.get(0) instanceof MultiValueSource;
    boolean s2MV = sources.get(1) instanceof MultiValueSource;
    if (s1MV && s2MV) {
      mvr.mv1 = (MultiValueSource) sources.get(0);
      mvr.mv2 = (MultiValueSource) sources.get(1);
    } else if (s1MV || s2MV) {
      //if one is a MultiValueSource, then the other one needs to be too.
      //NOTE: the previous message ("must be an even number of sources") was wrong
      //for this branch -- two sources *is* an even number; the problem is mixed types.
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
              "Illegal combination of sources. If one source is a MultiValueSource, both must be");
    } else {
      mvr.mv1 = new ToMultiValueSource(Collections.singletonList(sources.get(0)));
      mvr.mv2 = new ToMultiValueSource(Collections.singletonList(sources.get(1)));
    }
  } else {
    int dim = sources.size() / 2;
    List<ValueSource> sources1 = new ArrayList<ValueSource>(dim);
    List<ValueSource> sources2 = new ArrayList<ValueSource>(dim);
    //Get dim value sources for the first vector, the remainder for the second
    splitSources(dim, sources, sources1, sources2);
    mvr.mv1 = new ToMultiValueSource(sources1);
    mvr.mv2 = new ToMultiValueSource(sources2);
  }
  return mvr;
}

/** Simple holder for the pair of MultiValueSources produced by {@link #getMultiValueSources}. */
private static class MVResult {
  MultiValueSource mv1;
  MultiValueSource mv2;
}
} }

View File

@ -48,6 +48,15 @@ public abstract class DocValues {
public String strVal(int doc) { throw new UnsupportedOperationException(); } public String strVal(int doc) { throw new UnsupportedOperationException(); }
public abstract String toString(int doc); public abstract String toString(int doc);
//For Functions that can work with multiple values from the same document. This does not apply to all functions
public void byteVal(int doc, byte [] vals) { throw new UnsupportedOperationException(); }
public void shortVal(int doc, short [] vals) { throw new UnsupportedOperationException(); }
public void floatVal(int doc, float [] vals) { throw new UnsupportedOperationException(); }
public void intVal(int doc, int [] vals) { throw new UnsupportedOperationException(); }
public void longVal(int doc, long [] vals) { throw new UnsupportedOperationException(); }
public void doubleVal(int doc, double [] vals) { throw new UnsupportedOperationException(); }
public void strVal(int doc, String [] vals) { throw new UnsupportedOperationException(); }
public Explanation explain(int doc) { public Explanation explain(int doc) {
return new Explanation(floatVal(doc), toString(doc)); return new Explanation(floatVal(doc), toString(doc));

View File

@ -1,4 +1,6 @@
package org.apache.solr.search.function.distance; package org.apache.solr.search.function.distance;
import org.apache.solr.common.SolrException;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -50,5 +52,44 @@ public class DistanceUtils {
return result; return result;
} }
/**
 * Given a string containing <i>dimension</i> values encoded in it, separated by commas,
 * return a String array of length <i>dimension</i> containing the values.
 *
 * @param out A preallocated array. Must be size dimension. If it is not it will be resized.
 * @param externalVal The value to parse
 * @param dimension The expected number of values for the point
 * @return An array of the values that make up the point (aka vector)
 * @throws SolrException if the dimension specified does not match the number of values in the externalVal.
 */
public static String[] parsePoint(String[] out, String externalVal, int dimension) {
  //TODO: Should we support sparse vectors?
  if (out == null || out.length != dimension) out = new String[dimension];
  int i = 0;
  int firstComma = externalVal.indexOf(',');
  if (firstComma == -1 && dimension == 1 && externalVal.length() > 0) {
    //we have a single point, dimension better be 1
    out[0] = externalVal.trim();
    i = 1;
  } else if (firstComma > 0) { //if it is zero, that is an error
    //Parse out a comma separated list of point values, as in: 73.5,89.2,7773.4
    //NOTE: the previous implementation restarted every segment from the *first*
    //comma (start = idx + 1 with idx never updated), which duplicated the second
    //value into every later slot for dimension > 2.  Scan for each comma afresh.
    int len = externalVal.length();
    int start = 0;
    while (i < dimension && start <= len) {
      int comma = externalVal.indexOf(',', start);
      int end = (comma == -1) ? len : comma;
      int s = start;
      int e = end;
      //trim surrounding spaces, matching the original's handling of " 1 , 2 "
      while (s < e && externalVal.charAt(s) == ' ') s++;
      while (e > s && externalVal.charAt(e - 1) == ' ') e--;
      out[i++] = externalVal.substring(s, e);
      start = end + 1; //skip past the comma just consumed
    }
  }
  if (i != dimension) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "incompatible dimension (" + dimension +
            ") and values (" + externalVal + "). Only " + i + " values specified");
  }
  return out;
}
} }

View File

@ -18,6 +18,8 @@ package org.apache.solr.search.function.distance;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Searcher;
import org.apache.solr.common.SolrException;
import org.apache.solr.search.MultiValueSource;
import org.apache.solr.search.function.DocValues; import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.ValueSource;
@ -29,28 +31,31 @@ import java.util.Map;
* Calculate the Haversine formula (distance) between any two points on a sphere * Calculate the Haversine formula (distance) between any two points on a sphere
* Takes in four value sources: (latA, lonA); (latB, lonB). * Takes in four value sources: (latA, lonA); (latB, lonB).
* <p/> * <p/>
* Assumes the value sources are in radians * Assumes the value sources are in radians unless
* <p/> * <p/>
* See http://en.wikipedia.org/wiki/Great-circle_distance and * See http://en.wikipedia.org/wiki/Great-circle_distance and
* http://en.wikipedia.org/wiki/Haversine_formula for the actual formula and * http://en.wikipedia.org/wiki/Haversine_formula for the actual formula and
* also http://www.movable-type.co.uk/scripts/latlong.html * also http://www.movable-type.co.uk/scripts/latlong.html
*
* @see org.apache.solr.search.function.RadianFunction
*/ */
public class HaversineFunction extends ValueSource { public class HaversineFunction extends ValueSource {
private ValueSource x1; private MultiValueSource p1;
private ValueSource y1; private MultiValueSource p2;
private ValueSource x2; private boolean convertToRadians = false;
private ValueSource y2;
private double radius; private double radius;
public HaversineFunction(ValueSource x1, ValueSource y1, ValueSource x2, ValueSource y2, double radius) { public HaversineFunction(MultiValueSource p1, MultiValueSource p2, double radius) {
this.x1 = x1; this(p1, p2, radius, false);
this.y1 = y1; }
this.x2 = x2;
this.y2 = y2; public HaversineFunction(MultiValueSource p1, MultiValueSource p2, double radius, boolean convertToRads){
this.p1 = p1;
this.p2 = p2;
if (p1.dimension() != 2 || p2.dimension() != 2) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal dimension for value sources");
}
this.radius = radius; this.radius = radius;
this.convertToRadians = convertToRads;
} }
protected String name() { protected String name() {
@ -59,28 +64,40 @@ public class HaversineFunction extends ValueSource {
/** /**
* @param doc The doc to score * @param doc The doc to score
* @param x1DV * @param p1DV
* @param y1DV * @param p2DV
* @param x2DV
* @param y2DV
* @return The haversine distance formula * @return The haversine distance formula
*/ */
protected double distance(int doc, DocValues x1DV, DocValues y1DV, DocValues x2DV, DocValues y2DV) { protected double distance(int doc, DocValues p1DV, DocValues p2DV) {
double x1 = x1DV.doubleVal(doc); //in radians
double y1 = y1DV.doubleVal(doc);
double x2 = x2DV.doubleVal(doc);
double y2 = y2DV.doubleVal(doc);
double[] p1D = new double[2];
double[] p2D = new double[2];
p1DV.doubleVal(doc, p1D);
p2DV.doubleVal(doc, p2D);
double x1;
double y1;
double x2;
double y2;
if (convertToRadians) {
x1 = p1D[0] * DistanceUtils.DEGREES_TO_RADIANS;
y1 = p1D[1] * DistanceUtils.DEGREES_TO_RADIANS;
x2 = p2D[0] * DistanceUtils.DEGREES_TO_RADIANS;
y2 = p2D[1] * DistanceUtils.DEGREES_TO_RADIANS;
} else {
x1 = p1D[0];
y1 = p1D[1];
x2 = p2D[0];
y2 = p2D[1];
}
return DistanceUtils.haversine(x1, y1, x2, y2, radius); return DistanceUtils.haversine(x1, y1, x2, y2, radius);
} }
@Override @Override
public DocValues getValues(Map context, IndexReader reader) throws IOException { public DocValues getValues(Map context, IndexReader reader) throws IOException {
final DocValues x1DV = x1.getValues(context, reader); final DocValues vals1 = p1.getValues(context, reader);
final DocValues y1DV = y1.getValues(context, reader);
final DocValues x2DV = x2.getValues(context, reader); final DocValues vals2 = p2.getValues(context, reader);
final DocValues y2DV = y2.getValues(context, reader);
return new DocValues() { return new DocValues() {
public float floatVal(int doc) { public float floatVal(int doc) {
return (float) doubleVal(doc); return (float) doubleVal(doc);
@ -95,7 +112,7 @@ public class HaversineFunction extends ValueSource {
} }
public double doubleVal(int doc) { public double doubleVal(int doc) {
return (double) distance(doc, x1DV, y1DV, x2DV, y2DV); return (double) distance(doc, vals1, vals2);
} }
public String strVal(int doc) { public String strVal(int doc) {
@ -106,8 +123,7 @@ public class HaversineFunction extends ValueSource {
public String toString(int doc) { public String toString(int doc) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append(name()).append('('); sb.append(name()).append('(');
sb.append(x1DV.toString(doc)).append(',').append(y1DV.toString(doc)).append(',') sb.append(vals1.toString(doc)).append(',').append(vals2.toString(doc));
.append(x2DV.toString(doc)).append(',').append(y2DV.toString(doc));
sb.append(')'); sb.append(')');
return sb.toString(); return sb.toString();
} }
@ -116,10 +132,9 @@ public class HaversineFunction extends ValueSource {
@Override @Override
public void createWeight(Map context, Searcher searcher) throws IOException { public void createWeight(Map context, Searcher searcher) throws IOException {
x1.createWeight(context, searcher); p1.createWeight(context, searcher);
x2.createWeight(context, searcher); p2.createWeight(context, searcher);
y1.createWeight(context, searcher);
y2.createWeight(context, searcher);
} }
@Override @Override
@ -127,20 +142,16 @@ public class HaversineFunction extends ValueSource {
if (this.getClass() != o.getClass()) return false; if (this.getClass() != o.getClass()) return false;
HaversineFunction other = (HaversineFunction) o; HaversineFunction other = (HaversineFunction) o;
return this.name().equals(other.name()) return this.name().equals(other.name())
&& x1.equals(other.x1) && && p1.equals(other.p1) &&
y1.equals(other.y1) && p2.equals(other.p2) && radius == other.radius;
x2.equals(other.x2) &&
y2.equals(other.y2) && radius == other.radius;
} }
@Override @Override
public int hashCode() { public int hashCode() {
int result; int result;
long temp; long temp;
result = x1.hashCode(); result = p1.hashCode();
result = 31 * result + y1.hashCode(); result = 31 * result + p2.hashCode();
result = 31 * result + x2.hashCode();
result = 31 * result + y2.hashCode();
result = 31 * result + name().hashCode(); result = 31 * result + name().hashCode();
temp = Double.doubleToRawLongBits(radius); temp = Double.doubleToRawLongBits(radius);
result = 31 * result + (int) (temp ^ (temp >>> 32)); result = 31 * result + (int) (temp ^ (temp >>> 32));
@ -150,7 +161,7 @@ public class HaversineFunction extends ValueSource {
public String description() { public String description() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append(name()).append('('); sb.append(name()).append('(');
sb.append(x1).append(',').append(y1).append(',').append(x2).append(',').append(y2); sb.append(p1).append(',').append(p2);
sb.append(')'); sb.append(')');
return sb.toString(); return sb.toString();
} }

View File

@ -18,6 +18,7 @@ package org.apache.solr.search.function.distance;
import org.apache.solr.search.function.DocValues; import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.MultiValueSource;
import java.util.List; import java.util.List;
@ -30,8 +31,8 @@ import java.util.List;
public class SquaredEuclideanFunction extends VectorDistanceFunction { public class SquaredEuclideanFunction extends VectorDistanceFunction {
protected String name = "sqedist"; protected String name = "sqedist";
public SquaredEuclideanFunction(List<ValueSource> sources1, List<ValueSource> sources2) { public SquaredEuclideanFunction(MultiValueSource source1, MultiValueSource source2) {
super(-1, sources1, sources2);//overriding distance, so power doesn't matter here super(-1, source1, source2);//overriding distance, so power doesn't matter here
} }
@ -43,11 +44,16 @@ public class SquaredEuclideanFunction extends VectorDistanceFunction {
/** /**
* @param doc The doc to score * @param doc The doc to score
*/ */
protected double distance(int doc, DocValues[] docValues1, DocValues[] docValues2) { protected double distance(int doc, DocValues dv1, DocValues dv2) {
double result = 0; double result = 0;
for (int i = 0; i < docValues1.length; i++) { double [] vals1 = new double[source1.dimension()];
result += Math.pow(docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc), 2); double [] vals2 = new double[source1.dimension()];
} dv1.doubleVal(doc, vals1);
dv2.doubleVal(doc, vals2);
for (int i = 0; i < vals1.length; i++) {
double v = vals1[i] - vals2[i];
result += v * v;
}
return result; return result;
} }

View File

@ -21,9 +21,9 @@ import org.apache.lucene.search.Searcher;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.search.function.DocValues; import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.MultiValueSource;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Map; import java.util.Map;
@ -41,18 +41,18 @@ import java.util.Map;
* @see SquaredEuclideanFunction for the special case * @see SquaredEuclideanFunction for the special case
*/ */
public class VectorDistanceFunction extends ValueSource { public class VectorDistanceFunction extends ValueSource {
protected List<ValueSource> sources1, sources2; protected MultiValueSource source1, source2;
protected float power; protected float power;
protected float oneOverPower; protected float oneOverPower;
public VectorDistanceFunction(float power, List<ValueSource> sources1, List<ValueSource> sources2) { public VectorDistanceFunction(float power, MultiValueSource source1, MultiValueSource source2) {
this.power = power; if ((source1.dimension() != source2.dimension())) {
this.oneOverPower = 1 / power;
this.sources1 = sources1;
this.sources2 = sources2;
if ((sources1.size() != sources2.size())) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal number of sources"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Illegal number of sources");
} }
this.power = power;
this.oneOverPower = 1 / power;
this.source1 = source1;
this.source2 = source2;
} }
protected String name() { protected String name() {
@ -63,37 +63,39 @@ public class VectorDistanceFunction extends ValueSource {
* Calculate the distance * Calculate the distance
* *
* @param doc The current doc * @param doc The current doc
* @param docValues1 The values from the first set of value sources * @param dv1 The values from the first MultiValueSource
* @param docValues2 The values from the second set of value sources * @param dv2 The values from the second MultiValueSource
* @return The distance * @return The distance
*/ */
protected double distance(int doc, DocValues[] docValues1, DocValues[] docValues2) { protected double distance(int doc, DocValues dv1, DocValues dv2) {
double result = 0; double result = 0;
//Handle some special cases: //Handle some special cases:
double [] vals1 = new double[source1.dimension()];
double [] vals2 = new double[source1.dimension()];
dv1.doubleVal(doc, vals1);
dv2.doubleVal(doc, vals2);
if (power == 0) { if (power == 0) {
for (int i = 0; i < docValues1.length; i++) { for (int i = 0; i < vals1.length; i++) {
//sparseness measure result += vals1[i] - vals2[i] == 0 ? 0 :1;
result += docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc) == 0 ? 0 : 1;
} }
} else if (power == 1.0) { } else if (power == 1.0) {
for (int i = 0; i < docValues1.length; i++) { for (int i = 0; i < vals1.length; i++) {
result += docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc); result += vals1[i] - vals2[i];
} }
} else if (power == 2.0) { } else if (power == 2.0) {
for (int i = 0; i < docValues1.length; i++) { for (int i = 0; i < vals1.length; i++) {
double v = docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc); double v = vals1[i] - vals2[i];
result += v * v; result += v * v;
} }
result = Math.sqrt(result); result = Math.sqrt(result);
} else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm? } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm?
for (int i = 0; i < docValues1.length; i++) { for (int i = 0; i < vals1.length; i++) {
//TODO: is this the correct infinite norm? result = Math.max(vals1[i], vals2[i]);
result = Math.max(docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc), result);
} }
} else { } else {
for (int i = 0; i < docValues1.length; i++) { for (int i = 0; i < vals1.length; i++) {
result += Math.pow(docValues1[i].doubleVal(doc) - docValues2[i].doubleVal(doc), power); result += Math.pow(vals1[i] - vals2[i], power);
} }
result = Math.pow(result, oneOverPower); result = Math.pow(result, oneOverPower);
} }
@ -103,19 +105,24 @@ public class VectorDistanceFunction extends ValueSource {
@Override @Override
public DocValues getValues(Map context, IndexReader reader) throws IOException { public DocValues getValues(Map context, IndexReader reader) throws IOException {
final DocValues[] valsArr1 = new DocValues[sources1.size()];
int i = 0; final DocValues vals1 = source1.getValues(context, reader);
for (ValueSource source : sources1) {
valsArr1[i++] = source.getValues(context, reader); final DocValues vals2 = source2.getValues(context, reader);
}
final DocValues[] valsArr2 = new DocValues[sources2.size()];
i = 0;
for (ValueSource source : sources2) {
valsArr2[i++] = source.getValues(context, reader);
}
return new DocValues() { return new DocValues() {
@Override
public byte byteVal(int doc) {
return (byte) doubleVal(doc);
}
@Override
public short shortVal(int doc) {
return (short)doubleVal(doc);
}
public float floatVal(int doc) { public float floatVal(int doc) {
return (float) doubleVal(doc); return (float) doubleVal(doc);
} }
@ -129,7 +136,7 @@ public class VectorDistanceFunction extends ValueSource {
} }
public double doubleVal(int doc) { public double doubleVal(int doc) {
return distance(doc, valsArr1, valsArr2); return distance(doc, vals1, vals2);
} }
public String strVal(int doc) { public String strVal(int doc) {
@ -141,18 +148,8 @@ public class VectorDistanceFunction extends ValueSource {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append(name()).append('(').append(power).append(','); sb.append(name()).append('(').append(power).append(',');
boolean firstTime = true; boolean firstTime = true;
for (DocValues vals : valsArr1) { sb.append(vals1.toString(doc)).append(',');
if (firstTime) { sb.append(vals2.toString(doc));
firstTime = false;
} else {
sb.append(',');
}
sb.append(vals.toString(doc));
}
for (DocValues vals : valsArr2) {
sb.append(',');//we will always have valsArr1, else there is an error
sb.append(vals.toString(doc));
}
sb.append(')'); sb.append(')');
return sb.toString(); return sb.toString();
} }
@ -161,12 +158,8 @@ public class VectorDistanceFunction extends ValueSource {
@Override @Override
public void createWeight(Map context, Searcher searcher) throws IOException { public void createWeight(Map context, Searcher searcher) throws IOException {
for (ValueSource source : sources1) { source1.createWeight(context, searcher);
source.createWeight(context, searcher); source2.createWeight(context, searcher);
}
for (ValueSource source : sources2) {
source.createWeight(context, searcher);
}
} }
@Override @Override
@ -177,16 +170,16 @@ public class VectorDistanceFunction extends ValueSource {
VectorDistanceFunction that = (VectorDistanceFunction) o; VectorDistanceFunction that = (VectorDistanceFunction) o;
if (Float.compare(that.power, power) != 0) return false; if (Float.compare(that.power, power) != 0) return false;
if (!sources1.equals(that.sources1)) return false; if (!source1.equals(that.source1)) return false;
if (!sources2.equals(that.sources2)) return false; if (!source2.equals(that.source2)) return false;
return true; return true;
} }
@Override @Override
public int hashCode() { public int hashCode() {
int result = sources1.hashCode(); int result = source1.hashCode();
result = 31 * result + sources2.hashCode(); result = 31 * result + source2.hashCode();
result = 31 * result + Float.floatToRawIntBits(power); result = 31 * result + Float.floatToRawIntBits(power);
return result; return result;
} }
@ -195,19 +188,8 @@ public class VectorDistanceFunction extends ValueSource {
public String description() { public String description() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append(name()).append('(').append(power).append(','); sb.append(name()).append('(').append(power).append(',');
boolean firstTime = true; sb.append(source1).append(',');
for (ValueSource source : sources1) { sb.append(source2);
if (firstTime) {
firstTime = false;
} else {
sb.append(',');
}
sb.append(source);
}
for (ValueSource source : sources2) {
sb.append(',');//we will always have sources1, else there is an error
sb.append(source);
}
sb.append(')'); sb.append(')');
return sb.toString(); return sb.toString();
} }

View File

@ -57,19 +57,35 @@ public class DocumentBuilder {
// we don't check for a null val ourselves because a solr.FieldType // we don't check for a null val ourselves because a solr.FieldType
// might actually want to map it to something. If createField() // might actually want to map it to something. If createField()
// returns null, then we don't store the field. // returns null, then we don't store the field.
Field field = sfield.createField(val, boost); if (sfield.isPolyField()) {
if (field != null) { Fieldable[] fields = sfield.createFields(val, boost);
if (!sfield.multiValued()) { if (fields != null && fields.length > 0) {
String oldValue = map.put(sfield.getName(), val); if (!sfield.multiValued()) {
if (oldValue != null) { String oldValue = map.put(sfield.getName(), val);
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR: multiple values encountered for non multiValued field " + sfield.getName() if (oldValue != null) {
+ ": first='" + oldValue + "' second='" + val + "'"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: multiple values encountered for non multiValued field " + sfield.getName()
+ ": first='" + oldValue + "' second='" + val + "'");
}
}
// Add each field
for (Fieldable field : fields) {
doc.add(field);
}
}
} else {
Field field = sfield.createField(val, boost);
if (field != null) {
if (!sfield.multiValued()) {
String oldValue = map.put(sfield.getName(), val);
if (oldValue != null) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR: multiple values encountered for non multiValued field " + sfield.getName()
+ ": first='" + oldValue + "' second='" + val + "'");
}
} }
} }
// field.setBoost(boost);
doc.add(field); doc.add(field);
} }
} }
/** /**
@ -147,7 +163,7 @@ public class DocumentBuilder {
for (SchemaField field : schema.getRequiredFields()) { for (SchemaField field : schema.getRequiredFields()) {
if (doc.getField(field.getName() ) == null) { if (doc.getField(field.getName() ) == null) {
if (field.getDefaultValue() != null) { if (field.getDefaultValue() != null) {
doc.add( field.createField( field.getDefaultValue(), 1.0f ) ); addField(doc, field, field.getDefaultValue(), 1.0f);
} else { } else {
if (missingFields==null) { if (missingFields==null) {
missingFields = new ArrayList<String>(1); missingFields = new ArrayList<String>(1);
@ -178,6 +194,19 @@ public class DocumentBuilder {
} }
/**
 * Adds the given value to the Lucene Document for the given SchemaField,
 * expanding a poly field into all of the Fieldables its type generates;
 * a non-poly field contributes at most one Field.
 *
 * @param doc   the Lucene Document being built
 * @param field the schema field the value belongs to
 * @param val   the external (string) value to index
 * @param boost the boost to apply to the created field(s)
 */
private static void addField(Document doc, SchemaField field, String val, float boost) {
if (field.isPolyField()) {
// poly fields may emit several sub-fields (e.g. one per dimension plus a stored field)
Fieldable[] farr = field.getType().createFields(field, val, boost);
for (Fieldable f : farr) {
if (f != null) doc.add(f); // null fields are not added
}
} else {
Field f = field.createField(val, boost);
if (f != null) doc.add(f); // null fields are not added
}
}
/** /**
* Convert a SolrInputDocument to a lucene Document. * Convert a SolrInputDocument to a lucene Document.
* *
@ -230,7 +259,9 @@ public class DocumentBuilder {
isBinaryField = true; isBinaryField = true;
BinaryField binaryField = (BinaryField) sfield.getType(); BinaryField binaryField = (BinaryField) sfield.getType();
Field f = binaryField.createField(sfield,v,boost); Field f = binaryField.createField(sfield,v,boost);
if(f != null) out.add(f); if(f != null){
out.add(f);
}
used = true; used = true;
} else { } else {
// TODO!!! HACK -- date conversion // TODO!!! HACK -- date conversion
@ -243,10 +274,7 @@ public class DocumentBuilder {
if (sfield != null) { if (sfield != null) {
used = true; used = true;
Field f = sfield.createField(val, boost); addField(out, sfield, val, boost);
if (f != null) { // null fields are not added
out.add(f);
}
} }
} }
@ -263,17 +291,21 @@ public class DocumentBuilder {
} }
used = true; used = true;
Field f = null; //Don't worry about poly fields here
Fieldable [] fields = null;
if (isBinaryField) { if (isBinaryField) {
if (destinationField.getType() instanceof BinaryField) { if (destinationField.getType() instanceof BinaryField) {
BinaryField binaryField = (BinaryField) destinationField.getType(); BinaryField binaryField = (BinaryField) destinationField.getType();
f = binaryField.createField(destinationField, v, boost); //TODO: safe to assume that binary fields only create one?
fields = new Field[]{binaryField.createField(destinationField, v, boost)};
} }
} else { } else {
f = destinationField.createField(cf.getLimitedValue(val), boost); fields = destinationField.createFields(cf.getLimitedValue(val), boost);
} }
if (f != null) { // null fields are not added if (fields != null) { // null fields are not added
out.add(f); for (Fieldable f : fields) {
out.add(f);
}
} }
} }
@ -297,7 +329,7 @@ public class DocumentBuilder {
for (SchemaField field : schema.getRequiredFields()) { for (SchemaField field : schema.getRequiredFields()) {
if (out.getField(field.getName() ) == null) { if (out.getField(field.getName() ) == null) {
if (field.getDefaultValue() != null) { if (field.getDefaultValue() != null) {
out.add( field.createField( field.getDefaultValue(), 1.0f ) ); addField(out, field, field.getDefaultValue(), 1.0f);
} }
else { else {
String id = schema.printableUniqueKey( out ); String id = schema.printableUniqueKey( out );

View File

@ -231,6 +231,17 @@ public abstract class AbstractSolrTestCase extends TestCase {
} }
} }
/**
 * Makes sure a query fails with a SolrException carrying the expected error code.
 * Any other exception type is wrapped and rethrown as a test error.
 *
 * @param message the assertion message reported if no SolrException is thrown
 * @param req     the request to execute
 * @param code    the expected SolrException error code
 */
public void assertQEx(String message, SolrQueryRequest req, SolrException.ErrorCode code ) {
try {
h.query(req);
fail( message );
} catch (SolrException e) {
// the query failed as expected; verify it failed for the expected reason
assertEquals( code.code, e.code() );
} catch (Exception e2) {
throw new RuntimeException("Exception during query", e2);
}
}
/** /**
* @see TestHarness#optimize * @see TestHarness#optimize

View File

@ -0,0 +1,196 @@
package org.apache.solr.schema;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.common.SolrException;

import java.util.Map;
import java.util.Random;


/**
 * Test a whole slew of things related to PolyFields: schema registration,
 * sub-field generation, and index/search behavior of PointType fields.
 */
public class PolyFieldTest extends AbstractSolrTestCase {
  @Override
  public String getSchemaFile() {
    return "schema.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  /**
   * Checks that poly fields and the dynamic sub-fields they register are
   * visible through the IndexSchema lookup methods.
   */
  public void testSchemaBasics() throws Exception {
    IndexSchema schema = h.getCore().getSchema();


    SchemaField home = schema.getField("home");
    assertNotNull(home);
    assertTrue(home.isPolyField());

    // a poly field registers a dynamic field prototype for its sub type
    SchemaField[] dynFields = schema.getDynamicFieldPrototypes();
    boolean seen = false;
    for (SchemaField dynField : dynFields) {
      if (dynField.getName().equals("*" + FieldType.POLY_FIELD_SEPARATOR + "double")) {
        seen = true;
      }
    }
    assertTrue("Didn't find the expected dynamic field", seen);
    FieldType homeFT = schema.getFieldType("home");
    assertEquals(home.getType(), homeFT);
    FieldType xy = schema.getFieldTypeByName("xy");
    assertNotNull(xy);
    assertTrue(xy instanceof PointType);
    assertTrue(xy.isPolyField());
    // the generated sub field names must resolve as well
    home = schema.getFieldOrNull("home_0" + FieldType.POLY_FIELD_SEPARATOR + "double");
    assertNotNull(home);
    home = schema.getField("home");
    assertNotNull(home);

    homeFT = schema.getPolyFieldType("home");
    assertNotNull(homeFT);

    home = schema.getField("homed");//sub field suffix
    assertNotNull(home);
    assertTrue(home.isPolyField());

    // asking for the poly field type of a non-poly field must throw...
    try {
      schema.getPolyFieldType("foo");
      fail("Expected an exception looking up the poly field type of 'foo'");
    } catch (Exception e) {
      // expected
    }
    // ...while the NoEx variant just returns null
    try {
      FieldType bad = schema.getPolyFieldTypeNoEx("foo");
      assertNull(bad);
    } catch (Exception e) {
      fail("getPolyFieldTypeNoEx should not throw for an unknown field");
    }
  }

  /**
   * Verifies the Fieldables a PointType field creates: one indexed field per
   * dimension, plus one stored field when the field is stored.
   */
  public void testPointFieldType() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getSchema();
    SchemaField home = schema.getField("home");
    assertNotNull(home);
    assertTrue("home is not a poly field", home.isPolyField());
    FieldType tmp = home.getType();
    assertTrue(tmp instanceof PointType);
    PointType pt = (PointType) tmp;
    assertEquals(pt.getDimension(), 2);
    double[] xy = new double[]{35.0, -79.34};
    String point = xy[0] + "," + xy[1];
    Fieldable[] fields = home.createFields(point, 2);
    assertEquals(fields.length, 3);//should be 3, we have a stored field
    //first two fields contain the values, third is just stored and contains the original
    for (int i = 0; i < 3; i++) {
      // BUGFIX: was fields[1] on every iteration; now each generated field is checked
      boolean hasValue = fields[i].tokenStreamValue() != null
          || fields[i].getBinaryValue() != null
          || fields[i].stringValue() != null;
      assertTrue("Doesn't have a value: " + fields[i], hasValue);
    }
    /*assertTrue("first field " + fields[0].tokenStreamValue() +  " is not 35.0", pt.getSubType().toExternal(fields[0]).equals(String.valueOf(xy[0])));
    assertTrue("second field is not -79.34", pt.getSubType().toExternal(fields[1]).equals(String.valueOf(xy[1])));
    assertTrue("third field is not '35.0,-79.34'", pt.getSubType().toExternal(fields[2]).equals(point));*/

    // an unstored poly field should only create the per-dimension fields
    home = schema.getField("home_ns");
    assertNotNull(home);
    fields = home.createFields(point, 2);
    assertEquals(fields.length, 2);//should be 2, since we aren't storing

    home = schema.getField("home_ns");
    assertNotNull(home);
    // a non-numeric coordinate must be rejected
    try {
      fields = home.createFields("35.0,foo", 2);
      fail("createFields should reject a non-numeric coordinate");
    } catch (Exception e) {
      // expected
    }
  }

  /**
   * Indexes a set of points and exercises exact-match, function and range
   * queries against poly fields, including a query with bad dimensions.
   */
  public void testSearching() throws Exception {
    for (int i = 0; i < 50; i++) {
      assertU(adoc("id", "" + i, "home", i + "," + (i * 100), "homed", (i * 1000) + "," + (i * 10000)));
    }
    assertU(commit());
    // NOTE: the original code grabbed a searcher via getSearcher().get() into an
    // unused local for debugging, which leaked the RefCounted searcher reference;
    // the debug code and the leak have been removed.
    assertQ(req("fl", "*,score", "q", "*:*"), "//*[@numFound='50']");
    assertQ(req("fl", "*,score", "q", "home:1,100"),
            "//*[@numFound='1']",
            "//str[@name='home'][.='1,100']");
    assertQ(req("fl", "*,score", "q", "homed:1000,10000"),
            "//*[@numFound='1']",
            "//str[@name='homed'][.='1000,10000']");
    // BUGFIX: the XPath tests below were wrapped in literal escaped quotes
    // ("\"//*[...]\""), which is not a valid XPath expression
    assertQ(req("fl", "*,score", "q",
            "{!func}sqedist(home, toMultiVS(0, 0))"),
            "//*[@numFound='50']");
    assertQ(req("fl", "*,score", "q",
            "{!func}dist(2, home, toMultiVS(0, 0))"),
            "//*[@numFound='50']");

    assertQ(req("fl", "*,score", "q",
            "home:[10,10000 TO 30,30000]"),
            "//*[@numFound='3']");
    assertQ(req("fl", "*,score", "q",
            "homed:[1,1000 TO 2000,35000]"),
            "//*[@numFound='2']");
    //bad dimension count must be rejected with a 400
    assertQEx("Query should throw an exception due to incorrect dimensions", req("fl", "*,score", "q",
            "homed:[1 TO 2000]"), SolrException.ErrorCode.BAD_REQUEST);
  }

  /**
   * Exercises the query a PointType builds internally: one boolean clause per
   * dimension.
   */
  public void testSearchDetails() throws Exception {
    SolrCore core = h.getCore();
    IndexSchema schema = core.getSchema();
    double[] xy = new double[]{35.0, -79.34};
    String point = xy[0] + "," + xy[1];
    //How about some queries?
    //don't need a parser for this path currently.  This may change
    assertU(adoc("id", "0", "home_ns", point));
    assertU(commit());
    SchemaField home = schema.getField("home_ns");
    PointType pt = (PointType) home.getType();
    assertEquals(pt.getDimension(), 2);
    Query q = pt.getFieldQuery(null, home, point);
    assertNotNull(q);
    assertTrue(q instanceof BooleanQuery);
    //should have two clauses, one for 35.0 and the other for -79.34
    BooleanQuery bq = (BooleanQuery) q;
    BooleanClause[] clauses = bq.getClauses();
    assertEquals(clauses.length, 2);
  }
}

View File

@ -16,9 +16,9 @@ package org.apache.solr.search.function.distance;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.spatial.geohash.GeoHashUtils;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.lucene.spatial.geohash.GeoHashUtils;
/** /**
@ -44,20 +44,21 @@ public class DistanceFunctionTest extends AbstractSolrTestCase {
assertU(adoc("id", "2", "x_td", "0", "y_td", String.valueOf(Math.PI / 2), "gh_s", GeoHashUtils.encode(32.7693246, -78.9289094))); assertU(adoc("id", "2", "x_td", "0", "y_td", String.valueOf(Math.PI / 2), "gh_s", GeoHashUtils.encode(32.7693246, -78.9289094)));
assertU(adoc("id", "3", "x_td", String.valueOf(Math.PI / 2), "y_td", String.valueOf(Math.PI / 2), "gh_s", GeoHashUtils.encode(32.7693246, -80.9289094))); assertU(adoc("id", "3", "x_td", String.valueOf(Math.PI / 2), "y_td", String.valueOf(Math.PI / 2), "gh_s", GeoHashUtils.encode(32.7693246, -80.9289094)));
assertU(adoc("id", "4", "x_td", String.valueOf(Math.PI / 4), "y_td", String.valueOf(Math.PI / 4), "gh_s", GeoHashUtils.encode(32.7693246, -81.9289094))); assertU(adoc("id", "4", "x_td", String.valueOf(Math.PI / 4), "y_td", String.valueOf(Math.PI / 4), "gh_s", GeoHashUtils.encode(32.7693246, -81.9289094)));
assertU(adoc("id", "5", "x_td", "45.0", "y_td", "45.0",
"gh_s", GeoHashUtils.encode(32.7693246, -81.9289094)));
assertU(commit()); assertU(commit());
//Get the haversine distance between the point 0,0 and the docs above assuming a radius of 1 //Get the haversine distance between the point 0,0 and the docs above assuming a radius of 1
assertQ(req("fl", "*,score", "q", "{!func}hsin(x_td, y_td, 0, 0, 1)", "fq", "id:1"), "//float[@name='score']='0.0'"); assertQ(req("fl", "*,score", "q", "{!func}hsin(1, x_td, y_td, 0, 0)", "fq", "id:1"), "//float[@name='score']='0.0'");
assertQ(req("fl", "*,score", "q", "{!func}hsin(x_td, y_td, 0, 0, 1)", "fq", "id:2"), "//float[@name='score']='" + (float) (Math.PI / 2) + "'"); assertQ(req("fl", "*,score", "q", "{!func}hsin(1, x_td, y_td, 0, 0)", "fq", "id:2"), "//float[@name='score']='" + (float) (Math.PI / 2) + "'");
assertQ(req("fl", "*,score", "q", "{!func}hsin(x_td, y_td, 0, 0, 1)", "fq", "id:3"), "//float[@name='score']='" + (float) (Math.PI / 2) + "'"); assertQ(req("fl", "*,score", "q", "{!func}hsin(1, x_td, y_td, 0, 0)", "fq", "id:3"), "//float[@name='score']='" + (float) (Math.PI / 2) + "'");
assertQ(req("fl", "*,score", "q", "{!func}hsin(x_td, y_td, 0, 0, 1)", "fq", "id:4"), "//float[@name='score']='1.0471976'"); assertQ(req("fl", "*,score", "q", "{!func}hsin(1, x_td, y_td, 0, 0)", "fq", "id:4"), "//float[@name='score']='1.0471976'");
assertQ(req("fl", "*,score", "q", "{!func}hsin(1, x_td, y_td, 0, 0, true)", "fq", "id:5"), "//float[@name='score']='1.0471976'");
//Geo Hash Haversine //Geo Hash Haversine
//Can verify here: http://www.movable-type.co.uk/scripts/latlong.html, but they use a slightly different radius for the earth, so just be close //Can verify here: http://www.movable-type.co.uk/scripts/latlong.html, but they use a slightly different radius for the earth, so just be close
assertQ(req("fl", "*,score", "q", "{!func}ghhsin(gh_s, \"" + GeoHashUtils.encode(32, -79) + assertQ(req("fl", "*,score", "q", "{!func}ghhsin(" + Constants.EARTH_RADIUS_KM + ", gh_s, \"" + GeoHashUtils.encode(32, -79) +
"\"," + Constants.EARTH_RADIUS_KM + "\",)", "fq", "id:1"), "//float[@name='score']='122.30894'");
")", "fq", "id:1"), "//float[@name='score']='122.30894'"); assertQ(req("fl", "*,score", "q", "{!func}ghhsin(" + Constants.EARTH_RADIUS_KM + ", gh_s, geohash(32, -79))", "fq", "id:1"), "//float[@name='score']='122.30894'");
assertQ(req("fl", "*,score", "q", "{!func}ghhsin(gh_s, geohash(32, -79)," + Constants.EARTH_RADIUS_KM +
")", "fq", "id:1"), "//float[@name='score']='122.30894'");
} }
public void testVector() throws Exception { public void testVector() throws Exception {
@ -66,6 +67,8 @@ public class DistanceFunctionTest extends AbstractSolrTestCase {
assertU(adoc("id", "3", "x_td", "1", "y_td", "1", "z_td", "1", "w_td", "1")); assertU(adoc("id", "3", "x_td", "1", "y_td", "1", "z_td", "1", "w_td", "1"));
assertU(adoc("id", "4", "x_td", "1", "y_td", "0", "z_td", "0", "w_td", "0")); assertU(adoc("id", "4", "x_td", "1", "y_td", "0", "z_td", "0", "w_td", "0"));
assertU(adoc("id", "5", "x_td", "2.3", "y_td", "5.5", "z_td", "7.9", "w_td", "-2.4")); assertU(adoc("id", "5", "x_td", "2.3", "y_td", "5.5", "z_td", "7.9", "w_td", "-2.4"));
assertU(adoc("id", "6", "point", "1.0,0.0"));
assertU(adoc("id", "7", "point", "5.5,10.9"));
assertU(commit()); assertU(commit());
//two dimensions, notice how we only pass in 4 value sources //two dimensions, notice how we only pass in 4 value sources
assertQ(req("fl", "*,score", "q", "{!func}sqedist(x_td, y_td, 0, 0)", "fq", "id:1"), "//float[@name='score']='0.0'"); assertQ(req("fl", "*,score", "q", "{!func}sqedist(x_td, y_td, 0, 0)", "fq", "id:1"), "//float[@name='score']='0.0'");
@ -111,6 +114,15 @@ public class DistanceFunctionTest extends AbstractSolrTestCase {
assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:3"), "//float[@name='score']='" + (float) 2.0 + "'"); assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:3"), "//float[@name='score']='" + (float) 2.0 + "'");
assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:4"), "//float[@name='score']='1.0'"); assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:4"), "//float[@name='score']='1.0'");
assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:5"), "//float[@name='score']='" + (float) (2.3 + 5.5) + "'"); assertQ(req("fl", "*,score", "q", "{!func}dist(1, x_td, y_td, 0, 0)", "fq", "id:5"), "//float[@name='score']='" + (float) (2.3 + 5.5) + "'");
//Do point tests:
assertQ(req("fl", "*,score", "q", "{!func}dist(1, toMultiVS(x_td, y_td), toMultiVS(0, 0))", "fq", "id:5"),
"//float[@name='score']='" + (float) (2.3 + 5.5) + "'");
assertQ(req("fl", "*,score", "q", "{!func}dist(1, point, toMultiVS(0, 0))", "fq", "id:6"),
"//float[@name='score']='" + 0.0f + "'");
} }
} }

View File

@ -22,6 +22,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.schema.FieldType;
/** /**
* *
@ -59,4 +60,17 @@ public class DocumentBuilderTest extends AbstractSolrTestCase {
Document out = DocumentBuilder.toDocument( doc, core.getSchema() ); Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
assertNull( out.get( "name" ) ); assertNull( out.get( "name" ) );
} }
/**
 * Verifies that a poly field value expands into a stored field for the
 * original value plus one generated sub-field per dimension.
 */
public void testMultiField() throws Exception {
SolrCore core = h.getCore();
// a poly field value should produce its stored field plus one sub-field per dimension
SolrInputDocument doc = new SolrInputDocument();
doc.addField( "home", "2.2,3.3", 1.0f );
Document out = DocumentBuilder.toDocument( doc, core.getSchema() );
assertNotNull( out.get( "home" ) );//contains the stored value and term vector, if there is one
assertNotNull( out.getField( "home_0" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
assertNotNull( out.getField( "home_1" + FieldType.POLY_FIELD_SEPARATOR + "double" ) );
}
} }

View File

@ -368,6 +368,12 @@
<fieldType name="uuid" class="solr.UUIDField" /> <fieldType name="uuid" class="solr.UUIDField" />
<!-- Try out some point types -->
<fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/>
<fieldType name="tenD" class="solr.PointType" dimension="10" subFieldType="double"/>
<!-- Use the sub field suffix -->
<fieldType name="xyd" class="solr.PointType" dimension="2" subFieldSuffix="*_d"/>
</types> </types>
@ -392,6 +398,15 @@
<field name="shouldbestored" type="unstored" stored="true"/> <field name="shouldbestored" type="unstored" stored="true"/>
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/> <field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
<!-- Test points -->
<field name="home" type="xy" indexed="true" stored="true" multiValued="false"/>
<field name="homed" type="xyd" indexed="true" stored="true" multiValued="false"/>
<field name="home_ns" type="xy" indexed="true" stored="false" multiValued="false"/>
<field name="work" type="xy" indexed="true" stored="true" multiValued="false"/>
<field name="point10" type="tenD" indexed="true" stored="true" multiValued="false"/>
<!-- test different combinations of indexed and stored --> <!-- test different combinations of indexed and stored -->
<field name="bind" type="boolean" indexed="true" stored="false"/> <field name="bind" type="boolean" indexed="true" stored="false"/>

View File

@ -251,6 +251,10 @@
<fieldType name="tdoubles" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="0" multiValued="true" /> <fieldType name="tdoubles" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="0" multiValued="true" />
<fieldType name="tdates" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="0" multiValued="true" /> <fieldType name="tdates" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="0" multiValued="true" />
<!-- Poly field -->
<fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/>
<fieldType name="xyd" class="solr.PointType" dimension="2" subFieldSuffix="*_d"/>
</types> </types>
@ -277,6 +281,10 @@
<field name="id" type="sfloat" indexed="true" stored="true" required="true" /> <field name="id" type="sfloat" indexed="true" stored="true" required="true" />
<field name="text" type="text" indexed="true" stored="false" /> <field name="text" type="text" indexed="true" stored="false" />
<!-- Test a point field for distances -->
<field name="point" type="xy" indexed="true" stored="true" multiValued="false"/>
<field name="pointD" type="xyd" indexed="true" stored="true" multiValued="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields <!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns. will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have RESTRICTION: the glob-like pattern in the name attribute must have