SOLR-5084: added enum field type to Solr

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1539111 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2013-11-05 19:41:28 +00:00
parent 5b6b274848
commit a9f8bf654c
11 changed files with 1213 additions and 51 deletions

View File

@ -0,0 +1,228 @@
package org.apache.lucene.queries.function.valuesource;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSourceScorer;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
/**
 * Obtains int field values from {@link FieldCache#getInts} and makes
 * those values available as other numeric types, casting as needed.
 * <p>
 * {@code strVal} does not return the digits of the int: it returns the string that
 * {@code enumIntToStringMap} associates with that int.
 */
public class EnumFieldSource extends FieldCacheSource {
  /** Int returned (and stringified) when a value has no entry in the enum maps. */
  static final Integer DEFAULT_VALUE = -1;

  final FieldCache.IntParser parser;
  final Map<Integer, String> enumIntToStringMap;
  final Map<String, Integer> enumStringToIntMap;

  /**
   * Creates a value source over {@code field}. The two maps are kept by reference, not copied.
   *
   * @param field              name of the field whose ints are read from the field cache
   * @param parser             parser handed to {@link FieldCache#getInts}
   * @param enumIntToStringMap int value to display string
   * @param enumStringToIntMap display string to int value
   */
  public EnumFieldSource(String field, FieldCache.IntParser parser, Map<Integer, String> enumIntToStringMap, Map<String, Integer> enumStringToIntMap) {
    super(field);
    this.parser = parser;
    this.enumIntToStringMap = enumIntToStringMap;
    this.enumStringToIntMap = enumStringToIntMap;
  }

  /**
   * Parses {@code valueStr} as a decimal int.
   *
   * @return the parsed value, or {@code null} when the string is not a valid int
   */
  private static Integer tryParseInt(String valueStr) {
    try {
      return Integer.parseInt(valueStr);
    } catch (NumberFormatException e) {
      // Deliberately ignored: "not a number" is an expected outcome, reported as null.
      return null;
    }
  }

  /**
   * Maps an int to its display string.
   *
   * @return {@code null} for a {@code null} input, the mapped string when one exists,
   *         otherwise {@code DEFAULT_VALUE.toString()}
   */
  private String intValueToStringValue(Integer intVal) {
    if (intVal == null) {
      return null;
    }
    final String enumString = enumIntToStringMap.get(intVal);
    if (enumString != null) {
      return enumString;
    }
    // can't find matching enum name - return DEFAULT_VALUE.toString()
    return DEFAULT_VALUE.toString();
  }

  /**
   * Maps a display string, or the decimal form of an int that has a display string, to its int.
   *
   * @return {@code null} for a {@code null} input, the matching int when one exists,
   *         otherwise {@link #DEFAULT_VALUE}
   */
  private Integer stringValueToIntValue(String stringVal) {
    if (stringVal == null) {
      return null;
    }
    final Integer enumInt = enumStringToIntMap.get(stringVal);
    if (enumInt != null) { // enum int found for string
      return enumInt;
    }
    // Not a known display string: accept it as a raw int only if that int is itself mapped.
    final Integer parsed = tryParseInt(stringVal);
    if (parsed != null && enumIntToStringMap.get(parsed) != null) {
      return parsed;
    }
    return DEFAULT_VALUE;
  }

  @Override
  public String description() {
    return "enum(" + field + ')';
  }

  /**
   * Returns per-document values backed by the field cache ints for {@code readerContext}.
   * {@code valid} records which documents actually have a value for the field.
   */
  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final FieldCache.Ints arr = cache.getInts(readerContext.reader(), field, parser, true);
    final Bits valid = cache.getDocsWithField(readerContext.reader(), field);

    return new IntDocValues(this) {
      @Override
      public float floatVal(int doc) {
        return (float) arr.get(doc);
      }

      @Override
      public int intVal(int doc) {
        return arr.get(doc);
      }

      @Override
      public long longVal(int doc) {
        return (long) arr.get(doc);
      }

      @Override
      public double doubleVal(int doc) {
        return (double) arr.get(doc);
      }

      /** Returns the display string for the document's int, not the int's digits. */
      @Override
      public String strVal(int doc) {
        Integer intValue = arr.get(doc);
        return intValueToStringValue(intValue);
      }

      @Override
      public Object objectVal(int doc) {
        return valid.get(doc) ? arr.get(doc) : null;
      }

      @Override
      public boolean exists(int doc) {
        return valid.get(doc);
      }

      @Override
      public String toString(int doc) {
        return description() + '=' + strVal(doc);
      }

      /**
       * Builds a scorer matching documents whose int lies between the two endpoints.
       * Endpoints are display strings (or ints in decimal form); a {@code null} endpoint
       * leaves that side of the range open.
       */
      @Override
      public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
        final Integer lower = stringValueToIntValue(lowerVal);
        final Integer upper = stringValueToIntValue(upperVal);

        // instead of using separate comparison functions, adjust the endpoints.
        final int ll;
        if (lower == null) {
          ll = Integer.MIN_VALUE;
        } else if (!includeLower && lower < Integer.MAX_VALUE) {
          ll = lower + 1;
        } else {
          ll = lower;
        }

        final int uu;
        if (upper == null) {
          uu = Integer.MAX_VALUE;
        } else if (!includeUpper && upper > Integer.MIN_VALUE) {
          uu = upper - 1;
        } else {
          uu = upper;
        }

        return new ValueSourceScorer(reader, this) {
          @Override
          public boolean matchesValue(int doc) {
            // A document without a value for the field has no real int to compare;
            // treat it as a non-match, consistent with exists() and objectVal().
            if (!valid.get(doc)) {
              return false;
            }
            final int val = arr.get(doc);
            return val >= ll && val <= uu;
          }
        };
      }

      @Override
      public ValueFiller getValueFiller() {
        return new ValueFiller() {
          private final MutableValueInt mval = new MutableValueInt();

          @Override
          public MutableValue getValue() {
            return mval;
          }

          @Override
          public void fillValue(int doc) {
            mval.value = arr.get(doc);
            mval.exists = valid.get(doc);
          }
        };
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    if (!super.equals(o)) return false;

    EnumFieldSource that = (EnumFieldSource) o;

    if (!enumIntToStringMap.equals(that.enumIntToStringMap)) return false;
    if (!enumStringToIntMap.equals(that.enumStringToIntMap)) return false;
    if (!parser.equals(that.parser)) return false;

    return true;
  }

  @Override
  public int hashCode() {
    int result = super.hashCode();
    result = 31 * result + parser.hashCode();
    result = 31 * result + enumIntToStringMap.hashCode();
    result = 31 * result + enumStringToIntMap.hashCode();
    return result;
  }
}

View File

@ -121,6 +121,8 @@ New Features
* SOLR-5392: Extend solrj apis to cover collection management. * SOLR-5392: Extend solrj apis to cover collection management.
(Roman Shaposhnik via Mark Miller) (Roman Shaposhnik via Mark Miller)
* SOLR-5084: new field type EnumField. (Elran Dvir via Erick Erickson)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
@ -60,6 +61,8 @@ public class StatsValuesFactory {
return new DateStatsValues(sf); return new DateStatsValues(sf);
} else if (StrField.class.isInstance(fieldType)) { } else if (StrField.class.isInstance(fieldType)) {
return new StringStatsValues(sf); return new StringStatsValues(sf);
} else if (sf.getType().getClass().equals(EnumField.class)) {
return new EnumStatsValues(sf);
} else { } else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported");
} }
@ -310,6 +313,72 @@ class NumericStatsValues extends AbstractStatsValues<Number> {
} }
/** /**
* Implementation of StatsValues that supports EnumField values
*/
class EnumStatsValues extends AbstractStatsValues<EnumFieldValue> {

  public EnumStatsValues(SchemaField sf) {
    super(sf);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Reads both the int and the string form of the document's value and accumulates them
   * as one {@link EnumFieldValue}; documents without a value are counted as missing.
   */
  @Override
  public void accumulate(int docID) {
    if (values.exists(docID)) {
      Integer intValue = (Integer) values.objectVal(docID);
      String stringValue = values.strVal(docID);
      EnumFieldValue enumFieldValue = new EnumFieldValue(intValue, stringValue);
      accumulate(enumFieldValue, 1);
    } else {
      missing();
    }
  }

  /**
   * {@inheritDoc}
   * <p>
   * A {@code null} candidate on either side is ignored, so it can neither be dereferenced
   * nor overwrite a minimum/maximum that has already been recorded.
   */
  protected void updateMinMax(EnumFieldValue min, EnumFieldValue max) {
    if (max != null) {
      if (this.max == null || max.compareTo(this.max) > 0) {
        this.max = max;
      }
    }
    if (min != null) {
      if (this.min == null || this.min.compareTo(min) > 0) {
        this.min = min;
      }
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  protected void updateTypeSpecificStats(NamedList stv) {
    // No type specific stats
  }

  /**
   * {@inheritDoc}
   */
  @Override
  protected void updateTypeSpecificStats(EnumFieldValue value, int count) {
    // No type specific stats
  }

  /**
   * Adds no type specific statistics
   */
  @Override
  protected void addTypeSpecificStats(NamedList<Object> res) {
    // Add no statistics
  }
}
/**
* /**
* Implementation of StatsValues that supports Date values * Implementation of StatsValues that supports Date values
*/ */
class DateStatsValues extends AbstractStatsValues<Date> { class DateStatsValues extends AbstractStatsValues<Date> {

View File

@ -0,0 +1,440 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
 * Field type for support of string values with custom sort order.
 * <p>
 * The allowed values are read, in document order, from the {@code <enum>} element whose
 * {@code name} attribute equals the {@code enumName} argument, inside the XML resource named
 * by the {@code enumsConfig} argument. Each value is indexed as the int position at which it
 * appears in that element, so sorting and range queries follow the configured order.
 */
public class EnumField extends PrimitiveFieldType {

  public static final Logger log = LoggerFactory.getLogger(EnumField.class);
  protected static final Locale LOCALE = Locale.getDefault();
  protected static final String PARAM_ENUMS_CONFIG = "enumsConfig";
  protected static final String PARAM_ENUM_NAME = "enumName";
  /** Int used for strings/ints that are not part of the configured enum. */
  protected static final Integer DEFAULT_VALUE = -1;
  protected static final int DEFAULT_PRECISION_STEP = Integer.MAX_VALUE;

  protected Map<String, Integer> enumStringToIntMap = new HashMap<String, Integer>();
  protected Map<Integer, String> enumIntToStringMap = new HashMap<Integer, String>();

  protected String enumsConfigFile;
  protected String enumName;

  /**
   * {@inheritDoc}
   * <p>
   * Loads the enum definition and fills both lookup maps. The {@code enumsConfig} and
   * {@code enumName} entries are removed from {@code args} once they have been consumed.
   *
   * @throws SolrException if either argument is missing, the resource cannot be opened or
   *         parsed, the named enum is absent or empty, or a value is empty or duplicated
   */
  @Override
  protected void init(IndexSchema schema, Map<String, String> args) {
    super.init(schema, args);
    enumsConfigFile = args.get(PARAM_ENUMS_CONFIG);
    if (enumsConfigFile == null) {
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "No enums config file was configured.");
    }
    enumName = args.get(PARAM_ENUM_NAME);
    if (enumName == null) {
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "No enum name was configured.");
    }

    InputStream is = null;
    try {
      is = schema.getResourceLoader().openResource(enumsConfigFile);
      final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      try {
        final Document doc = dbf.newDocumentBuilder().parse(is);
        final XPathFactory xpathFactory = XPathFactory.newInstance();
        final XPath xpath = xpathFactory.newXPath();
        // NOTE(review): enumName is spliced into the expression unescaped; a name containing
        // a single quote would produce an invalid XPath.
        final String xpathStr = String.format(LOCALE, "/enumsConfig/enum[@name='%s']", enumName);
        final NodeList nodes = (NodeList) xpath.evaluate(xpathStr, doc, XPathConstants.NODESET);
        final int nodesLength = nodes.getLength();
        if (nodesLength == 0) {
          String exceptionMessage = String.format(LOCALE, "No enum configuration found for enum '%s' in %s.",
                  enumName, enumsConfigFile);
          throw new SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage);
        }
        if (nodesLength > 1) {
          log.warn("More than one enum configuration found for enum '{}' in {}. The last one was taken.", enumName, enumsConfigFile);
        }
        final Node enumNode = nodes.item(nodesLength - 1);
        final NodeList valueNodes = (NodeList) xpath.evaluate("value", enumNode, XPathConstants.NODESET);
        for (int i = 0; i < valueNodes.getLength(); i++) {
          final Node valueNode = valueNodes.item(i);
          final String valueStr = valueNode.getTextContent();
          if ((valueStr == null) || (valueStr.length() == 0)) {
            final String exceptionMessage = String.format(LOCALE, "A value was defined with an no value in enum '%s' in %s.",
                    enumName, enumsConfigFile);
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
          }
          if (enumStringToIntMap.containsKey(valueStr)) {
            final String exceptionMessage = String.format(LOCALE, "A duplicated definition was found for value '%s' in enum '%s' in %s.",
                    valueStr, enumName, enumsConfigFile);
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
          }
          // The position of the <value> element is the int that gets indexed for it.
          enumIntToStringMap.put(i, valueStr);
          enumStringToIntMap.put(valueStr, i);
        }
      }
      catch (ParserConfigurationException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e);
      }
      catch (SAXException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e);
      }
      catch (XPathExpressionException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e);
      }
    }
    catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error while opening enums config.", e);
    } finally {
      try {
        if (is != null) {
          is.close();
        }
      }
      catch (IOException e) {
        // Closing is best effort; report through the logger rather than stderr.
        log.warn("Error while closing enums config " + enumsConfigFile, e);
      }
    }

    if ((enumStringToIntMap.size() == 0) || (enumIntToStringMap.size() == 0)) {
      String exceptionMessage = String.format(LOCALE, "Invalid configuration was defined for enum '%s' in %s.",
              enumName, enumsConfigFile);
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage);
    }

    args.remove(PARAM_ENUMS_CONFIG);
    args.remove(PARAM_ENUM_NAME);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Both members of the returned value are {@code null} when the field carries no numeric value.
   */
  @Override
  public EnumFieldValue toObject(StorableField f) {
    Integer intValue = null;
    String stringValue = null;
    final Number val = f.numericValue();
    if (val != null) {
      intValue = val.intValue();
      stringValue = intValueToStringValue(intValue);
    }
    return new EnumFieldValue(intValue, stringValue);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Sorts on the indexed int; {@link Integer#MIN_VALUE} is used as the missing value.
   */
  @Override
  public SortField getSortField(SchemaField field, boolean top) {
    field.checkSortability();
    final Object missingValue = Integer.MIN_VALUE;
    return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public ValueSource getValueSource(SchemaField field, QParser qparser) {
    field.checkFieldCacheSource(qparser);
    return new EnumFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, enumIntToStringMap, enumStringToIntMap);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Writes the display string of the stored int, or null when the field has no numeric value.
   */
  @Override
  public void write(TextResponseWriter writer, String name, StorableField f) throws IOException {
    final Number val = f.numericValue();
    if (val == null) {
      writer.writeNull(name);
      return;
    }

    final String readableValue = intValueToStringValue(val.intValue());
    writer.writeStr(name, readableValue, true);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean isTokenized() {
    return false;
  }

  /**
   * {@inheritDoc}
   * <p>
   * Endpoints may be display strings or ints in decimal form; a {@code null} endpoint leaves
   * that side of the range open.
   */
  @Override
  public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
    // stringValueToIntValue returns null exactly when its argument is null (open endpoint).
    final Integer minValue = stringValueToIntValue(min);
    final Integer maxValue = stringValueToIntValue(max);

    if (field.multiValued() && field.hasDocValues() && !field.indexed()) {
      // for the multi-valued dv-case, the default rangeimpl over toInternal is correct.
      // Open endpoints must stay null; calling toString() on them would throw.
      return super.getRangeQuery(parser, field,
              minValue == null ? null : minValue.toString(),
              maxValue == null ? null : maxValue.toString(),
              minInclusive, maxInclusive);
    }

    final Query query;
    final boolean matchOnly = field.hasDocValues() && !field.indexed();
    if (matchOnly) {
      query = new ConstantScoreQuery(FieldCacheRangeFilter.newIntRange(field.getName(),
              minValue,
              maxValue,
              minInclusive, maxInclusive));
    } else {
      query = NumericRangeQuery.newIntRange(field.getName(), DEFAULT_PRECISION_STEP,
              minValue,
              maxValue,
              minInclusive, maxInclusive);
    }

    return query;
  }

  /**
   * {@inheritDoc}
   *
   * @throws IllegalStateException if the field has single-valued doc values but is neither
   *         required nor given a default value
   */
  @Override
  public void checkSchemaField(final SchemaField field) {
    if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
      throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
    }
  }

  /**
   * {@inheritDoc}
   *
   * @return {@code null} when {@code val} is {@code null}
   */
  @Override
  public String readableToIndexed(String val) {
    if (val == null)
      return null;

    final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
    readableToIndexed(val, bytes);
    return bytes.utf8ToString();
  }

  /**
   * {@inheritDoc}
   * <p>
   * Leaves {@code result} untouched when {@code val} is {@code null}.
   */
  @Override
  public void readableToIndexed(CharSequence val, BytesRef result) {
    // Check the argument itself before dereferencing it.
    if (val == null)
      return;
    final String s = val.toString();
    if (s == null)
      return;

    final Integer intValue = stringValueToIntValue(s);
    NumericUtils.intToPrefixCoded(intValue, 0, result);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String toInternal(String val) {
    return readableToIndexed(val);
  }

  /**
   * {@inheritDoc}
   *
   * @return the display string, or {@code null} when the field has no numeric value
   */
  @Override
  public String toExternal(StorableField f) {
    final Number val = f.numericValue();
    if (val == null)
      return null;

    return intValueToStringValue(val.intValue());
  }

  /**
   * {@inheritDoc}
   *
   * @return {@code null} when {@code indexedForm} is {@code null}
   */
  @Override
  public String indexedToReadable(String indexedForm) {
    if (indexedForm == null)
      return null;
    final BytesRef bytesRef = new BytesRef(indexedForm);
    final Integer intValue = NumericUtils.prefixCodedToInt(bytesRef);
    return intValueToStringValue(intValue);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Decodes the prefix-coded int and copies its display string into {@code output}.
   */
  @Override
  public CharsRef indexedToReadable(BytesRef input, CharsRef output) {
    final Integer intValue = NumericUtils.prefixCodedToInt(input);
    final String stringValue = intValueToStringValue(intValue);
    output.grow(stringValue.length());
    output.length = stringValue.length();
    stringValue.getChars(0, output.length, output.chars, 0);
    return output;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public EnumFieldValue toObject(SchemaField sf, BytesRef term) {
    final Integer intValue = NumericUtils.prefixCodedToInt(term);
    final String stringValue = intValueToStringValue(intValue);

    return new EnumFieldValue(intValue, stringValue);
  }

  /**
   * {@inheritDoc}
   *
   * @return {@code null} when the field has no numeric value
   */
  @Override
  public String storedToIndexed(StorableField f) {
    final Number val = f.numericValue();
    if (val == null)
      return null;

    final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
    NumericUtils.intToPrefixCoded(val.intValue(), 0, bytes);
    return bytes.utf8ToString();
  }

  /**
   * {@inheritDoc}
   * <p>
   * Produces an int field holding the enum int of {@code value}.
   *
   * @return the new field, or {@code null} when the schema field is neither indexed, stored
   *         nor doc-valued
   * @throws SolrException if {@code value} is not part of the enum
   */
  @Override
  public StorableField createField(SchemaField field, Object value, float boost) {
    final boolean indexed = field.indexed();
    final boolean stored = field.stored();
    final boolean docValues = field.hasDocValues();

    if (!indexed && !stored && !docValues) {
      if (log.isTraceEnabled())
        log.trace("Ignoring unindexed/unstored field: " + field);
      return null;
    }
    final Integer intValue = stringValueToIntValue(value.toString());
    // DEFAULT_VALUE means "not part of the enum": refuse to index it.
    if (intValue == null || intValue.equals(DEFAULT_VALUE))
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown value for enum field: " + value.toString());

    String intAsString = intValue.toString();
    final FieldType newType = new FieldType();

    newType.setIndexed(field.indexed());
    newType.setTokenized(field.isTokenized());
    newType.setStored(field.stored());
    newType.setOmitNorms(field.omitNorms());
    newType.setIndexOptions(getIndexOptions(field, intAsString));
    newType.setStoreTermVectors(field.storeTermVector());
    newType.setStoreTermVectorOffsets(field.storeTermOffsets());
    newType.setStoreTermVectorPositions(field.storeTermPositions());
    newType.setNumericType(FieldType.NumericType.INT);
    newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP);

    final org.apache.lucene.document.Field f;
    f = new org.apache.lucene.document.IntField(field.getName(), intValue.intValue(), newType);

    f.setBoost(boost);
    return f;
  }

  /**
   * Converts the (internal) integer value, which indicates the sort order, to its string
   * (displayed) value.
   *
   * @param intVal integer value
   * @return {@code null} for a {@code null} input, the matching string when one is configured,
   *         otherwise {@code DEFAULT_VALUE.toString()}
   */
  public String intValueToStringValue(Integer intVal) {
    if (intVal == null)
      return null;

    final String enumString = enumIntToStringMap.get(intVal);
    if (enumString != null)
      return enumString;
    // can't find matching enum name - return DEFAULT_VALUE.toString()
    return DEFAULT_VALUE.toString();
  }

  /**
   * Converts a string (displayed) value to its (internal) integer value, which indicates the
   * sort order. The decimal form of a configured integer is accepted as well.
   *
   * @param stringVal string value
   * @return {@code null} for a {@code null} input, the matching integer when one is configured,
   *         otherwise {@link #DEFAULT_VALUE}
   */
  public Integer stringValueToIntValue(String stringVal) {
    if (stringVal == null)
      return null;

    Integer intValue;
    final Integer enumInt = enumStringToIntMap.get(stringVal);
    if (enumInt != null) //enum int found for string
      return enumInt;

    //enum int not found for string
    intValue = tryParseInt(stringVal);
    if (intValue == null) //not Integer
      intValue = DEFAULT_VALUE;
    final String enumString = enumIntToStringMap.get(intValue);
    if (enumString != null) //has matching string
      return intValue;

    return DEFAULT_VALUE;
  }

  /**
   * Parses {@code valueStr} as a decimal int.
   *
   * @return the parsed value, or {@code null} when the string is not a valid int
   */
  private static Integer tryParseInt(String valueStr) {
    Integer intValue = null;
    try {
      intValue = Integer.parseInt(valueStr);
    }
    catch (NumberFormatException e) {
      // Deliberately ignored: "not a number" is an expected outcome, reported as null.
    }
    return intValue;
  }
}

View File

@ -0,0 +1,33 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Example enums configuration file.
-->
<enumsConfig>
<!-- Note: you cannot reorder or change the existing values of an enum without reindexing,
but you can always add new values to the end. -->
<enum name="severity">
<value>Not Available</value>
<value>Low</value>
<value>Medium</value>
<value>High</value>
<value>Critical</value>
</enum>
</enumsConfig>

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="tiny" version="1.1">
<fields>
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
<!-- Test EnumField -->
<field name="severity" type="severityType" indexed="true" stored="true" multiValued="false"/>
<field name="text" type="text" indexed="true" stored="true" multiValued="true"/>
</fields>
<uniqueKey>id</uniqueKey>
<types>
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- Note: you cannot reorder or change the existing values of an enum without reindexing,
but you can always add new values to the end. -->
<fieldType name="severityType" class="solr.EnumField" enumsConfig="enumsConfig.xml" enumName="severity"/>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
</types>
</schema>

View File

@ -1,75 +1,61 @@
<?xml version="1.0" encoding="UTF-8" ?> <?xml version="1.0" ?>
<!-- <!--
Licensed to the Apache Software Foundation (ASF) under one or more Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership. this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0 The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
-->
<!-- This is a "kitchen sink" config file that tests can use.
When writting a new test, feel free to add *new* items (plugins,
config options, etc...) as long as they don't break any existing
tests. if you need to test something esoteric please add a new
"solrconfig-your-esoteric-purpose.xml" config file.
Note in particular that this test is used by MinimalSchemaTest so
Anything added to this file needs to work correctly even if there
is now uniqueKey or defaultSearch Field.
--> -->
<!-- For testing, I need to create some custom directories on the fly, particularly for some of the new
discovery-based core configuration. Trying a minimal configuration to cut down the setup time.
use in conjunction with schema-minimal.xml perhaps? -->
<config> <config>
<luceneMatchVersion>LUCENE_41</luceneMatchVersion>
<dataDir>${solr.data.dir:}</dataDir> <dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory" <directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<xi:include href="./solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/> <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<codecFactory class="solr.SchemaCodecFactory"/>
<jmx/> <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2"> <updateHandler class="solr.DirectUpdateHandler2">
<!--updateLog> <commitWithin>
<str name="dir">${solr.ulog.dir:}</str> <softCommit>${solr.commitwithin.softcommit:true}</softCommit>
</updateLog--> </commitWithin>
</updateHandler> </updateHandler>
<query>
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<queryResultWindowSize>20</queryResultWindowSize>
<queryResultMaxDocsCached>20</queryResultMaxDocsCached>
<useColdSearcher>true</useColdSearcher>
<maxWarmingSearchers>1</maxWarmingSearchers>
</query>
<requestHandler name="/admin/" class="solr.admin.AdminHandlers" />
<requestDispatcher handleSelect="false">
<httpCaching never304="true"/>
</requestDispatcher>
<requestHandler name="/select" class="solr.SearchHandler"> <requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults"> <lst name="defaults">
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="wt">json</str>
<str name="indent">true</str> <str name="indent">true</str>
<str name="df">text</str> <str name="df">text</str>
</lst> </lst>
</requestHandler> </requestHandler>
<requestHandler name="/update" class="solr.UpdateRequestHandler">
</requestHandler>
<queryResponseWriter name="json" class="solr.JSONResponseWriter"> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
<!-- For the purposes of the tutorial, JSON responses are written as
plain text so that they are easy to read in *any* browser. <requestHandler name="/update" class="solr.UpdateRequestHandler" />
If you expect a MIME type of "application/json" just remove this override.
-->
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
</config> </config>

View File

@ -0,0 +1,216 @@
package org.apache.solr.schema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests for the enum field type, exercised through the {@code severity} field declared in
 * {@code schema-enums.xml}: schema lookup, range and term queries, rejection of unknown values
 * at index time, indexing by integer value, and sorting.
 *
 * <p>The expected hit counts below imply the ordering
 * {@code Not Available < Low < Medium < High < Critical}, with {@code High} and {@code Critical}
 * addressable as the integers 3 and 4 and {@code Low} as 1. The enum configuration itself lives
 * outside this file.
 */
public class EnumFieldTest extends SolrTestCaseJ4 {

  private final static String FIELD_NAME = "severity";

  /** Starts a core with the minimal solrconfig and the enum test schema. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-minimal.xml", "schema-enums.xml");
  }

  /** The enum field must be resolvable from the latest schema. */
  @Test
  public void testEnumSchema() throws Exception {
    IndexSchema schema = h.getCore().getLatestSchema();
    SchemaField enumField = schema.getField(FIELD_NAME);
    assertNotNull(enumField);
  }

  /**
   * Range queries over the enum field: bounds given as display strings, as integers, or mixed;
   * inclusive and exclusive bounds; open-ended ranges; and documents with no value.
   */
  @Test
  public void testEnumRangeSearch() throws Exception {
    clearIndex();

    // 5 x "Not Available", 4 x "Low", 3 x "Medium", 2 x "High", 1 x "Critical"
    assertU(adoc("id", "0", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "1", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "2", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "3", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "4", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "5", FIELD_NAME, "Low"));
    assertU(adoc("id", "6", FIELD_NAME, "Low"));
    assertU(adoc("id", "7", FIELD_NAME, "Low"));
    assertU(adoc("id", "8", FIELD_NAME, "Low"));
    assertU(adoc("id", "9", FIELD_NAME, "Medium"));
    assertU(adoc("id", "10", FIELD_NAME, "Medium"));
    assertU(adoc("id", "11", FIELD_NAME, "Medium"));
    assertU(adoc("id", "12", FIELD_NAME, "High"));
    assertU(adoc("id", "13", FIELD_NAME, "High"));
    assertU(adoc("id", "14", FIELD_NAME, "Critical"));

    // two docs w/o values
    for (int i = 20; i <= 21; i++) {
      assertU(adoc("id", "" + i));
    }

    assertU(commit());

    //range with the same value
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[\"Not Available\" TO \"Not Available\"]"),
        "//*[@numFound='5']");

    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[\"Not Available\" TO Critical]"),
        "//*[@numFound='15']");

    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[Low TO High]"),
        "//*[@numFound='9']");

    // reversed bounds match nothing
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[High TO Low]"),
        "//*[@numFound='0']");

    //with int values
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[High TO 4]"),
        "//*[@numFound='3']");
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[3 TO Critical]"),
        "//*[@numFound='3']");
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[3 TO 4]"),
        "//*[@numFound='3']");

    //exclusive
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":{Low TO High]"),
        "//*[@numFound='5']");
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[Low TO High}"),
        "//*[@numFound='7']");
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":{Low TO High}"),
        "//*[@numFound='3']");

    //all docs
    assertQ(req("fl", FIELD_NAME, "q",
            "*:*"),
        "//*[@numFound='17']");

    //all docs with values
    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":[* TO *]"),
        "//*[@numFound='15']");

    //empty docs
    assertQ(req("fl", FIELD_NAME, "q",
            "-" + FIELD_NAME + ":[* TO *]"),
        "//*[@numFound='2']");
  }

  /**
   * Term queries for values that no indexed document carries (an unknown name, an integer and a
   * negative integer that the indexed values do not use) must return no hits.
   */
  @Test
  public void testBogusEnumSearch() throws Exception {
    clearIndex();

    assertU(adoc("id", "0", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "1", FIELD_NAME, "Low"));
    assertU(adoc("id", "2", FIELD_NAME, "Medium"));
    assertU(adoc("id", "3", FIELD_NAME, "High"));
    assertU(adoc("id", "4", FIELD_NAME, "Critical"));

    // two docs w/o values
    for (int i = 8; i <= 9; i++) {
      assertU(adoc("id", "" + i));
    }

    assertU(commit());

    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":bla"),
        "//*[@numFound='0']");

    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":7"),
        "//*[@numFound='0']");

    assertQ(req("fl", FIELD_NAME, "q",
            FIELD_NAME + ":\"-3\""),
        "//*[@numFound='0']");
  }

  /** Adding a document whose enum value is unknown (by name or by integer) must fail. */
  @Test
  public void testBogusEnumIndexing() throws Exception {
    // the failures below are expected; register their messages as ignored exceptions
    ignoreException("Unknown value for enum field: blabla");
    ignoreException("Unknown value for enum field: 10");
    ignoreException("Unknown value for enum field: -4");

    clearIndex();

    assertFailedU(adoc("id", "0", FIELD_NAME, "blabla"));
    assertFailedU(adoc("id", "0", FIELD_NAME, "10"));
    assertFailedU(adoc("id", "0", FIELD_NAME, "-4"));
  }

  /** A value indexed by its integer form is returned as the matching display string. */
  @Test
  public void testKnownIntegerEnumIndexing() throws Exception {
    clearIndex();

    assertU(adoc("id", "0", FIELD_NAME, "1"));

    assertU(commit());

    assertQ(req("fl", FIELD_NAME, "q", "*:*"),
        "//doc[1]/str[@name='severity']/text()='Low'");
  }

  /** Sorting on the enum field follows the enum order rather than the display strings. */
  @Test
  public void testEnumSort() throws Exception {
    clearIndex();

    assertU(adoc("id", "0", FIELD_NAME, "Not Available"));
    assertU(adoc("id", "1", FIELD_NAME, "Low"));
    assertU(adoc("id", "2", FIELD_NAME, "Medium"));
    assertU(adoc("id", "3", FIELD_NAME, "High"));
    assertU(adoc("id", "4", FIELD_NAME, "Critical"));

    // two docs w/o values
    for (int i = 8; i <= 9; i++) {
      assertU(adoc("id", "" + i));
    }

    assertU(commit());

    assertQ(req("fl", FIELD_NAME, "q", "*:*", "sort", FIELD_NAME + " desc"),
        "//doc[1]/str[@name='severity']/text()='Critical'",
        "//doc[2]/str[@name='severity']/text()='High'",
        "//doc[3]/str[@name='severity']/text()='Medium'",
        "//doc[4]/str[@name='severity']/text()='Low'",
        "//doc[5]/str[@name='severity']/text()='Not Available'");

    //sort ascending - empty values will be first
    assertQ(req("fl", FIELD_NAME, "q", "*:*", "sort", FIELD_NAME + " asc"),
        "//doc[3]/str[@name='severity']/text()='Not Available'");

    //q for not empty docs
    assertQ(req("fl", FIELD_NAME, "q", FIELD_NAME + ":[* TO *]", "sort", FIELD_NAME + " asc"),
        "//doc[1]/str[@name='severity']/text()='Not Available'",
        "//doc[2]/str[@name='severity']/text()='Low'",
        "//doc[3]/str[@name='severity']/text()='Medium'",
        "//doc[4]/str[@name='severity']/text()='High'",
        "//doc[5]/str[@name='severity']/text()='Critical'"
    );
  }
}

View File

@ -0,0 +1,116 @@
package org.apache.solr.common;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
/**
 * Represents an enum field value: an integer value (indicating the sort order) paired with a
 * string (displayed) value.
 * Note: this class has a natural ordering that is inconsistent with equals, because
 * {@link #compareTo} looks only at the integer value while {@link #equals} also compares the
 * string value.
 */
public final class EnumFieldValue implements Serializable, Comparable<EnumFieldValue> {

  private final Integer intValue;
  private final String stringValue;

  /**
   * @param intValue integer value (indicating the sort order); may be null
   * @param stringValue string (displayed) value; may be null
   */
  public EnumFieldValue(Integer intValue, String stringValue) {
    this.intValue = intValue;
    this.stringValue = stringValue;
  }

  /**
   * Combines the hash codes of both components; a null component contributes 0.
   */
  @Override
  public int hashCode() {
    final int intHash = (intValue == null) ? 0 : intValue.hashCode();
    final int stringHash = (stringValue == null) ? 0 : stringValue.hashCode();
    return 31 * intHash + stringHash;
  }

  /**
   * Two values are equal when both the integer and the string components are equal
   * (two nulls count as equal).
   */
  @Override
  public boolean equals(Object obj) {
    // instanceof is false for null, so this guard also rejects a null argument
    if (!(obj instanceof EnumFieldValue)) {
      return false;
    }
    final EnumFieldValue that = (EnumFieldValue) obj;
    return sameOrBothNull(intValue, that.intValue)
        && sameOrBothNull(stringValue, that.stringValue);
  }

  /**
   * @return string (displayed) value
   */
  @Override
  public String toString() {
    return stringValue;
  }

  /**
   * @return integer value (indicating the sort order)
   */
  public Integer toInt() {
    return intValue;
  }

  /**
   * Orders by integer value only. A null argument, or an argument with a null integer value,
   * sorts before this value; a null integer value on this side sorts before any non-null one.
   */
  @Override
  public int compareTo(EnumFieldValue o) {
    if (o == null) {
      return 1;
    }
    final Integer theirs = o.intValue;
    if (intValue == null) {
      return (theirs == null) ? 0 : -1;
    }
    if (theirs == null) {
      return 1;
    }
    return intValue.compareTo(theirs);
  }

  /** Null-safe equality: true when both are null, or both are non-null and equal. */
  private static boolean sameOrBothNull(Object left, Object right) {
    if (left == null || right == null) {
      return left == right;
    }
    return left.equals(right);
  }
}

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.common.util; package org.apache.solr.common.util;
import org.apache.solr.common.EnumFieldValue;
import org.noggit.CharArr; import org.noggit.CharArr;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
@ -62,7 +63,8 @@ public class JavaBinCodec {
END = 15, END = 15,
SOLRINPUTDOC = 16, SOLRINPUTDOC = 16,
SOLRINPUTDOC_CHILDS = 17,
ENUM_FIELD_VALUE = 18,
// types that combine tag + length (or other info) in a single byte // types that combine tag + length (or other info) in a single byte
TAG_AND_LEN = (byte) (1 << 5), TAG_AND_LEN = (byte) (1 << 5),
STR = (byte) (1 << 5), STR = (byte) (1 << 5),
@ -223,6 +225,8 @@ public class JavaBinCodec {
return END_OBJ; return END_OBJ;
case SOLRINPUTDOC: case SOLRINPUTDOC:
return readSolrInputDocument(dis); return readSolrInputDocument(dis);
case ENUM_FIELD_VALUE:
return readEnumFieldValue(dis);
} }
throw new RuntimeException("Unknown type " + tagByte); throw new RuntimeException("Unknown type " + tagByte);
@ -278,6 +282,10 @@ public class JavaBinCodec {
writeIterator(((Iterable) val).iterator()); writeIterator(((Iterable) val).iterator());
return true; return true;
} }
if (val instanceof EnumFieldValue) {
writeEnumFieldValue((EnumFieldValue) val);
return true;
}
return false; return false;
} }
@ -463,6 +471,27 @@ public class JavaBinCodec {
return l; return l;
} }
/**
 * Serializes an {@link EnumFieldValue}: first the {@code ENUM_FIELD_VALUE} tag, then the
 * integer value, then the string (displayed) value.
 *
 * @param enumFieldValue the value to write
 * @throws IOException declared for the underlying write calls
 */
public void writeEnumFieldValue(EnumFieldValue enumFieldValue) throws IOException {
writeTag(ENUM_FIELD_VALUE);
// the tag is emitted before the value is touched; keep this order
final int order = enumFieldValue.toInt();
writeInt(order);
final String label = enumFieldValue.toString();
writeStr(label);
}
/**
 * Deserializes an {@link EnumFieldValue} by reading two consecutive values from the stream:
 * the integer value followed by the string (displayed) value.
 *
 * @param dis data input stream to read from
 * @return the reconstructed {@link EnumFieldValue}
 * @throws IOException declared for the underlying read calls
 */
public EnumFieldValue readEnumFieldValue(DataInputInputStream dis) throws IOException {
// read and cast the integer before touching the string, so the stream is consumed in order
final Integer order = (Integer) readVal(dis);
return new EnumFieldValue(order, (String) readVal(dis));
}
/** /**
* write the string as tag+length, with length being the number of UTF-8 bytes * write the string as tag+length, with length being the number of UTF-8 bytes
*/ */

View File

@ -109,6 +109,7 @@ public abstract class AbstractZkTestCase extends SolrTestCaseJ4 {
putConfig("conf1", zkClient, solrhome, "stopwords.txt"); putConfig("conf1", zkClient, solrhome, "stopwords.txt");
putConfig("conf1", zkClient, solrhome, "protwords.txt"); putConfig("conf1", zkClient, solrhome, "protwords.txt");
putConfig("conf1", zkClient, solrhome, "currency.xml"); putConfig("conf1", zkClient, solrhome, "currency.xml");
putConfig("conf1", zkClient, solrhome, "enumsConfig.xml");
putConfig("conf1", zkClient, solrhome, "open-exchange-rates.json"); putConfig("conf1", zkClient, solrhome, "open-exchange-rates.json");
putConfig("conf1", zkClient, solrhome, "mapping-ISOLatin1Accent.txt"); putConfig("conf1", zkClient, solrhome, "mapping-ISOLatin1Accent.txt");
putConfig("conf1", zkClient, solrhome, "old_synonyms.txt"); putConfig("conf1", zkClient, solrhome, "old_synonyms.txt");