From a9f8bf654c2a338c709e1d385758474f3fa83e84 Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Tue, 5 Nov 2013 19:41:28 +0000 Subject: [PATCH] SOLR-5084: added enum field type to Solr git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1539111 13f79535-47bb-0310-9956-ffa450edef68 --- .../function/valuesource/EnumFieldSource.java | 228 +++++++++ solr/CHANGES.txt | 2 + .../handler/component/StatsValuesFactory.java | 69 +++ .../org/apache/solr/schema/EnumField.java | 440 ++++++++++++++++++ .../solr/collection1/conf/enumsConfig.xml | 33 ++ .../solr/collection1/conf/schema-enums.xml | 42 ++ .../collection1/conf/solrconfig-minimal.xml | 86 ++-- .../org/apache/solr/schema/EnumFieldTest.java | 216 +++++++++ .../apache/solr/common/EnumFieldValue.java | 116 +++++ .../apache/solr/common/util/JavaBinCodec.java | 31 +- .../apache/solr/cloud/AbstractZkTestCase.java | 1 + 11 files changed, 1213 insertions(+), 51 deletions(-) create mode 100644 lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java create mode 100644 solr/core/src/java/org/apache/solr/schema/EnumField.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/enumsConfig.xml create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-enums.xml create mode 100644 solr/core/src/test/org/apache/solr/schema/EnumFieldTest.java create mode 100644 solr/solrj/src/java/org/apache/solr/common/EnumFieldValue.java diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java new file mode 100644 index 00000000000..cd83c944052 --- /dev/null +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java @@ -0,0 +1,228 @@ +package org.apache.lucene.queries.function.valuesource; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSourceScorer; +import org.apache.lucene.queries.function.docvalues.IntDocValues; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.mutable.MutableValue; +import org.apache.lucene.util.mutable.MutableValueInt; + +/** + * Obtains int field values from {@link FieldCache#getInts} and makes + * those values available as other numeric types, casting as needed. 
+ * strVal of the value is not the int value, but its string (displayed) value + */ +public class EnumFieldSource extends FieldCacheSource { + static final Integer DEFAULT_VALUE = -1; + + final FieldCache.IntParser parser; + final Map enumIntToStringMap; + final Map enumStringToIntMap; + + public EnumFieldSource(String field, FieldCache.IntParser parser, Map enumIntToStringMap, Map enumStringToIntMap) { + super(field); + this.parser = parser; + this.enumIntToStringMap = enumIntToStringMap; + this.enumStringToIntMap = enumStringToIntMap; + } + + private static Integer tryParseInt(String valueStr) { + Integer intValue = null; + try { + intValue = Integer.parseInt(valueStr); + } + catch (NumberFormatException e) { + } + return intValue; + } + + private String intValueToStringValue(Integer intVal) { + if (intVal == null) + return null; + + final String enumString = enumIntToStringMap.get(intVal); + if (enumString != null) + return enumString; + // can't find matching enum name - return DEFAULT_VALUE.toString() + return DEFAULT_VALUE.toString(); + } + + private Integer stringValueToIntValue(String stringVal) { + if (stringVal == null) + return null; + + Integer intValue; + final Integer enumInt = enumStringToIntMap.get(stringVal); + if (enumInt != null) //enum int found for string + return enumInt; + + //enum int not found for string + intValue = tryParseInt(stringVal); + if (intValue == null) //not Integer + intValue = DEFAULT_VALUE; + final String enumString = enumIntToStringMap.get(intValue); + if (enumString != null) //has matching string + return intValue; + + return DEFAULT_VALUE; + } + + @Override + public String description() { + return "enum(" + field + ')'; + } + + + @Override + public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { + final FieldCache.Ints arr = cache.getInts(readerContext.reader(), field, parser, true); + final Bits valid = cache.getDocsWithField(readerContext.reader(), field); + + return new 
IntDocValues(this) { + final MutableValueInt val = new MutableValueInt(); + + @Override + public float floatVal(int doc) { + return (float) arr.get(doc); + } + + @Override + public int intVal(int doc) { + return arr.get(doc); + } + + @Override + public long longVal(int doc) { + return (long) arr.get(doc); + } + + @Override + public double doubleVal(int doc) { + return (double) arr.get(doc); + } + + @Override + public String strVal(int doc) { + Integer intValue = arr.get(doc); + return intValueToStringValue(intValue); + } + + @Override + public Object objectVal(int doc) { + return valid.get(doc) ? arr.get(doc) : null; + } + + @Override + public boolean exists(int doc) { + return valid.get(doc); + } + + @Override + public String toString(int doc) { + return description() + '=' + strVal(doc); + } + + + @Override + public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + Integer lower = stringValueToIntValue(lowerVal); + Integer upper = stringValueToIntValue(upperVal); + + // instead of using separate comparison functions, adjust the endpoints. 
+ + if (lower == null) { + lower = Integer.MIN_VALUE; + } else { + if (!includeLower && lower < Integer.MAX_VALUE) lower++; + } + + if (upper == null) { + upper = Integer.MAX_VALUE; + } else { + if (!includeUpper && upper > Integer.MIN_VALUE) upper--; + } + + final int ll = lower; + final int uu = upper; + + return new ValueSourceScorer(reader, this) { + @Override + public boolean matchesValue(int doc) { + int val = arr.get(doc); + // only check for deleted if it's the default value + // if (val==0 && reader.isDeleted(doc)) return false; + return val >= ll && val <= uu; + } + }; + } + + @Override + public ValueFiller getValueFiller() { + return new ValueFiller() { + private final MutableValueInt mval = new MutableValueInt(); + + @Override + public MutableValue getValue() { + return mval; + } + + @Override + public void fillValue(int doc) { + mval.value = arr.get(doc); + mval.exists = valid.get(doc); + } + }; + } + + + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + + EnumFieldSource that = (EnumFieldSource) o; + + if (!enumIntToStringMap.equals(that.enumIntToStringMap)) return false; + if (!enumStringToIntMap.equals(that.enumStringToIntMap)) return false; + if (!parser.equals(that.parser)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + parser.hashCode(); + result = 31 * result + enumIntToStringMap.hashCode(); + result = 31 * result + enumStringToIntMap.hashCode(); + return result; + } +} + diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e6b0c275c11..64761c38b95 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -121,6 +121,8 @@ New Features * SOLR-5392: Extend solrj apis to cover collection management. (Roman Shaposhnik via Mark Miller) + +* SOLR-5084: new field type EnumField. 
Elran Dvir via Erick Erickson Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index d334ffc6d59..2777a48a01e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.EnumFieldValue; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -60,6 +61,8 @@ public class StatsValuesFactory { return new DateStatsValues(sf); } else if (StrField.class.isInstance(fieldType)) { return new StringStatsValues(sf); + } else if (sf.getType().getClass().equals(EnumField.class)) { + return new EnumStatsValues(sf); } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported"); } @@ -310,6 +313,72 @@ class NumericStatsValues extends AbstractStatsValues { } /** + * Implementation of StatsValues that supports EnumField values + */ +class EnumStatsValues extends AbstractStatsValues { + + public EnumStatsValues(SchemaField sf) { + super(sf); + } + + /** + * {@inheritDoc} + */ + @Override + public void accumulate(int docID) { + if (values.exists(docID)) { + Integer intValue = (Integer) values.objectVal(docID); + String stringValue = values.strVal(docID); + EnumFieldValue enumFieldValue = new EnumFieldValue(intValue, stringValue); + accumulate(enumFieldValue, 1); + } else { + missing(); + } + } + + /** + * {@inheritDoc} + */ + protected void updateMinMax(EnumFieldValue min, EnumFieldValue max) { + if (max != null) { + if 
(max.compareTo(this.max) > 0) + this.max = max; + } + if (this.min == null) + this.min = min; + else if (this.min.compareTo(min) > 0) + this.min = min; + } + + /** + * {@inheritDoc} + */ + @Override + protected void updateTypeSpecificStats(NamedList stv) { + // No type specific stats + } + + /** + * {@inheritDoc} + */ + @Override + protected void updateTypeSpecificStats(EnumFieldValue value, int count) { + // No type specific stats + } + + /** + * Adds no type specific statistics + */ + @Override + protected void addTypeSpecificStats(NamedList res) { + // Add no statistics + } + + +} + +/** + * /** * Implementation of StatsValues that supports Date values */ class DateStatsValues extends AbstractStatsValues { diff --git a/solr/core/src/java/org/apache/solr/schema/EnumField.java b/solr/core/src/java/org/apache/solr/schema/EnumField.java new file mode 100644 index 00000000000..1b49931ca02 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/EnumField.java @@ -0,0 +1,440 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.StorableField; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.EnumFieldSource; +import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.NumericUtils; +import org.apache.solr.common.EnumFieldValue; +import org.apache.solr.common.SolrException; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import java.io.IOException; +import java.io.InputStream; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/*** + * Field type for support of string values with custom sort order. 
+ */ +public class EnumField extends PrimitiveFieldType { + + public static final Logger log = LoggerFactory.getLogger(EnumField.class); + protected static final Locale LOCALE = Locale.getDefault(); + protected static final String PARAM_ENUMS_CONFIG = "enumsConfig"; + protected static final String PARAM_ENUM_NAME = "enumName"; + protected static final Integer DEFAULT_VALUE = -1; + protected static final int DEFAULT_PRECISION_STEP = Integer.MAX_VALUE; + + protected Map enumStringToIntMap = new HashMap(); + protected Map enumIntToStringMap = new HashMap(); + + protected String enumsConfigFile; + protected String enumName; + + /** + * {@inheritDoc} + */ + @Override + protected void init(IndexSchema schema, Map args) { + super.init(schema, args); + enumsConfigFile = args.get(PARAM_ENUMS_CONFIG); + if (enumsConfigFile == null) { + throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "No enums config file was configured."); + } + enumName = args.get(PARAM_ENUM_NAME); + if (enumName == null) { + throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "No enum name was configured."); + } + + InputStream is = null; + + try { + is = schema.getResourceLoader().openResource(enumsConfigFile); + final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + try { + final Document doc = dbf.newDocumentBuilder().parse(is); + final XPathFactory xpathFactory = XPathFactory.newInstance(); + final XPath xpath = xpathFactory.newXPath(); + final String xpathStr = String.format(LOCALE, "/enumsConfig/enum[@name='%s']", enumName); + final NodeList nodes = (NodeList) xpath.evaluate(xpathStr, doc, XPathConstants.NODESET); + final int nodesLength = nodes.getLength(); + if (nodesLength == 0) { + String exceptionMessage = String.format(LOCALE, "No enum configuration found for enum '%s' in %s.", + enumName, enumsConfigFile); + throw new SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage); + } + if (nodesLength > 1) { + if (log.isWarnEnabled()) + log.warn("More 
than one enum configuration found for enum '{}' in {}. The last one was taken.", enumName, enumsConfigFile); + } + final Node enumNode = nodes.item(nodesLength - 1); + final NodeList valueNodes = (NodeList) xpath.evaluate("value", enumNode, XPathConstants.NODESET); + for (int i = 0; i < valueNodes.getLength(); i++) { + final Node valueNode = valueNodes.item(i); + final String valueStr = valueNode.getTextContent(); + if ((valueStr == null) || (valueStr.length() == 0)) { + final String exceptionMessage = String.format(LOCALE, "A value was defined with an no value in enum '%s' in %s.", + enumName, enumsConfigFile); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage); + } + if (enumStringToIntMap.containsKey(valueStr)) { + final String exceptionMessage = String.format(LOCALE, "A duplicated definition was found for value '%s' in enum '%s' in %s.", + valueStr, enumName, enumsConfigFile); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage); + } + enumIntToStringMap.put(i, valueStr); + enumStringToIntMap.put(valueStr, i); + } + } + catch (ParserConfigurationException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e); + } + catch (SAXException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e); + } + catch (XPathExpressionException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing enums config.", e); + } + } + catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error while opening enums config.", e); + } finally { + try { + if (is != null) { + is.close(); + } + } + catch (IOException e) { + e.printStackTrace(); + } + } + + if ((enumStringToIntMap.size() == 0) || (enumIntToStringMap.size() == 0)) { + String exceptionMessage = String.format(LOCALE, "Invalid configuration was defined for enum '%s' in %s.", + enumName, enumsConfigFile); + throw new 
SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage); + } + + args.remove(PARAM_ENUMS_CONFIG); + args.remove(PARAM_ENUM_NAME); + } + + + /** + * {@inheritDoc} + */ + @Override + public EnumFieldValue toObject(StorableField f) { + Integer intValue = null; + String stringValue = null; + final Number val = f.numericValue(); + if (val != null) { + intValue = val.intValue(); + stringValue = intValueToStringValue(intValue); + } + return new EnumFieldValue(intValue, stringValue); + } + + /** + * {@inheritDoc} + */ + @Override + public SortField getSortField(SchemaField field, boolean top) { + field.checkSortability(); + final Object missingValue = Integer.MIN_VALUE; + return new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue); + } + + /** + * {@inheritDoc} + */ + @Override + public ValueSource getValueSource(SchemaField field, QParser qparser) { + field.checkFieldCacheSource(qparser); + return new EnumFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, enumIntToStringMap, enumStringToIntMap); + } + + /** + * {@inheritDoc} + */ + @Override + public void write(TextResponseWriter writer, String name, StorableField f) throws IOException { + final Number val = f.numericValue(); + if (val == null) { + writer.writeNull(name); + return; + } + + final String readableValue = intValueToStringValue(val.intValue()); + writer.writeStr(name, readableValue, true); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isTokenized() { + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) { + Integer minValue = stringValueToIntValue(min); + Integer maxValue = stringValueToIntValue(max); + + if (field.multiValued() && field.hasDocValues() && !field.indexed()) { + // for the multi-valued dv-case, the default rangeimpl over toInternal is correct + return 
super.getRangeQuery(parser, field, minValue.toString(), maxValue.toString(), minInclusive, maxInclusive); + } + Query query = null; + final boolean matchOnly = field.hasDocValues() && !field.indexed(); + if (matchOnly) { + query = new ConstantScoreQuery(FieldCacheRangeFilter.newIntRange(field.getName(), + min == null ? null : minValue, + max == null ? null : maxValue, + minInclusive, maxInclusive)); + } else { + query = NumericRangeQuery.newIntRange(field.getName(), DEFAULT_PRECISION_STEP, + min == null ? null : minValue, + max == null ? null : maxValue, + minInclusive, maxInclusive); + } + + return query; + } + + /** + * {@inheritDoc} + */ + @Override + public void checkSchemaField(final SchemaField field) { + if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) { + throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required"); + } + } + + /** + * {@inheritDoc} + */ + @Override + public String readableToIndexed(String val) { + if (val == null) + return null; + + final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); + readableToIndexed(val, bytes); + return bytes.utf8ToString(); + } + + /** + * {@inheritDoc} + */ + @Override + public void readableToIndexed(CharSequence val, BytesRef result) { + final String s = val.toString(); + if (s == null) + return; + + final Integer intValue = stringValueToIntValue(s); + NumericUtils.intToPrefixCoded(intValue, 0, result); + } + + /** + * {@inheritDoc} + */ + @Override + public String toInternal(String val) { + return readableToIndexed(val); + } + + /** + * {@inheritDoc} + */ + @Override + public String toExternal(StorableField f) { + final Number val = f.numericValue(); + if (val == null) + return null; + + return intValueToStringValue(val.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public String indexedToReadable(String indexedForm) { + if (indexedForm == null) + 
return null; + final BytesRef bytesRef = new BytesRef(indexedForm); + final Integer intValue = NumericUtils.prefixCodedToInt(bytesRef); + return intValueToStringValue(intValue); + } + + /** + * {@inheritDoc} + */ + @Override + public CharsRef indexedToReadable(BytesRef input, CharsRef output) { + final Integer intValue = NumericUtils.prefixCodedToInt(input); + final String stringValue = intValueToStringValue(intValue); + output.grow(stringValue.length()); + output.length = stringValue.length(); + stringValue.getChars(0, output.length, output.chars, 0); + return output; + } + + /** + * {@inheritDoc} + */ + @Override + public EnumFieldValue toObject(SchemaField sf, BytesRef term) { + final Integer intValue = NumericUtils.prefixCodedToInt(term); + final String stringValue = intValueToStringValue(intValue); + return new EnumFieldValue(intValue, stringValue); + } + + /** + * {@inheritDoc} + */ + @Override + public String storedToIndexed(StorableField f) { + final Number val = f.numericValue(); + if (val == null) + return null; + final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); + NumericUtils.intToPrefixCoded(val.intValue(), 0, bytes); + return bytes.utf8ToString(); + } + + /** + * {@inheritDoc} + */ + @Override + public StorableField createField(SchemaField field, Object value, float boost) { + final boolean indexed = field.indexed(); + final boolean stored = field.stored(); + final boolean docValues = field.hasDocValues(); + + if (!indexed && !stored && !docValues) { + if (log.isTraceEnabled()) + log.trace("Ignoring unindexed/unstored field: " + field); + return null; + } + final Integer intValue = stringValueToIntValue(value.toString()); + if (intValue == null || intValue.equals(DEFAULT_VALUE)) + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown value for enum field: " + value.toString()); + + String intAsString = intValue.toString(); + final FieldType newType = new FieldType(); + + newType.setIndexed(field.indexed()); + 
newType.setTokenized(field.isTokenized()); + newType.setStored(field.stored()); + newType.setOmitNorms(field.omitNorms()); + newType.setIndexOptions(getIndexOptions(field, intAsString)); + newType.setStoreTermVectors(field.storeTermVector()); + newType.setStoreTermVectorOffsets(field.storeTermOffsets()); + newType.setStoreTermVectorPositions(field.storeTermPositions()); + newType.setNumericType(FieldType.NumericType.INT); + newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP); + + final org.apache.lucene.document.Field f; + f = new org.apache.lucene.document.IntField(field.getName(), intValue.intValue(), newType); + + f.setBoost(boost); + return f; + } + + /** + * Converting the (internal) integer value (indicating the sort order) to string (displayed) value + * @param intVal integer value + * @return string value + */ + public String intValueToStringValue(Integer intVal) { + if (intVal == null) + return null; + + final String enumString = enumIntToStringMap.get(intVal); + if (enumString != null) + return enumString; + // can't find matching enum name - return DEFAULT_VALUE.toString() + return DEFAULT_VALUE.toString(); + } + + /** + * Converting the string (displayed) value (internal) to integer value (indicating the sort order) + * @param stringVal string value + * @return integer value + */ + public Integer stringValueToIntValue(String stringVal) { + if (stringVal == null) + return null; + + Integer intValue; + final Integer enumInt = enumStringToIntMap.get(stringVal); + if (enumInt != null) //enum int found for string + return enumInt; + + //enum int not found for string + intValue = tryParseInt(stringVal); + if (intValue == null) //not Integer + intValue = DEFAULT_VALUE; + final String enumString = enumIntToStringMap.get(intValue); + if (enumString != null) //has matching string + return intValue; + + return DEFAULT_VALUE; + } + + private static Integer tryParseInt(String valueStr) { + Integer intValue = null; + try { + intValue = Integer.parseInt(valueStr); 
+ } + catch (NumberFormatException e) { + } + return intValue; + } + +} + diff --git a/solr/core/src/test-files/solr/collection1/conf/enumsConfig.xml b/solr/core/src/test-files/solr/collection1/conf/enumsConfig.xml new file mode 100644 index 00000000000..726c8297de7 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/enumsConfig.xml @@ -0,0 +1,33 @@ + + + + + + + + Not Available + Low + Medium + High + Critical + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-enums.xml b/solr/core/src/test-files/solr/collection1/conf/schema-enums.xml new file mode 100644 index 00000000000..dac3fe2754f --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-enums.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + id + + + + + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml index 26af3a7cf5b..90912ba8d5b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml @@ -1,75 +1,61 @@ - + + + - - LUCENE_41 ${solr.data.dir:} - - - + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + - + + ${solr.commitwithin.softcommit:true} + + - - - true - 20 - 20 - - true - - 1 - - - - - - - - explicit - json true text - - - - - text/plain; charset=UTF-8 - + + + + diff --git a/solr/core/src/test/org/apache/solr/schema/EnumFieldTest.java b/solr/core/src/test/org/apache/solr/schema/EnumFieldTest.java new file mode 100644 index 00000000000..0b95dc9b464 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/EnumFieldTest.java @@ -0,0 +1,216 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; +import org.junit.Test; + +public class EnumFieldTest extends SolrTestCaseJ4 { + + private final static String FIELD_NAME = "severity"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-minimal.xml", "schema-enums.xml"); + } + + @Test + public void testEnumSchema() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + + SchemaField enumField = schema.getField(FIELD_NAME); + assertNotNull(enumField); + } + + @Test + public void testEnumRangeSearch() throws Exception { + clearIndex(); + + assertU(adoc("id", "0", FIELD_NAME, "Not Available")); + assertU(adoc("id", "1", FIELD_NAME, "Not Available")); + assertU(adoc("id", "2", FIELD_NAME, "Not Available")); + assertU(adoc("id", "3", FIELD_NAME, "Not Available")); + assertU(adoc("id", "4", FIELD_NAME, "Not Available")); + assertU(adoc("id", "5", FIELD_NAME, "Low")); + assertU(adoc("id", "6", FIELD_NAME, "Low")); + assertU(adoc("id", "7", FIELD_NAME, "Low")); + assertU(adoc("id", "8", FIELD_NAME, "Low")); + assertU(adoc("id", "9", FIELD_NAME, "Medium")); + assertU(adoc("id", "10", FIELD_NAME, "Medium")); + assertU(adoc("id", "11", FIELD_NAME, "Medium")); + assertU(adoc("id", "12", FIELD_NAME, "High")); + assertU(adoc("id", "13", FIELD_NAME, "High")); + 
assertU(adoc("id", "14", FIELD_NAME, "Critical")); + + // two docs w/o values + for (int i = 20; i <= 21; i++) { + assertU(adoc("id", "" + i)); + } + + assertU(commit()); + + //range with the same value + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[\"Not Available\" TO \"Not Available\"]"), + "//*[@numFound='5']"); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[\"Not Available\" TO Critical]"), + "//*[@numFound='15']"); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[Low TO High]"), + "//*[@numFound='9']"); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[High TO Low]"), + "//*[@numFound='0']"); + + //with int values + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[High TO 4]"), + "//*[@numFound='3']"); + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[3 TO Critical]"), + "//*[@numFound='3']"); + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[3 TO 4]"), + "//*[@numFound='3']"); + + //exclusive + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":{Low TO High]"), + "//*[@numFound='5']"); + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[Low TO High}"), + "//*[@numFound='7']"); + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":{Low TO High}"), + "//*[@numFound='3']"); + + //all docs + assertQ(req("fl", "" + FIELD_NAME, "q", + "*:*"), + "//*[@numFound='17']"); + + //all docs with values + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":[* TO *]"), + "//*[@numFound='15']"); + + //empty docs + assertQ(req("fl", "" + FIELD_NAME, "q", + "-" + FIELD_NAME + ":[* TO *]"), + "//*[@numFound='2']"); + } + + @Test + public void testBogusEnumSearch() throws Exception { + clearIndex(); + + assertU(adoc("id", "0", FIELD_NAME, "Not Available")); + assertU(adoc("id", "1", FIELD_NAME, "Low")); + assertU(adoc("id", "2", FIELD_NAME, "Medium")); + assertU(adoc("id", "3", FIELD_NAME, "High")); + assertU(adoc("id", "4", FIELD_NAME, "Critical")); + + // two docs w/o 
values + for (int i = 8; i <= 9; i++) { + assertU(adoc("id", "" + i)); + } + + assertU(commit()); + + SolrQueryRequest eoe = req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":bla"); + String eoe1 = eoe.toString(); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":bla"), + "//*[@numFound='0']"); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":7"), + "//*[@numFound='0']"); + + assertQ(req("fl", "" + FIELD_NAME, "q", + FIELD_NAME + ":\"-3\""), + "//*[@numFound='0']"); + } + + @Test + public void testBogusEnumIndexing() throws Exception { + + ignoreException("Unknown value for enum field: blabla"); + ignoreException("Unknown value for enum field: 10"); + ignoreException("Unknown value for enum field: -4"); + + clearIndex(); + + assertFailedU(adoc("id", "0", FIELD_NAME, "blabla")); + assertFailedU(adoc("id", "0", FIELD_NAME, "10")); + assertFailedU(adoc("id", "0", FIELD_NAME, "-4")); + + } + + @Test + public void testKnownIntegerEnumIndexing() throws Exception { + clearIndex(); + + assertU(adoc("id", "0", FIELD_NAME, "1")); + + assertU(commit()); + + assertQ(req("fl", "" + FIELD_NAME, "q", "*:*"), "//doc[1]/str[@name='severity']/text()='Low'"); + } + + @Test + public void testEnumSort() throws Exception { + clearIndex(); + + assertU(adoc("id", "0", FIELD_NAME, "Not Available")); + assertU(adoc("id", "1", FIELD_NAME, "Low")); + assertU(adoc("id", "2", FIELD_NAME, "Medium")); + assertU(adoc("id", "3", FIELD_NAME, "High")); + assertU(adoc("id", "4", FIELD_NAME, "Critical")); + + // two docs w/o values + for (int i = 8; i <= 9; i++) { + assertU(adoc("id", "" + i)); + } + + assertU(commit()); + + assertQ(req("fl", "" + FIELD_NAME, "q", "*:*", "sort", FIELD_NAME + " desc"), "//doc[1]/str[@name='severity']/text()='Critical'", + "//doc[2]/str[@name='severity']/text()='High'", "//doc[3]/str[@name='severity']/text()='Medium'", "//doc[4]/str[@name='severity']/text()='Low'", + "//doc[5]/str[@name='severity']/text()='Not Available'"); + + //sort ascending - 
empty values will be first + assertQ(req("fl", "" + FIELD_NAME, "q", "*:*", "sort", FIELD_NAME + " asc"), "//doc[3]/str[@name='severity']/text()='Not Available'"); + + //q for not empty docs + assertQ(req("fl", "" + FIELD_NAME, "q", FIELD_NAME + ":[* TO *]" , "sort", FIELD_NAME + " asc"), "//doc[1]/str[@name='severity']/text()='Not Available'", + "//doc[2]/str[@name='severity']/text()='Low'", "//doc[3]/str[@name='severity']/text()='Medium'", "//doc[4]/str[@name='severity']/text()='High'", + "//doc[5]/str[@name='severity']/text()='Critical'" + ); + } + +} + diff --git a/solr/solrj/src/java/org/apache/solr/common/EnumFieldValue.java b/solr/solrj/src/java/org/apache/solr/common/EnumFieldValue.java new file mode 100644 index 00000000000..50d1fb0a5b2 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/EnumFieldValue.java @@ -0,0 +1,116 @@ +package org.apache.solr.common; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; + +/** + * Represents a Enum field value, which includes integer value (indicating the sort order) and string (displayed) value. 
/**
 * Represents an enum field value: an integer value (indicating the sort order) paired with a
 * string (displayed) value.
 * <p>
 * Note: this class has a natural ordering that is inconsistent with equals.
 * {@link #compareTo(EnumFieldValue)} looks only at the integer value, whereas
 * {@link #equals(Object)} requires both the integer and the string value to match.
 */
public final class EnumFieldValue implements Serializable, Comparable<EnumFieldValue> {
  private final Integer intValue;
  private final String stringValue;

  /**
   * @param intValue integer value (indicating the sort order); may be null
   * @param stringValue string (displayed) value; may be null
   */
  public EnumFieldValue(Integer intValue, String stringValue) {
    this.intValue = intValue;
    this.stringValue = stringValue;
  }

  /**
   * Hash code derived from both the integer and the string value, so it stays consistent
   * with {@link #equals(Object)}. A null component contributes 0.
   */
  @Override
  public int hashCode() {
    int result = intValue != null ? intValue.hashCode() : 0;
    result = 31 * result + (stringValue != null ? stringValue.hashCode() : 0);
    return result;
  }

  /**
   * Two values are equal when both their integer values and their string values are equal;
   * a null component only equals another null component.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    // instanceof is false for null, so this also rejects a null argument
    if (!(obj instanceof EnumFieldValue)) {
      return false;
    }
    EnumFieldValue other = (EnumFieldValue) obj;
    return nullSafeEquals(intValue, other.intValue)
        && nullSafeEquals(stringValue, other.stringValue);
  }

  /**
   * @return string (displayed) value
   */
  @Override
  public String toString() {
    return stringValue;
  }

  /**
   * @return integer value (indicating the sort order)
   */
  public Integer toInt() {
    return intValue;
  }

  /**
   * Orders values by their integer value only; the string value is ignored.
   * A null integer value sorts before any non-null one. Unlike the general
   * {@link Comparable} recommendation, a null argument does not throw: this
   * value is treated as greater than null and 1 is returned.
   */
  @Override
  public int compareTo(EnumFieldValue o) {
    if (o == null) {
      return 1;
    }
    return nullSafeCompare(intValue, o.intValue);
  }

  /** Equality check in which two nulls are equal and null never equals non-null. */
  private static boolean nullSafeEquals(Object a, Object b) {
    if (a == null) {
      return b == null;
    }
    return a.equals(b);
  }

  /** Integer comparison in which null is smaller than every non-null value. */
  private static int nullSafeCompare(Integer a, Integer b) {
    if (a == null) {
      return (b == null) ? 0 : -1;
    }
    if (b == null) {
      return 1;
    }
    return a.compareTo(b);
  }
}
a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java index c23d0455dba..f6e322861bb 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java @@ -16,6 +16,7 @@ */ package org.apache.solr.common.util; +import org.apache.solr.common.EnumFieldValue; import org.noggit.CharArr; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; @@ -62,7 +63,8 @@ public class JavaBinCodec { END = 15, SOLRINPUTDOC = 16, - + SOLRINPUTDOC_CHILDS = 17, + ENUM_FIELD_VALUE = 18, // types that combine tag + length (or other info) in a single byte TAG_AND_LEN = (byte) (1 << 5), STR = (byte) (1 << 5), @@ -223,6 +225,8 @@ public class JavaBinCodec { return END_OBJ; case SOLRINPUTDOC: return readSolrInputDocument(dis); + case ENUM_FIELD_VALUE: + return readEnumFieldValue(dis); } throw new RuntimeException("Unknown type " + tagByte); @@ -278,6 +282,10 @@ public class JavaBinCodec { writeIterator(((Iterable) val).iterator()); return true; } + if (val instanceof EnumFieldValue) { + writeEnumFieldValue((EnumFieldValue) val); + return true; + } return false; } @@ -463,6 +471,27 @@ public class JavaBinCodec { return l; } + /** + * write {@link EnumFieldValue} as tag+int value+string value + * @param enumFieldValue to write + */ + public void writeEnumFieldValue(EnumFieldValue enumFieldValue) throws IOException { + writeTag(ENUM_FIELD_VALUE); + writeInt(enumFieldValue.toInt()); + writeStr(enumFieldValue.toString()); + } + + /** + * read {@link EnumFieldValue} (int+string) from input stream + * @param dis data input stream + * @return {@link EnumFieldValue} + */ + public EnumFieldValue readEnumFieldValue(DataInputInputStream dis) throws IOException { + Integer intValue = (Integer) readVal(dis); + String stringValue = (String) readVal(dis); + return new EnumFieldValue(intValue, stringValue); + 
} + /** * write the string as tag+length, with length being the number of UTF-8 bytes */ diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractZkTestCase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractZkTestCase.java index 2e9a0f0e108..41427ab31a2 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractZkTestCase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractZkTestCase.java @@ -109,6 +109,7 @@ public abstract class AbstractZkTestCase extends SolrTestCaseJ4 { putConfig("conf1", zkClient, solrhome, "stopwords.txt"); putConfig("conf1", zkClient, solrhome, "protwords.txt"); putConfig("conf1", zkClient, solrhome, "currency.xml"); + putConfig("conf1", zkClient, solrhome, "enumsConfig.xml"); putConfig("conf1", zkClient, solrhome, "open-exchange-rates.json"); putConfig("conf1", zkClient, solrhome, "mapping-ISOLatin1Accent.txt"); putConfig("conf1", zkClient, solrhome, "old_synonyms.txt");