SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.

This commit is contained in:
Steve Rowe 2016-03-16 18:56:23 -04:00
parent e76fa56817
commit 6e55135be3
3 changed files with 128 additions and 37 deletions

View File

@ -317,6 +317,9 @@ Bug Fixes
* SOLR-8835: JSON Facet API: fix faceting exception on multi-valued numeric fields that
have docValues. (yonik)
* SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.
(Ishan Chattopadhyaya, Steve Rowe)
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@ -816,9 +816,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
if (schemaField.getType() instanceof TrieIntField) {
newVal = val.intValue();
} else if (schemaField.getType() instanceof TrieFloatField) {
newVal = NumericUtils.sortableIntToFloat(val.intValue());
newVal = Float.intBitsToFloat(val.intValue());
} else if (schemaField.getType() instanceof TrieDoubleField) {
newVal = NumericUtils.sortableLongToDouble(val);
newVal = Double.longBitsToDouble(val);
} else if (schemaField.getType() instanceof TrieDateField) {
newVal = new Date(val);
} else if (schemaField.getType() instanceof EnumField) {

View File

@ -16,15 +16,36 @@
*/
package org.apache.solr.schema;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.core.AbstractBadConfigTestBase;
import org.apache.solr.util.DOMUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
* Tests the useDocValuesAsStored functionality.
@ -39,21 +60,47 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase {
private static final String collection = "collection1";
private static final String confDir = collection + "/conf";
private static final long START_RANDOM_EPOCH_MILLIS;
private static final long END_RANDOM_EPOCH_MILLIS;
private static final String[] SEVERITY;
// http://www.w3.org/TR/2006/REC-xml-20060816/#charsets
private static final String NON_XML_CHARS = "\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE\uFFFF";
// Avoid single quotes (problematic in XPath literals) and carriage returns (XML roundtripping fails)
private static final Pattern BAD_CHAR_PATTERN = Pattern.compile("[\'\r" + NON_XML_CHARS + "]");
private static final Pattern STORED_FIELD_NAME_PATTERN = Pattern.compile("_dv$");
static {
// Copy of DateTimeFormatter.ISO_INSTANT with fixed 3 digit milliseconds
START_RANDOM_EPOCH_MILLIS = LocalDateTime.of(1970, Month.JANUARY, 1, 0, 0)
.toInstant(ZoneOffset.UTC).toEpochMilli();
END_RANDOM_EPOCH_MILLIS = LocalDateTime.of(2030, Month.DECEMBER, 31, 23, 59, 59, 999_000_000)
.toInstant(ZoneOffset.UTC).toEpochMilli();
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
InputStream stream = TestUseDocValuesAsStored.class.getResourceAsStream("/solr/collection1/conf/enumsConfig.xml");
Document doc = builder.parse(new InputSource(IOUtils.getDecodingReader(stream, StandardCharsets.UTF_8)));
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList)xpath.evaluate
("/enumsConfig/enum[@name='severity']/value", doc, XPathConstants.NODESET);
SEVERITY = new String[nodes.getLength()];
for (int i = 0 ; i < nodes.getLength() ; ++i) {
SEVERITY[i] = DOMUtil.getText(nodes.item(i));
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Before
private void initManagedSchemaCore() throws Exception {
tmpSolrHome = createTempDir().toFile();
tmpConfDir = new File(tmpSolrHome, confDir);
File testHomeConfDir = new File(TEST_HOME(), confDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-managed-schema.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-basic.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig.snippet.randomindexconfig.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field-unique-key.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "enumsConfig.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-non-stored-docvalues.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-minimal.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema_codec.xml"), tmpConfDir);
FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-bm25.xml"), tmpConfDir);
// initCore will trigger an upgrade to managed schema, since the solrconfig has
// <schemaFactory class="ManagedIndexSchemaFactory" ... />
@ -116,31 +163,68 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase {
}
@Test
public void testSinglyValued() throws IOException {
clearIndex();
doTest("check string value is correct", "test_s_dvo", "str", "keyword");
doTest("check int value is correct", "test_i_dvo", "int", "1234");
doTest("check double value is correct", "test_d_dvo", "double", "1.234");
doTest("check long value is correct", "test_l_dvo", "long", "12345");
doTest("check float value is correct", "test_f_dvo", "float", "1.234");
doTest("check dt value is correct", "test_dt_dvo", "date", "1976-07-04T12:08:56.235Z");
doTest("check stored and docValues value is correct", "test_s_dv", "str", "storedAndDocValues");
doTest("check non-stored and non-indexed is accessible", "test_s_dvo2", "str", "gotIt");
doTest("enumField", "enum_dvo", "str", "Critical");
public void testRandomSingleAndMultiValued() throws Exception {
for (int c = 0 ; c < 10 * RANDOM_MULTIPLIER ; ++c) {
clearIndex();
int[] arity = new int[9];
for (int a = 0 ; a < arity.length ; ++a) {
// Single-valued 50% of the time; other 50%: 2-10 values equally likely
arity[a] = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 10);
}
doTest("check string value is correct", dvStringFieldName(arity[0], true, false), "str", nextValues(arity[0], "str"));
doTest("check int value is correct", "test_i" + plural(arity[1]) + "_dvo", "int", nextValues(arity[1], "int"));
doTest("check double value is correct", "test_d" + plural(arity[2]) + "_dvo", "double", nextValues(arity[2], "double"));
doTest("check long value is correct", "test_l" + plural(arity[3]) + "_dvo", "long", nextValues(arity[3], "long"));
doTest("check float value is correct", "test_f" + plural(arity[4]) + "_dvo", "float", nextValues(arity[4], "float"));
doTest("check date value is correct", "test_dt" + plural(arity[5]) + "_dvo", "date", nextValues(arity[5], "date"));
doTest("check stored and docValues value is correct", dvStringFieldName(arity[6], true, true), "str", nextValues(arity[6], "str"));
doTest("check non-stored and non-indexed is accessible", dvStringFieldName(arity[7], false, false), "str", nextValues(arity[7], "str"));
doTest("enumField", "enum" + plural(arity[8]) + "_dvo", "str", nextValues(arity[8], "enum"));
}
}
@Test
public void testMultiValued() throws IOException {
clearIndex();
doTest("check string value is correct", "test_ss_dvo", "str", "keyword", "keyword2");
doTest("check int value is correct", "test_is_dvo", "int", "1234", "12345");
doTest("check double value is correct", "test_ds_dvo", "double", "1.234", "12.34", "123.4");
doTest("check long value is correct", "test_ls_dvo", "long", "12345", "123456");
doTest("check float value is correct", "test_fs_dvo", "float", "1.234", "12.34");
doTest("check dt value is correct", "test_dts_dvo", "date", "1976-07-04T12:08:56.235Z", "1978-07-04T12:08:56.235Z");
doTest("check stored and docValues value is correct", "test_ss_dv", "str", "storedAndDocValues", "storedAndDocValues2");
doTest("check non-stored and non-indexed is accessible", "test_ss_dvo2", "str", "gotIt", "gotIt2");
doTest("enumField", "enums_dvo", "str", "High", "Critical");
private String plural(int arity) {
return arity > 1 ? "s" : "";
}
private static boolean isStoredField(String fieldName) {
return STORED_FIELD_NAME_PATTERN.matcher(fieldName).find();
}
private String dvStringFieldName(int arity, boolean indexed, boolean stored) {
String base = "test_s" + (arity > 1 ? "s": "");
String suffix = "";
if (indexed && stored) suffix = "_dv";
else if (indexed && ! stored) suffix = "_dvo";
else if ( ! indexed && ! stored) suffix = "_dvo2";
else assertTrue("unsupported dv string field combination: stored and not indexed", false);
return base + suffix;
}
private String[] nextValues(int arity, String valueType) throws Exception {
String[] values = new String[arity];
for (int i = 0 ; i < arity ; ++i) {
switch (valueType) {
case "int": values[i] = String.valueOf(random().nextInt()); break;
case "double": values[i] = String.valueOf(Double.longBitsToDouble(random().nextLong())); break;
case "long": values[i] = String.valueOf(random().nextLong()); break;
case "float": values[i] = String.valueOf(Float.intBitsToFloat(random().nextInt())); break;
case "enum": values[i] = SEVERITY[TestUtil.nextInt(random(), 0, SEVERITY.length - 1)]; break;
case "str": {
String str = TestUtil.randomRealisticUnicodeString(random());
values[i] = BAD_CHAR_PATTERN.matcher(str).replaceAll("\uFFFD");
break;
}
case "date": {
long epochMillis = TestUtil.nextLong(random(), START_RANDOM_EPOCH_MILLIS, END_RANDOM_EPOCH_MILLIS);
LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), ZoneOffset.UTC);
values[i] = dateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + 'Z';
break;
}
default: throw new Exception("unknown type '" + valueType + "'");
}
}
return values;
}
@Test
@ -198,6 +282,8 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase {
String[] xpaths = new String[value.length + 1];
if (value.length > 1) {
Set<String> valueSet = new HashSet<>();
valueSet.addAll(Arrays.asList(value));
String[] fieldAndValues = new String[value.length * 2 + 2];
fieldAndValues[0] = "id";
fieldAndValues[1] = id;
@ -205,10 +291,12 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase {
for (int i = 0; i < value.length; ++i) {
fieldAndValues[i * 2 + 2] = field;
fieldAndValues[i * 2 + 3] = value[i];
xpaths[i] = "//arr[@name='" + field + "']/" + type + "[" + (i + 1) + "][.='" + value[i] + "']";
xpaths[i] = "//arr[@name='" + field + "']/" + type + "[.='" + value[i] + "']";
}
xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " + value.length + "]";
// Docvalues are sets, but stored values are ordered multisets, so cardinality depends on the value source
xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = "
+ (isStoredField(field) ? value.length : valueSet.size()) + "]";
assertU(adoc(fieldAndValues));
} else {