diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b5b1a04891f..0eac27cfaab 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -316,6 +316,9 @@ Bug Fixes * SOLR-8835: JSON Facet API: fix faceting exception on multi-valued numeric fields that have docValues. (yonik) +* SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles. + (Ishan Chattopadhyaya, Steve Rowe) + Optimizations ---------------------- * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 39fc8dd038e..4c9790a4249 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -816,9 +816,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI if (schemaField.getType() instanceof TrieIntField) { newVal = val.intValue(); } else if (schemaField.getType() instanceof TrieFloatField) { - newVal = NumericUtils.sortableIntToFloat(val.intValue()); + newVal = Float.intBitsToFloat(val.intValue()); } else if (schemaField.getType() instanceof TrieDoubleField) { - newVal = NumericUtils.sortableLongToDouble(val); + newVal = Double.longBitsToDouble(val); } else if (schemaField.getType() instanceof TrieDateField) { newVal = new Date(val); } else if (schemaField.getType() instanceof EnumField) { diff --git a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java index 839121acd3c..ac245cd9ff1 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java +++ b/solr/core/src/test/org/apache/solr/schema/TestUseDocValuesAsStored.java @@ -16,15 +16,36 @@ */ package org.apache.solr.schema; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathFactory; import java.io.File; -import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.Month; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.TestUtil; import org.apache.solr.core.AbstractBadConfigTestBase; +import org.apache.solr.util.DOMUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; /** * Tests the useDocValuesAsStored functionality. @@ -38,30 +59,56 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { private static final String collection = "collection1"; private static final String confDir = collection + "/conf"; - + + private static final long START_RANDOM_EPOCH_MILLIS; + private static final long END_RANDOM_EPOCH_MILLIS; + private static final String[] SEVERITY; + + // http://www.w3.org/TR/2006/REC-xml-20060816/#charsets + private static final String NON_XML_CHARS = "\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE\uFFFF"; + // Avoid single quotes (problematic in XPath literals) and carriage returns (XML roundtripping fails) + private static final Pattern BAD_CHAR_PATTERN = Pattern.compile("[\'\r" + NON_XML_CHARS + "]"); + private static final Pattern STORED_FIELD_NAME_PATTERN = Pattern.compile("_dv$"); + + static { + // Copy of DateTimeFormatter.ISO_INSTANT with fixed 3 digit milliseconds + START_RANDOM_EPOCH_MILLIS = LocalDateTime.of(1970, Month.JANUARY, 1, 0, 0) + .toInstant(ZoneOffset.UTC).toEpochMilli(); + END_RANDOM_EPOCH_MILLIS = LocalDateTime.of(2030, Month.DECEMBER, 31, 23, 59, 59, 999_000_000) + .toInstant(ZoneOffset.UTC).toEpochMilli(); + try { + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + InputStream stream = TestUseDocValuesAsStored.class.getResourceAsStream("/solr/collection1/conf/enumsConfig.xml"); + Document doc = builder.parse(new InputSource(IOUtils.getDecodingReader(stream, StandardCharsets.UTF_8))); + XPath xpath = XPathFactory.newInstance().newXPath(); + NodeList nodes = (NodeList)xpath.evaluate + ("/enumsConfig/enum[@name='severity']/value", doc, XPathConstants.NODESET); + SEVERITY = new String[nodes.getLength()]; + for (int i = 0 ; i < nodes.getLength() ; ++i) { + SEVERITY[i] = DOMUtil.getText(nodes.item(i)); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + @Before private void initManagedSchemaCore() throws Exception { tmpSolrHome = createTempDir().toFile(); tmpConfDir = new File(tmpSolrHome, confDir); File testHomeConfDir = new File(TEST_HOME(), confDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-managed-schema.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig-basic.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "solrconfig.snippet.randomindexconfig.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-one-field-no-dynamic-field-unique-key.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "enumsConfig.xml"), tmpConfDir); FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-non-stored-docvalues.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-minimal.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema_codec.xml"), tmpConfDir); - FileUtils.copyFileToDirectory(new File(testHomeConfDir, "schema-bm25.xml"), tmpConfDir); - + // initCore will trigger an upgrade to managed schema, since the solrconfig has // System.setProperty("enable.update.log", "false"); System.setProperty("managed.schema.mutable", "true"); initCore("solrconfig-managed-schema.xml", "schema-non-stored-docvalues.xml", tmpSolrHome.getPath()); } - + @After private void afterClass() throws Exception { deleteCore(); @@ -116,31 +163,68 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { } @Test - public void testSinglyValued() throws IOException { - clearIndex(); - doTest("check string value is correct", "test_s_dvo", "str", "keyword"); - doTest("check int value is correct", "test_i_dvo", "int", "1234"); - doTest("check double value is correct", "test_d_dvo", "double", "1.234"); - doTest("check long value is correct", "test_l_dvo", "long", "12345"); - doTest("check float value is correct", "test_f_dvo", "float", "1.234"); - doTest("check dt value is correct", "test_dt_dvo", "date", "1976-07-04T12:08:56.235Z"); - doTest("check stored and docValues value is correct", "test_s_dv", "str", "storedAndDocValues"); - doTest("check non-stored and non-indexed is accessible", "test_s_dvo2", "str", "gotIt"); - doTest("enumField", "enum_dvo", "str", "Critical"); + public void testRandomSingleAndMultiValued() throws Exception { + for (int c = 0 ; c < 10 * RANDOM_MULTIPLIER ; ++c) { + clearIndex(); + int[] arity = new int[9]; + for (int a = 0 ; a < arity.length ; ++a) { + // Single-valued 50% of the time; other 50%: 2-10 values equally likely + arity[a] = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 10); + } + doTest("check string value is correct", dvStringFieldName(arity[0], true, false), "str", nextValues(arity[0], "str")); + doTest("check int value is correct", "test_i" + plural(arity[1]) + "_dvo", "int", nextValues(arity[1], "int")); + doTest("check double value is correct", "test_d" + plural(arity[2]) + "_dvo", "double", nextValues(arity[2], "double")); + doTest("check long value is correct", "test_l" + plural(arity[3]) + "_dvo", "long", nextValues(arity[3], "long")); + doTest("check float value is correct", "test_f" + plural(arity[4]) + "_dvo", "float", nextValues(arity[4], "float")); + doTest("check date value is correct", "test_dt" + plural(arity[5]) + "_dvo", "date", nextValues(arity[5], "date")); + doTest("check stored and docValues value is correct", dvStringFieldName(arity[6], true, true), "str", nextValues(arity[6], "str")); + doTest("check non-stored and non-indexed is accessible", dvStringFieldName(arity[7], false, false), "str", nextValues(arity[7], "str")); + doTest("enumField", "enum" + plural(arity[8]) + "_dvo", "str", nextValues(arity[8], "enum")); + } } - @Test - public void testMultiValued() throws IOException { - clearIndex(); - doTest("check string value is correct", "test_ss_dvo", "str", "keyword", "keyword2"); - doTest("check int value is correct", "test_is_dvo", "int", "1234", "12345"); - doTest("check double value is correct", "test_ds_dvo", "double", "1.234", "12.34", "123.4"); - doTest("check long value is correct", "test_ls_dvo", "long", "12345", "123456"); - doTest("check float value is correct", "test_fs_dvo", "float", "1.234", "12.34"); - doTest("check dt value is correct", "test_dts_dvo", "date", "1976-07-04T12:08:56.235Z", "1978-07-04T12:08:56.235Z"); - doTest("check stored and docValues value is correct", "test_ss_dv", "str", "storedAndDocValues", "storedAndDocValues2"); - doTest("check non-stored and non-indexed is accessible", "test_ss_dvo2", "str", "gotIt", "gotIt2"); - doTest("enumField", "enums_dvo", "str", "High", "Critical"); + private String plural(int arity) { + return arity > 1 ? "s" : ""; + } + + private static boolean isStoredField(String fieldName) { + return STORED_FIELD_NAME_PATTERN.matcher(fieldName).find(); + } + + private String dvStringFieldName(int arity, boolean indexed, boolean stored) { + String base = "test_s" + (arity > 1 ? "s": ""); + String suffix = ""; + if (indexed && stored) suffix = "_dv"; + else if (indexed && ! stored) suffix = "_dvo"; + else if ( ! indexed && ! stored) suffix = "_dvo2"; + else assertTrue("unsupported dv string field combination: stored and not indexed", false); + return base + suffix; + } + + private String[] nextValues(int arity, String valueType) throws Exception { + String[] values = new String[arity]; + for (int i = 0 ; i < arity ; ++i) { + switch (valueType) { + case "int": values[i] = String.valueOf(random().nextInt()); break; + case "double": values[i] = String.valueOf(Double.longBitsToDouble(random().nextLong())); break; + case "long": values[i] = String.valueOf(random().nextLong()); break; + case "float": values[i] = String.valueOf(Float.intBitsToFloat(random().nextInt())); break; + case "enum": values[i] = SEVERITY[TestUtil.nextInt(random(), 0, SEVERITY.length - 1)]; break; + case "str": { + String str = TestUtil.randomRealisticUnicodeString(random()); + values[i] = BAD_CHAR_PATTERN.matcher(str).replaceAll("\uFFFD"); + break; + } + case "date": { + long epochMillis = TestUtil.nextLong(random(), START_RANDOM_EPOCH_MILLIS, END_RANDOM_EPOCH_MILLIS); + LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), ZoneOffset.UTC); + values[i] = dateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + 'Z'; + break; + } + default: throw new Exception("unknown type '" + valueType + "'"); + } + } + return values; } @Test @@ -198,6 +282,8 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { String[] xpaths = new String[value.length + 1]; if (value.length > 1) { + Set valueSet = new HashSet<>(); + valueSet.addAll(Arrays.asList(value)); String[] fieldAndValues = new String[value.length * 2 + 2]; fieldAndValues[0] = "id"; fieldAndValues[1] = id; @@ -205,10 +291,12 @@ public class TestUseDocValuesAsStored extends AbstractBadConfigTestBase { for (int i = 0; i < value.length; ++i) { fieldAndValues[i * 2 + 2] = field; fieldAndValues[i * 2 + 3] = value[i]; - xpaths[i] = "//arr[@name='" + field + "']/" + type + "[" + (i + 1) + "][.='" + value[i] + "']"; + xpaths[i] = "//arr[@name='" + field + "']/" + type + "[.='" + value[i] + "']"; } - xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " + value.length + "]"; + // Docvalues are sets, but stored values are ordered multisets, so cardinality depends on the value source + xpaths[value.length] = "*[count(//arr[@name='" + field + "']/" + type + ") = " + + (isStoredField(field) ? value.length : valueSet.size()) + "]"; assertU(adoc(fieldAndValues)); } else {