diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e9a22db2953..9135cfa567b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -360,6 +360,8 @@ Bug Fixes * SOLR-10715: /v2/ should not be an alias for /v2/collections (Cao Manh Dat) +* SOLR-10835: Add support for point fields in Export Handler (Tomás Fernández Löbbe) + Optimizations ---------------------- * SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1) diff --git a/solr/core/src/java/org/apache/solr/handler/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/ExportWriter.java index 46ec3a4a394..61f937ce05e 100644 --- a/solr/core/src/java/org/apache/solr/handler/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/ExportWriter.java @@ -17,6 +17,10 @@ package org.apache.solr.handler; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; +import static org.apache.solr.common.util.Utils.makeMap; + import java.io.Closeable; import java.io.IOException; import java.io.OutputStream; @@ -26,6 +30,7 @@ import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.List; +import java.util.function.LongFunction; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexableField; @@ -34,6 +39,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Sort; @@ -44,6 +50,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.NumericUtils; import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.MapWriter; @@ -60,25 +67,21 @@ import org.apache.solr.response.JSONResponseWriter; import org.apache.solr.response.QueryResponseWriter; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DateValueFieldType; +import org.apache.solr.schema.DoubleValueFieldType; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatValueFieldType; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.IntValueFieldType; +import org.apache.solr.schema.LongValueFieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; -import org.apache.solr.schema.TrieDateField; -import org.apache.solr.schema.TrieDoubleField; -import org.apache.solr.schema.TrieFloatField; -import org.apache.solr.schema.TrieIntField; -import org.apache.solr.schema.TrieLongField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SortSpec; import org.apache.solr.search.SyntaxError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static java.util.Collections.singletonList; -import static java.util.Collections.singletonMap; -import static org.apache.solr.common.util.Utils.makeMap; - public class ExportWriter implements SolrCore.RawWriter, Closeable { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private OutputStreamWriter respWriter; @@ -322,25 +325,25 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { boolean multiValued = schemaField.multiValued(); FieldType fieldType = schemaField.getType(); - if (fieldType instanceof TrieIntField) { + if (fieldType instanceof IntValueFieldType) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true); } else { writers[i] = new IntFieldWriter(field); } - } else if (fieldType instanceof TrieLongField) { + } else if (fieldType instanceof LongValueFieldType) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true); } else { writers[i] = new LongFieldWriter(field); } - } else if (fieldType instanceof TrieFloatField) { + } else if (fieldType instanceof FloatValueFieldType) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true); } else { writers[i] = new FloatFieldWriter(field); } - } else if (fieldType instanceof TrieDoubleField) { + } else if (fieldType instanceof DoubleValueFieldType) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true); } else { @@ -352,7 +355,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { } else { writers[i] = new StringFieldWriter(field, fieldType); } - } else if (fieldType instanceof TrieDateField) { + } else if (fieldType instanceof DateValueFieldType) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false); } else { @@ -385,25 +388,25 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { throw new IOException(field+" must have DocValues to use this feature."); } - if(ft instanceof TrieIntField) { + if(ft instanceof IntValueFieldType) { if(reverse) { sortValues[i] = new IntValue(field, new IntDesc()); } else { sortValues[i] = new IntValue(field, new IntAsc()); } - } else if(ft instanceof TrieFloatField) { + } else if(ft instanceof FloatValueFieldType) { if(reverse) { sortValues[i] = new FloatValue(field, new FloatDesc()); } else { sortValues[i] = new FloatValue(field, new FloatAsc()); } - } else if(ft instanceof TrieDoubleField) { + } else if(ft instanceof DoubleValueFieldType) { if(reverse) { sortValues[i] = new DoubleValue(field, new DoubleDesc()); } else { sortValues[i] = new DoubleValue(field, new DoubleAsc()); } - } else if(ft instanceof TrieLongField) { + } else if(ft instanceof LongValueFieldType) { if(reverse) { sortValues[i] = new LongValue(field, new LongDesc()); } else { @@ -417,7 +420,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { } else { sortValues[i] = new StringValue(vals, field, new IntAsc()); } - } else if (ft instanceof TrieDateField) { + } else if (ft instanceof DateValueFieldType) { if (reverse) { sortValues[i] = new LongValue(field, new LongDesc()); } else { @@ -1352,6 +1355,23 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { return true; } } + + static LongFunction bitsToValue(FieldType fieldType) { + switch (fieldType.getNumberType()) { + case LONG: + return (bits)-> bits; + case DATE: + return (bits)-> new Date(bits); + case INTEGER: + return (bits)-> (int)bits; + case FLOAT: + return (bits)-> NumericUtils.sortableIntToFloat((int)bits); + case DOUBLE: + return (bits)-> NumericUtils.sortableLongToDouble(bits); + default: + throw new AssertionError("Unsupported NumberType: " + fieldType.getNumberType()); + } + } class MultiFieldWriter extends FieldWriter { private String field; @@ -1359,29 +1379,48 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { private SchemaField schemaField; private boolean numeric; private CharsRefBuilder cref = new CharsRefBuilder(); + private final LongFunction bitsToValue; public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) { this.field = field; this.fieldType = fieldType; this.schemaField = schemaField; this.numeric = numeric; + if (this.fieldType.isPointField()) { + bitsToValue = bitsToValue(fieldType); + } else { + bitsToValue = null; + } } public boolean write(int docId, LeafReader reader, EntryWriter out, int fieldIndex) throws IOException { - SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field); - if (vals.advance(docId) != docId) return false; - out.put(this.field, - (IteratorWriter) w -> { - long o; - while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - BytesRef ref = vals.lookupOrd(o); - fieldType.indexedToReadable(ref, cref); - IndexableField f = fieldType.createField(schemaField, cref.toString()); - if (f == null) w.add(cref.toString()); - else w.add(fieldType.toObject(f)); - } - }); - return true; + if (this.fieldType.isPointField()) { + SortedNumericDocValues vals = DocValues.getSortedNumeric(reader, this.field); + if (!vals.advanceExact(docId)) return false; + out.put(this.field, + (IteratorWriter) w -> { + for (int i = 0; i < vals.docValueCount(); i++) { + w.add(bitsToValue.apply(vals.nextValue())); + } + }); + return true; + } else { + SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field); + if (vals.advance(docId) != docId) return false; + out.put(this.field, + (IteratorWriter) w -> { + long o; + while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + BytesRef ref = vals.lookupOrd(o); + fieldType.indexedToReadable(ref, cref); + IndexableField f = fieldType.createField(schemaField, cref.toString()); + if (f == null) w.add(cref.toString()); + else w.add(fieldType.toObject(f)); + } + }); + return true; + } + } } diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index ecf0daf6542..6ec93fa9c71 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -26,6 +26,14 @@ seconds part (.999) is optional. --> + + + + + + + + @@ -33,20 +41,63 @@ - - - - - + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id diff --git a/solr/core/src/test/org/apache/solr/response/TestExportWriter.java b/solr/core/src/test/org/apache/solr/response/TestExportWriter.java index 5a303e9893d..c55678702b1 100644 --- a/solr/core/src/test/org/apache/solr/response/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/response/TestExportWriter.java @@ -16,17 +16,40 @@ */ package org.apache.solr.response; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.util.Utils; -import org.junit.*; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import org.apache.lucene.index.LeafReader; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.SuppressForbidden; +import org.apache.solr.common.util.Utils; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; -@SuppressCodecs({"Lucene3x", "Lucene40","Lucene41","Lucene42","Lucene45"}) public class TestExportWriter extends SolrTestCaseJ4 { + @BeforeClass public static void beforeClass() throws Exception { System.setProperty("export.test", "true"); initCore("solrconfig-sortingresponse.xml","schema-sortingresponse.xml"); + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + assertU(delQ("*:*")); + assertU(commit()); createIndex(); } @@ -47,7 +70,12 @@ public class TestExportWriter extends SolrTestCaseJ4 { "longdv_m", "343332", "stringdv_m", "manchester \"city\"", "stringdv_m", "liverpool", - "stringdv_m", "Everton")); + "stringdv_m", "Everton", + "datedv", "2017-06-16T07:00:00Z", + "datedv_m", "2017-06-16T01:00:00Z", + "datedv_m", "2017-06-16T02:00:00Z", + "datedv_m", "2017-06-16T03:00:00Z", + "datedv_m", "2017-06-16T04:00:00Z")); assertU(adoc("id","7", "floatdv","2.1", @@ -80,7 +108,11 @@ public class TestExportWriter extends SolrTestCaseJ4 { "longdv_m", "343332", "stringdv_m", "manchester \"city\"", "stringdv_m", "liverpool", - "stringdv_m", "everton")); + "stringdv_m", "everton", + "int_is_t", "1", + "int_is_t", "1", + "int_is_t", "1", + "int_is_t", "1")); assertU(commit()); assertU(adoc("id","8", "floatdv","2.1", @@ -98,7 +130,14 @@ public class TestExportWriter extends SolrTestCaseJ4 { "longdv_m", "343332", "stringdv_m", "manchester \"city\"", "stringdv_m", "liverpool", - "stringdv_m", "everton")); + "stringdv_m", "everton", + "datedv", "2017-01-01T00:00:00Z", + "datedv_m", "2017-01-01T01:00:00Z", + "datedv_m", "2017-01-01T02:00:00Z", + "int_is_p", "1", + "int_is_p", "1", + "int_is_p", "1", + "int_is_p", "1")); assertU(commit()); @@ -192,4 +231,152 @@ public class TestExportWriter extends SolrTestCaseJ4 { // Interesting you don't even need to specify a "q" parameter. } + + @Test + public void testDates() throws Exception { + String s = h.query(req("q", "id:1", "qt", "/export", "fl", "datedv", "sort", "datedv asc")); + assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"datedv\":\"2017-06-16T07:00:00Z\"}]}}"); + s = h.query(req("q", "id:1", "qt", "/export", "fl", "datedv_m", "sort", "datedv asc")); + assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"datedv_m\":[\"2017-06-16T01:00:00Z\",\"2017-06-16T02:00:00Z\",\"2017-06-16T03:00:00Z\",\"2017-06-16T04:00:00Z\"]}]}}"); + } + + @Test + public void testDuplicates() throws Exception { + RefCounted ref = null; + try { + ref = h.getCore().getSearcher(); + LeafReader reader = ref.get().getSlowAtomicReader(); + // MultiValued Trie fields use SortedSet + assertNotNull(reader.getSortedSetDocValues("int_is_t")); + assertNull(reader.getSortedNumericDocValues("int_is_t")); + // MultiValued Point fields use SortedNumerics + assertNull(reader.getSortedSetDocValues("int_is_p")); + assertNotNull(reader.getSortedNumericDocValues("int_is_p")); + } finally { + if (ref != null) ref.decref(); + } + String s = h.query(req("q", "id:3", "qt", "/export", "fl", "int_is_t", "sort", "intdv asc")); + assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"int_is_t\":[1]}]}}"); + s = h.query(req("q", "id:8", "qt", "/export", "fl", "int_is_p", "sort", "intdv asc")); + assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"int_is_p\":[1,1,1,1]}]}}"); + } + + /** + * This test doesn't validate the correctness of results, it just compares the response of the same request + * when asking for Trie fields vs Point fields. Can be removed once Trie fields are no longer supported + */ + @Test + @SuppressForbidden(reason="using new Date(time) to create random dates") + public void testRandomNumerics() throws Exception { + assertU(delQ("*:*")); + assertU(commit()); + List trieFields = new ArrayList(); + List pointFields = new ArrayList(); + for (String mv:new String[]{"s", ""}) { + for (String indexed:new String[]{"_ni", ""}) { + for (String type:new String[]{"i", "l", "f", "d", "dt"}) { + String field = "number_" + type + mv + indexed; + SchemaField sf = h.getCore().getLatestSchema().getField(field + "_t"); + assertTrue(sf.hasDocValues()); + assertTrue(sf.getType().getNumberType() != null); + assertFalse(sf.getType().isPointField()); + + sf = h.getCore().getLatestSchema().getField(field + "_p"); + assertTrue(sf.hasDocValues()); + assertTrue(sf.getType().getNumberType() != null); + assertTrue(sf.getType().isPointField()); + + trieFields.add(field + "_t"); + pointFields.add(field + "_p"); + } + } + } + for (int i = 0; i < atLeast(100); i++) { + if (random().nextInt(20) == 0) { + //have some empty docs + assertU(adoc("id", String.valueOf(i))); + continue; + } + + if (random().nextInt(20) == 0 && i > 0) { + //delete some docs + assertU(delI(String.valueOf(i - 1))); + } + + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", String.valueOf(i)); + addInt(doc, random().nextInt(), false); + addLong(doc, random().nextLong(), false); + addFloat(doc, random().nextFloat() * 3000 * (random().nextBoolean()?1:-1), false); + addDouble(doc, random().nextDouble() * 3000 * (random().nextBoolean()?1:-1), false); + addDate(doc, new Date(), false); + + // MV need to be unique in order to be the same in Trie vs Points + Set ints = new HashSet<>(); + Set longs = new HashSet<>(); + Set floats = new HashSet<>(); + Set doubles = new HashSet<>(); + Set dates = new HashSet<>(); + for (int j=0; j < random().nextInt(20); j++) { + ints.add(random().nextInt()); + longs.add(random().nextLong()); + floats.add(random().nextFloat() * 3000 * (random().nextBoolean()?1:-1)); + doubles.add(random().nextDouble() * 3000 * (random().nextBoolean()?1:-1)); + dates.add(new Date(System.currentTimeMillis() + random().nextInt())); + } + ints.stream().forEach((val)->addInt(doc, val, true)); + longs.stream().forEach((val)->addLong(doc, val, true)); + floats.stream().forEach((val)->addFloat(doc, val, true)); + doubles.stream().forEach((val)->addDouble(doc, val, true)); + dates.stream().forEach((val)->addDate(doc, val, true)); + + assertU(adoc(doc)); + if (random().nextInt(20) == 0) { + assertU(commit()); + } + } + assertU(commit()); + doTestQuery("id:1", trieFields, pointFields); + doTestQuery("*:*", trieFields, pointFields); + doTestQuery("id:[0 TO 2]", trieFields, pointFields);// "id" field is really a string, this is not a numeric range query + doTestQuery("id:[0 TO 9]", trieFields, pointFields); + doTestQuery("id:DOES_NOT_EXIST", trieFields, pointFields); + } + + private void doTestQuery(String query, List trieFields, List pointFields) throws Exception { + String trieFieldsFl = String.join(",", trieFields); + String pointFieldsFl = String.join(",", pointFields); + String sort = pickRandom((String)pickRandom(trieFields.toArray()), (String)pickRandom(pointFields.toArray())).replace("s_", "_") + pickRandom(" asc", " desc"); + String resultPoints = h.query(req("q", query, "qt", "/export", "fl", pointFieldsFl, "sort", sort)); + String resultTries = h.query(req("q", query, "qt", "/export", "fl", trieFieldsFl, "sort", sort)); + assertJsonEquals(resultPoints.replaceAll("_p", ""), resultTries.replaceAll("_t", "")); + } + + private void addFloat(SolrInputDocument doc, float value, boolean mv) { + addField(doc, "f", String.valueOf(value), mv); + } + + private void addDouble(SolrInputDocument doc, double value, boolean mv) { + addField(doc, "d", String.valueOf(value), mv); + } + + private void addLong(SolrInputDocument doc, long value, boolean mv) { + addField(doc, "l", String.valueOf(value), mv); + } + + private void addInt(SolrInputDocument doc, int value, boolean mv) { + addField(doc, "i", String.valueOf(value), mv); + } + + private void addDate(SolrInputDocument doc, Date value, boolean mv) { + addField(doc, "dt", new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT).format(value), mv); + } + + private void addField(SolrInputDocument doc, String type, String value, boolean mv) { + doc.addField("number_" + type + (mv?"s":"") + "_t", value); + doc.addField("number_" + type + (mv?"s":"") + "_p", value); + doc.addField("number_" + type + (mv?"s":"") + "_ni_t", value); + doc.addField("number_" + type + (mv?"s":"") + "_ni_p", value); + } + }