SOLR-14608: Faster sorting for the /export handler

This commit is contained in:
Joel Bernstein 2021-01-19 14:08:38 -05:00
parent 1aeb1dcb86
commit 64df5a65f0
24 changed files with 740 additions and 299 deletions

View File

@ -19,45 +19,17 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
class BoolFieldWriter extends FieldWriter { class BoolFieldWriter extends StringFieldWriter {
private String field;
private FieldType fieldType;
private CharsRefBuilder cref = new CharsRefBuilder();
public BoolFieldWriter(String field, FieldType fieldType) { public BoolFieldWriter(String field, FieldType fieldType) {
this.field = field; super(field, fieldType);
this.fieldType = fieldType;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) throws IOException {
BytesRef ref;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
ref = (BytesRef) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(sortDoc.docId) != sortDoc.docId) {
return false;
}
int ord = vals.ordValue();
ref = vals.lookupOrd(ord);
}
fieldType.indexedToReadable(ref, cref); fieldType.indexedToReadable(ref, cref);
ew.put(this.field, "true".equals(cref.toString())); ew.put(this.field, "true".equals(cref.toString()));
return true;
} }
} }

View File

@ -20,19 +20,22 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import java.util.Date; import java.util.Date;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
class DateFieldWriter extends FieldWriter { class DateFieldWriter extends FieldWriter {
private String field; private String field;
private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public DateFieldWriter(String field) { public DateFieldWriter(String field) {
this.field = field; this.field = field;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
Long val; Long val;
SortValue sortValue = sortDoc.getSortValue(this.field); SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) { if (sortValue != null) {
@ -43,7 +46,21 @@ class DateFieldWriter extends FieldWriter {
} }
} else { } else {
// field is not part of 'sort' param, but part of 'fl' param // field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field); int readerOrd = readerContext.ord;
NumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
if(numericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = numericDocValues;
}
}
if(vals == null) {
vals = DocValues.getNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) == sortDoc.docId) { if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue(); val = vals.longValue();
} else { } else {

View File

@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
class DoubleFieldWriter extends FieldWriter { class DoubleFieldWriter extends FieldWriter {
private String field; private String field;
private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public DoubleFieldWriter(String field) { public DoubleFieldWriter(String field) {
this.field = field; this.field = field;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field); SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) { if (sortValue != null) {
if (sortValue.isPresent()) { if (sortValue.isPresent()) {
@ -43,7 +45,20 @@ class DoubleFieldWriter extends FieldWriter {
} }
} else { } else {
// field is not part of 'sort' param, but part of 'fl' param // field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field); int readerOrd = readerContext.ord;
NumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
if(numericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = numericDocValues;
}
}
if(vals == null) {
vals = DocValues.getNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) == sortDoc.docId) { if (vals.advance(sortDoc.docId) == sortDoc.docId) {
long val = vals.longValue(); long val = vals.longValue();
ew.put(this.field, Double.longBitsToDouble(val)); ew.put(this.field, Double.longBitsToDouble(val));

View File

@ -78,13 +78,17 @@ class DoubleValue implements SortValue {
} }
} }
public void toGlobalValue(SortValue previousValue) {
}
@Override @Override
public boolean isPresent() { public boolean isPresent() {
return present; return present;
} }
public void setCurrentValue(SortValue sv) { public void setCurrentValue(SortValue sv) {
DoubleValue dv = (DoubleValue)sv; DoubleValue dv = (DoubleValue) sv;
this.currentValue = dv.currentValue; this.currentValue = dv.currentValue;
this.present = dv.present; this.present = dv.present;
} }
@ -95,7 +99,7 @@ class DoubleValue implements SortValue {
} }
public int compareTo(SortValue o) { public int compareTo(SortValue o) {
DoubleValue dv = (DoubleValue)o; DoubleValue dv = (DoubleValue) o;
return comp.compare(currentValue, dv.currentValue); return comp.compare(currentValue, dv.currentValue);
} }
} }

View File

@ -25,6 +25,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
protected SortValue value2; protected SortValue value2;
@Override
public SortValue getSortValue(String field) { public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) { if (value1.getField().equals(field)) {
return value1; return value1;
@ -34,6 +35,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
return null; return null;
} }
@Override
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord; this.ord = context.ord;
this.docBase = context.docBase; this.docBase = context.docBase;
@ -41,6 +43,7 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
value2.setNextReader(context); value2.setNextReader(context);
} }
@Override
public void reset() { public void reset() {
this.docId = -1; this.docId = -1;
this.docBase = -1; this.docBase = -1;
@ -49,18 +52,27 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
value2.reset(); value2.reset();
} }
@Override
public void setValues(int docId) throws IOException { public void setValues(int docId) throws IOException {
this.docId = docId; this.docId = docId;
value1.setCurrentValue(docId); value1.setCurrentValue(docId);
value2.setCurrentValue(docId); value2.setCurrentValue(docId);
} }
@Override
public void setGlobalValues(SortDoc previous) {
DoubleValueSortDoc doubleValueSortDoc = (DoubleValueSortDoc) previous;
value1.toGlobalValue(doubleValueSortDoc.value1);
value2.toGlobalValue(doubleValueSortDoc.value2);
}
@Override
public void setValues(SortDoc sortDoc) { public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId; this.docId = sortDoc.docId;
this.ord = sortDoc.ord; this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase; this.docBase = sortDoc.docBase;
value1.setCurrentValue(((DoubleValueSortDoc)sortDoc).value1); value1.setCurrentValue(((DoubleValueSortDoc) sortDoc).value1);
value2.setCurrentValue(((DoubleValueSortDoc)sortDoc).value2); value2.setCurrentValue(((DoubleValueSortDoc) sortDoc).value2);
} }
public DoubleValueSortDoc(SortValue value1, SortValue value2) { public DoubleValueSortDoc(SortValue value1, SortValue value2) {
@ -68,34 +80,42 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
this.value2 = value2; this.value2 = value2;
} }
@Override
public SortDoc copy() { public SortDoc copy() {
return new DoubleValueSortDoc(value1.copy(), value2.copy()); return new DoubleValueSortDoc(value1.copy(), value2.copy());
} }
@Override
public boolean lessThan(Object o) { public boolean lessThan(Object o) {
DoubleValueSortDoc sd = (DoubleValueSortDoc)o; DoubleValueSortDoc sd = (DoubleValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
return docId+docBase > sd.docId+sd.docBase; return docId + docBase > sd.docId + sd.docBase;
} }
} }
} }
public int compareTo(Object o) { @Override
DoubleValueSortDoc sd = (DoubleValueSortDoc)o; public int compareTo(SortDoc o) {
DoubleValueSortDoc sd = (DoubleValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if (comp == 0) { if (comp == 0) {
return value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if (comp == 0) {
return (sd.docId + sd.docBase) - (docId + docBase);
} else {
return comp;
}
} else { } else {
return comp; return comp;
} }

View File

@ -26,14 +26,16 @@ import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.BrokenBarrierException;
import com.codahale.metrics.Timer;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.handler.export.ExportWriter.MergeIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -51,9 +53,6 @@ class ExportBuffers {
final List<LeafReaderContext> leaves; final List<LeafReaderContext> leaves;
final ExportWriter exportWriter; final ExportWriter exportWriter;
final OutputStream os; final OutputStream os;
final Timer writeOutputBufferTimer;
final Timer fillerWaitTimer;
final Timer writerWaitTimer;
final IteratorWriter.ItemWriter rawWriter; final IteratorWriter.ItemWriter rawWriter;
final IteratorWriter.ItemWriter writer; final IteratorWriter.ItemWriter writer;
final CyclicBarrier barrier; final CyclicBarrier barrier;
@ -68,7 +67,7 @@ class ExportBuffers {
ExportBuffers(ExportWriter exportWriter, List<LeafReaderContext> leaves, SolrIndexSearcher searcher, ExportBuffers(ExportWriter exportWriter, List<LeafReaderContext> leaves, SolrIndexSearcher searcher,
OutputStream os, IteratorWriter.ItemWriter rawWriter, Sort sort, int queueSize, int totalHits, OutputStream os, IteratorWriter.ItemWriter rawWriter, Sort sort, int queueSize, int totalHits,
Timer writeOutputBufferTimer, Timer fillerWaitTimer, Timer writerWaitTimer) throws IOException { FixedBitSet[] sets) throws IOException {
this.exportWriter = exportWriter; this.exportWriter = exportWriter;
this.leaves = leaves; this.leaves = leaves;
this.os = os; this.os = os;
@ -81,55 +80,62 @@ class ExportBuffers {
return this; return this;
} }
}; };
this.writeOutputBufferTimer = writeOutputBufferTimer;
this.fillerWaitTimer = fillerWaitTimer;
this.writerWaitTimer = writerWaitTimer;
this.bufferOne = new Buffer(queueSize); this.bufferOne = new Buffer(queueSize);
this.bufferTwo = new Buffer(queueSize); this.bufferTwo = new Buffer(queueSize);
this.totalHits = totalHits; this.totalHits = totalHits;
fillBuffer = bufferOne; fillBuffer = bufferOne;
outputBuffer = bufferTwo; outputBuffer = bufferTwo;
SortDoc writerSortDoc = exportWriter.getSortDoc(searcher, sort.getSort()); SortDoc writerSortDoc = exportWriter.getSortDoc(searcher, sort.getSort());
MergeIterator mergeIterator = exportWriter.getMergeIterator(leaves, sets, writerSortDoc);
bufferOne.initialize(writerSortDoc); bufferOne.initialize(writerSortDoc);
bufferTwo.initialize(writerSortDoc); bufferTwo.initialize(writerSortDoc);
barrier = new CyclicBarrier(2, () -> swapBuffers()); barrier = new CyclicBarrier(2, () -> swapBuffers());
filler = () -> { filler = () -> {
try { try {
// log.debug("--- filler start {}", Thread.currentThread()); // log.debug("--- filler start {}", Thread.currentThread());
SortDoc sortDoc = exportWriter.getSortDoc(searcher, sort.getSort());
Buffer buffer = getFillBuffer(); Buffer buffer = getFillBuffer();
SortQueue queue = new SortQueue(queueSize, sortDoc);
long lastOutputCounter = 0; long lastOutputCounter = 0;
for (int count = 0; count < totalHits; ) { for (int count = 0; count < totalHits; ) {
// log.debug("--- filler fillOutDocs in {}", fillBuffer); // log.debug("--- filler fillOutDocs in {}", fillBuffer);
exportWriter.fillOutDocs(leaves, sortDoc, queue, buffer); exportWriter.fillOutDocs(mergeIterator, buffer);
count += (buffer.outDocsIndex + 1); count += (buffer.outDocsIndex + 1);
// log.debug("--- filler count={}, exchange buffer from {}", count, buffer); // log.debug("--- filler count={}, exchange buffer from {}", count, buffer);
Timer.Context timerContext = getFillerWaitTimer().time();
try { try {
long startBufferWait = System.nanoTime();
exchangeBuffers(); exchangeBuffers();
long endBufferWait = System.nanoTime();
if(log.isDebugEnabled()) {
log.debug("Waited for writer thread:{}", Long.toString(((endBufferWait - startBufferWait) / 1000000)));
}
} finally { } finally {
timerContext.stop();
} }
buffer = getFillBuffer(); buffer = getFillBuffer();
if (outputCounter.longValue() > lastOutputCounter) { if (outputCounter.longValue() > lastOutputCounter) {
lastOutputCounter = outputCounter.longValue(); lastOutputCounter = outputCounter.longValue();
flushOutput(); flushOutput();
} }
// log.debug("--- filler got empty buffer {}", buffer);
} }
buffer.outDocsIndex = Buffer.NO_MORE_DOCS; buffer.outDocsIndex = Buffer.NO_MORE_DOCS;
// log.debug("--- filler final exchange buffer from {}", buffer);
Timer.Context timerContext = getFillerWaitTimer().time();
try { try {
exchangeBuffers(); exchangeBuffers();
} finally { } finally {
timerContext.stop();
} }
buffer = getFillBuffer(); buffer = getFillBuffer();
// log.debug("--- filler final got buffer {}", buffer); // log.debug("--- filler final got buffer {}", buffer);
} catch (Throwable e) { } catch (Throwable e) {
log.error("filler", e); if(!(e instanceof InterruptedException) && !(e instanceof BrokenBarrierException)) {
/*
Don't log the interrupt or BrokenBarrierException as it creates noise during early client disconnects and
doesn't log anything particularly useful in other situations.
*/
log.error("filler", e);
}
error(e); error(e);
if (e instanceof InterruptedException) { if (e instanceof InterruptedException) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
@ -155,7 +161,7 @@ class ExportBuffers {
} }
private void swapBuffers() { private void swapBuffers() {
log.debug("--- swap buffers"); //log.debug("--- swap buffers");
Buffer one = fillBuffer; Buffer one = fillBuffer;
fillBuffer = outputBuffer; fillBuffer = outputBuffer;
outputBuffer = one; outputBuffer = one;
@ -174,18 +180,6 @@ class ExportBuffers {
return fillBuffer; return fillBuffer;
} }
public Timer getWriteOutputBufferTimer() {
return writeOutputBufferTimer;
}
public Timer getFillerWaitTimer() {
return fillerWaitTimer;
}
public Timer getWriterWaitTimer() {
return writerWaitTimer;
}
// decorated writer that keeps track of number of writes // decorated writer that keeps track of number of writes
public IteratorWriter.ItemWriter getWriter() { public IteratorWriter.ItemWriter getWriter() {
return writer; return writer;
@ -230,8 +224,24 @@ class ExportBuffers {
// ); // );
// allDone.join(); // allDone.join();
log.debug("-- finished."); log.debug("-- finished.");
} catch (Exception e) { } catch (Throwable e) {
log.error("Exception running filler / writer", e); Throwable ex = e;
boolean ignore = false;
while (ex != null) {
String m = ex.getMessage();
if (m != null && m.contains("Broken pipe")) {
ignore = true;
break;
}
ex = ex.getCause();
}
if(!ignore) {
/*
Ignore Broken pipes. Broken pipes occur normally when using the export handler for
merge joins when the join is complete before both sides of the join are fully read.
*/
log.error("Exception running filler / writer", e);
}
error(e); error(e);
// //
} finally { } finally {

View File

@ -25,8 +25,8 @@ import java.io.PrintWriter;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.List; import java.util.List;
import java.util.TreeSet;
import com.codahale.metrics.Timer;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
@ -90,15 +90,16 @@ import static org.apache.solr.common.util.Utils.makeMap;
* bitmap identifies the smallest docs (default is {@link #DEFAULT_BATCH_SIZE}) that haven't been sent yet and stores them in a * bitmap identifies the smallest docs (default is {@link #DEFAULT_BATCH_SIZE}) that haven't been sent yet and stores them in a
* Priority Queue. They are then exported (written across the wire) and marked as sent (unset in the bitmap). * Priority Queue. They are then exported (written across the wire) and marked as sent (unset in the bitmap).
* This process repeats until all matching documents have been sent. * This process repeats until all matching documents have been sent.
* <p>
* This streaming approach is light on memory (only up to 2x batch size documents are ever stored in memory at
* once), and it allows {@link ExportWriter} to scale well with regard to numDocs.
*/ */
public class ExportWriter implements SolrCore.RawWriter, Closeable { public class ExportWriter implements SolrCore.RawWriter, Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String BATCH_SIZE_PARAM = "batchSize"; public static final String BATCH_SIZE_PARAM = "batchSize";
public static final String QUEUE_SIZE_PARAM = "queueSize";
public static final int DEFAULT_BATCH_SIZE = 30000; public static final int DEFAULT_BATCH_SIZE = 30000;
public static final int DEFAULT_QUEUE_SIZE = 150000;
private OutputStreamWriter respWriter; private OutputStreamWriter respWriter;
final SolrQueryRequest req; final SolrQueryRequest req;
@ -106,7 +107,10 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
final StreamContext initialStreamContext; final StreamContext initialStreamContext;
final SolrMetricsContext solrMetricsContext; final SolrMetricsContext solrMetricsContext;
final String metricsPath; final String metricsPath;
//The batch size for the output writer thread.
final int batchSize; final int batchSize;
//The max combined size of the segment level priority queues.
private int priorityQueueSize;
StreamExpression streamExpression; StreamExpression streamExpression;
StreamContext streamContext; StreamContext streamContext;
FieldWriter[] fieldWriters; FieldWriter[] fieldWriters;
@ -114,11 +118,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
FixedBitSet[] sets = null; FixedBitSet[] sets = null;
PushWriter writer; PushWriter writer;
private String wt; private String wt;
final Timer identifyLowestSortingDocTimer;
final Timer transferBatchToBufferTimer;
final Timer writeOutputBufferTimer;
final Timer writerWaitTimer;
final Timer fillerWaitTimer;
public ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt, public ExportWriter(SolrQueryRequest req, SolrQueryResponse res, String wt,
@ -130,12 +130,8 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
this.initialStreamContext = initialStreamContext; this.initialStreamContext = initialStreamContext;
this.solrMetricsContext = solrMetricsContext; this.solrMetricsContext = solrMetricsContext;
this.metricsPath = metricsPath; this.metricsPath = metricsPath;
this.batchSize = req.getParams().getInt(BATCH_SIZE_PARAM, DEFAULT_BATCH_SIZE); this.priorityQueueSize = req.getParams().getInt(QUEUE_SIZE_PARAM, DEFAULT_QUEUE_SIZE);
identifyLowestSortingDocTimer = solrMetricsContext.timer("identifyLowestSortingDoc", metricsPath); this.batchSize = DEFAULT_BATCH_SIZE;
transferBatchToBufferTimer = solrMetricsContext.timer("transferBatchToBuffer", metricsPath);
writeOutputBufferTimer = solrMetricsContext.timer("writeOutputBuffer", metricsPath);
writerWaitTimer = solrMetricsContext.timer("writerWaitTimer", metricsPath);
fillerWaitTimer = solrMetricsContext.timer("fillerWaitTimer", metricsPath);
} }
@Override @Override
@ -147,10 +143,20 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
if (writer != null) writer.close(); if (writer != null) {
try {
writer.close();
} catch (Throwable t) {
//We're going to sit on this.
}
}
if (respWriter != null) { if (respWriter != null) {
respWriter.flush(); try {
respWriter.close(); respWriter.flush();
respWriter.close();
} catch (Throwable t) {
}
} }
} }
@ -168,6 +174,14 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} }
public void write(OutputStream os) throws IOException { public void write(OutputStream os) throws IOException {
try {
_write(os);
} finally {
}
}
private void _write(OutputStream os) throws IOException {
QueryResponseWriter rw = req.getCore().getResponseWriters().get(wt); QueryResponseWriter rw = req.getCore().getResponseWriters().get(wt);
if (rw instanceof BinaryResponseWriter) { if (rw instanceof BinaryResponseWriter) {
//todo add support for other writers after testing //todo add support for other writers after testing
@ -281,13 +295,18 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
streamContext.put(CommonParams.SORT, params.get(CommonParams.SORT)); streamContext.put(CommonParams.SORT, params.get(CommonParams.SORT));
} }
writer.writeMap(m -> { try {
m.put("responseHeader", singletonMap("status", 0)); writer.writeMap(m -> {
m.put("response", (MapWriter) mw -> { m.put("responseHeader", singletonMap("status", 0));
mw.put("numFound", totalHits); m.put("response", (MapWriter) mw -> {
mw.put("docs", (IteratorWriter) iw -> writeDocs(req, os, iw, sort)); mw.put("numFound", totalHits);
mw.put("docs", (IteratorWriter) iw -> writeDocs(req, os, iw, sort));
});
}); });
}); } catch (java.io.EOFException e) {
log.info("Caught Eof likely caused by early client disconnect");
}
if (streamContext != null) { if (streamContext != null) {
streamContext = null; streamContext = null;
} }
@ -302,41 +321,16 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return tupleStream; return tupleStream;
} }
private void identifyLowestSortingUnexportedDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, SortQueue queue) throws IOException { private void transferBatchToBufferForOutput(MergeIterator mergeIterator,
Timer.Context timerContext = identifyLowestSortingDocTimer.time(); ExportBuffers.Buffer destination) throws IOException {
try {
queue.reset();
SortDoc top = queue.top();
for (int i = 0; i < leaves.size(); i++) {
sortDoc.setNextReader(leaves.get(i));
DocIdSetIterator it = new BitSetIterator(sets[i], 0); // cost is not useful here
int docId;
while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
sortDoc.setValues(docId);
if (top.lessThan(sortDoc)) {
top.setValues(sortDoc);
top = queue.updateTop();
}
}
}
} finally {
timerContext.stop();
}
}
private void transferBatchToBufferForOutput(SortQueue queue,
List<LeafReaderContext> leaves,
ExportBuffers.Buffer destination) throws IOException {
Timer.Context timerContext = transferBatchToBufferTimer.time();
try { try {
int outDocsIndex = -1; int outDocsIndex = -1;
for (int i = 0; i < queue.maxSize; i++) { for (int i = 0; i < batchSize; i++) {
SortDoc s = queue.pop(); SortDoc sortDoc = mergeIterator.next();
if (s.docId > -1) { if (sortDoc != null) {
destination.outDocs[++outDocsIndex].setValues(s); destination.outDocs[++outDocsIndex].setValues(sortDoc);
// remove this doc id from the matching bitset, it's been exported } else {
sets[s.ord].clear(s.docId); break;
s.reset(); // reuse
} }
} }
destination.outDocsIndex = outDocsIndex; destination.outDocsIndex = outDocsIndex;
@ -347,7 +341,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} }
throw t; throw t;
} finally { } finally {
timerContext.stop();
} }
} }
@ -355,8 +349,17 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves(); List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
final int queueSize = Math.min(batchSize, totalHits); final int queueSize = Math.min(batchSize, totalHits);
ExportBuffers buffers = new ExportBuffers(this, leaves, req.getSearcher(), os, writer, sort, queueSize, totalHits,
writeOutputBufferTimer, fillerWaitTimer, writerWaitTimer); ExportBuffers buffers = new ExportBuffers(this,
leaves,
req.getSearcher(),
os,
writer,
sort,
queueSize,
totalHits,
sets);
if (streamExpression != null) { if (streamExpression != null) {
streamContext.put(ExportBuffers.EXPORT_BUFFERS_KEY, buffers); streamContext.put(ExportBuffers.EXPORT_BUFFERS_KEY, buffers);
@ -408,9 +411,8 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
log.debug("--- writer interrupted"); log.debug("--- writer interrupted");
break; break;
} }
Timer.Context timerContext = writeOutputBufferTimer.time();
try { try {
for (int i = buffer.outDocsIndex; i >= 0; --i) { for (int i = 0; i <= buffer.outDocsIndex; ++i) {
// we're using the raw writer here because there's no potential // we're using the raw writer here because there's no potential
// reduction in the number of output items, unlike when using // reduction in the number of output items, unlike when using
// streaming expressions // streaming expressions
@ -418,37 +420,38 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
writer.add((MapWriter) ew -> writeDoc(currentDoc, leaves, ew, fieldWriters)); writer.add((MapWriter) ew -> writeDoc(currentDoc, leaves, ew, fieldWriters));
} }
} finally { } finally {
timerContext.stop();
} }
log.debug("--- writer exchanging from {}", buffer); //log.debug("--- writer exchanging from {}", buffer);
timerContext = writerWaitTimer.time();
try { try {
long startExchangeBuffers = System.nanoTime();
buffers.exchangeBuffers(); buffers.exchangeBuffers();
long endExchangeBuffers = System.nanoTime();
if (log.isDebugEnabled()) {
log.debug("Waited for reader thread {}:", Long.toString(((endExchangeBuffers - startExchangeBuffers) / 1000000)));
}
} finally { } finally {
timerContext.stop();
} }
buffer = buffers.getOutputBuffer(); buffer = buffers.getOutputBuffer();
log.debug("--- writer got {}", buffer); //log.debug("--- writer got {}", buffer);
} }
return true; return true;
}); });
} }
} }
void fillOutDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, void fillOutDocs(MergeIterator mergeIterator,
SortQueue sortQueue, ExportBuffers.Buffer buffer) throws IOException { ExportBuffers.Buffer buffer) throws IOException {
identifyLowestSortingUnexportedDocs(leaves, sortDoc, sortQueue); transferBatchToBufferForOutput(mergeIterator, buffer);
transferBatchToBufferForOutput(sortQueue, leaves, buffer);
} }
void writeDoc(SortDoc sortDoc, void writeDoc(SortDoc sortDoc,
List<LeafReaderContext> leaves, List<LeafReaderContext> leaves,
EntryWriter ew, FieldWriter[] writers) throws IOException { EntryWriter ew, FieldWriter[] writers) throws IOException {
int ord = sortDoc.ord; int ord = sortDoc.ord;
LeafReaderContext context = leaves.get(ord); LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0; int fieldIndex = 0;
for (FieldWriter fieldWriter : writers) { for (FieldWriter fieldWriter : writers) {
if (fieldWriter.write(sortDoc, context.reader(), ew, fieldIndex)) { if (fieldWriter.write(sortDoc, context, ew, fieldIndex)) {
++fieldIndex; ++fieldIndex;
} }
} }
@ -612,6 +615,183 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return new SortDoc(sortValues); return new SortDoc(sortValues);
} }
/**
 * Merges the per-segment streams of {@link SortDoc}s into a single stream.
 * <p>
 * One candidate document per segment is held in a sorted set. Each call to
 * {@link #next()} removes the last (highest) element of that set, copies it into a
 * single reusable output document and asks the originating segment for its
 * replacement.
 */
static class MergeIterator {
  private final TreeSet<SortDoc> set = new TreeSet<>();
  private final SegmentIterator[] segmentIterators;
  private final SortDoc outDoc;

  /**
   * Primes the merge by pulling the first document from every segment iterator.
   *
   * @param segmentIterators one iterator per segment, indexed by segment ordinal
   * @param proto prototype document; a copy of it is used as the reusable output document
   * @throws IOException if a segment iterator fails while producing its first document
   */
  public MergeIterator(SegmentIterator[] segmentIterators, SortDoc proto) throws IOException {
    this.outDoc = proto.copy();
    this.segmentIterators = segmentIterators;
    try {
      for (SegmentIterator segmentIterator : segmentIterators) {
        SortDoc first = segmentIterator.next();
        if (first != null) {
          set.add(first);
        }
      }
    } catch (IOException e) {
      log.error("Error in MergeIterator: ", e);
      throw e;
    }
  }

  /**
   * Returns the next document in merged sort order, or {@code null} once every
   * segment has been drained.
   * <p>
   * The same {@link SortDoc} instance is returned on every call; its values are
   * overwritten each time.
   *
   * @throws IOException if the originating segment iterator fails to advance
   */
  public SortDoc next() throws IOException {
    SortDoc head = set.pollLast();
    if (head == null) {
      // Nothing left in any segment.
      return null;
    }

    // Copy the values out BEFORE advancing the segment. The original performs the
    // steps in this order; presumably the segment iterator may recycle the instance
    // it handed out (its implementation is not visible here) -- keep the order.
    outDoc.setValues(head);

    SortDoc replacement = segmentIterators[head.ord].next();
    if (replacement != null) {
      // Re-inserting into the sorted set is where the cost of the merge lies.
      set.add(replacement);
    }
    return outDoc;
  }
}
/**
 * Builds a {@code MergeIterator} with one {@code SegmentIterator} per leaf.
 * <p>
 * Side effect: {@code this.priorityQueueSize} is lowered to at most
 * {@code 1.5 * totalHits}. Each segment then gets a queue sized by its share
 * of {@code maxDoc} across all leaves, capped at {@code batchSize} and never
 * smaller than 10.
 *
 * @param leaves  the segments to iterate
 * @param bits    per-segment bit sets, indexed in parallel with {@code leaves}
 * @param sortDoc prototype document; copies are handed to every queue and iterator
 * @return a merge iterator over all segments
 * @throws IOException if building a segment iterator or the merge iterator fails
 */
public MergeIterator getMergeIterator(List<LeafReaderContext> leaves, FixedBitSet[] bits, SortDoc sortDoc) throws IOException {
  final int leafCount = leaves.size();

  long totalDocs = 0;
  for (LeafReaderContext leaf : leaves) {
    totalDocs += leaf.reader().maxDoc();
  }

  //Resize the priorityQueueSize down for small result sets.
  this.priorityQueueSize = Math.min(this.priorityQueueSize, (int)(this.totalHits*1.5));

  if (log.isDebugEnabled()) {
    log.debug("Total priority queue size {}:", this.priorityQueueSize);
  }

  int[] sizes = new int[leafCount];
  int combineQueueSize = 0;
  for (int seg = 0; seg < leafCount; seg++) {
    long maxDoc = leaves.get(seg).reader().maxDoc();
    double share = (double) maxDoc / (double) totalDocs;
    int proportional = (int) (share * this.priorityQueueSize);
    //Protect against too small a queue size as well
    int sortQueueSize = Math.max(Math.min(proportional, batchSize), 10);

    if (log.isDebugEnabled()) {
      log.debug("Segment priority queue size {}:", sortQueueSize);
    }

    sizes[seg] = sortQueueSize;
    combineQueueSize += sortQueueSize;
  }

  if (log.isDebugEnabled()) {
    log.debug("Combined priority queue size {}:", combineQueueSize);
  }

  SegmentIterator[] segmentIterators = new SegmentIterator[leafCount];
  for (int seg = 0; seg < segmentIterators.length; seg++) {
    SortQueue segmentQueue = new SortQueue(sizes[seg], sortDoc.copy());
    segmentIterators[seg] = new SegmentIterator(bits[seg], leaves.get(seg), segmentQueue, sortDoc.copy());
  }

  return new MergeIterator(segmentIterators, sortDoc);
}
/**
 * Iterates the documents of a single segment in batches.
 * {@link #topDocs()} scans the bits that are still set and collects one
 * queue's worth of documents into {@code outDocs}; {@link #next()} hands
 * them out from the end of that array and clears each document's bit so a
 * later scan does not pick it up again.
 */
private static class SegmentIterator {

  private final FixedBitSet bits;
  private final SortQueue queue;
  private final SortDoc sortDoc;
  private final LeafReaderContext context;
  private final SortDoc[] outDocs;

  private SortDoc nextDoc;
  private int index;

  /**
   * Stores its collaborators and immediately loads the first batch.
   *
   * @param bits      bit set of this segment's documents still to be returned
   * @param context   the segment this iterator reads from
   * @param sortQueue queue used to collect each batch; its {@code maxSize} sizes the batch array
   * @param sortDoc   scratch document used while scanning; a copy becomes the reusable result doc
   * @throws IOException if loading the first batch fails
   */
  public SegmentIterator(FixedBitSet bits, LeafReaderContext context, SortQueue sortQueue, SortDoc sortDoc) throws IOException {
    this.bits = bits;
    this.queue = sortQueue;
    this.sortDoc = sortDoc;
    this.nextDoc = sortDoc.copy();
    this.context = context;
    this.outDocs = new SortDoc[sortQueue.maxSize];
    topDocs();
  }

  /**
   * Returns the next document of this segment, loading a new batch when the
   * current one is used up.
   *
   * @return the iterator's own reusable {@code nextDoc}, or {@code null} when
   *         a fresh batch yields no document
   * @throws IOException if loading a batch fails
   */
  public SortDoc next() throws IOException {
    if (index < 0) {
      // Current batch is drained; try to load another one.
      topDocs();
    }

    SortDoc batchDoc = (index > -1) ? outDocs[index--] : null;
    if (batchDoc == null) {
      nextDoc = null;
      return nextDoc;
    }

    //Clear the bit so it's not loaded again.
    bits.clear(batchDoc.docId);

    //Load the global ordinal (only matters for strings)
    batchDoc.setGlobalValues(nextDoc);

    nextDoc.setValues(batchDoc);
    //We are now done with this doc.
    batchDoc.reset();
    return nextDoc;
  }

  /**
   * Scans every bit that is still set, keeps the documents the queue retains,
   * then pops the queue into {@code outDocs} and points {@code index} at the
   * last filled slot ({@code -1} when nothing was collected).
   *
   * @throws IOException wrapping any exception raised during the scan, after logging it
   */
  private void topDocs() throws IOException {
    try {
      queue.reset();
      SortDoc top = queue.top();
      this.sortDoc.setNextReader(context);
      DocIdSetIterator it = new BitSetIterator(bits, 0); // cost is not useful here
      for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) {
        this.sortDoc.setValues(docId);
        if (top.lessThan(this.sortDoc)) {
          top.setValues(this.sortDoc);
          top = queue.updateTop();
        }
      }

      //Pop the queue and load up the array.
      index = -1;
      for (SortDoc popped = queue.pop(); popped != null; popped = queue.pop()) {
        // Entries whose docId is still -1 never received a document; skip them.
        if (popped.docId > -1) {
          outDocs[++index] = popped;
        }
      }
    } catch (Exception e) {
      log.error("Segment Iterator Error:", e);
      throw new IOException(e);
    }
  }
}
public static class IgnoreException extends IOException { public static class IgnoreException extends IOException {
public void printStackTrace(PrintWriter pw) { public void printStackTrace(PrintWriter pw) {
pw.print("Early Client Disconnect"); pw.print("Early Client Disconnect");
@ -621,5 +801,4 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
return "Early Client Disconnect"; return "Early Client Disconnect";
} }
} }
} }

View File

@ -25,7 +25,6 @@ import java.util.Map;
import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
import com.codahale.metrics.Timer;
import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder; import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator; import org.apache.solr.client.solrj.io.comp.FieldComparator;
@ -56,9 +55,9 @@ public class ExportWriterStream extends TupleStream implements Expressible {
StreamContext context; StreamContext context;
StreamComparator streamComparator; StreamComparator streamComparator;
int pos = -1; int pos = -1;
int index = -1;
ExportBuffers exportBuffers; ExportBuffers exportBuffers;
ExportBuffers.Buffer buffer; ExportBuffers.Buffer buffer;
Timer.Context writeOutputTimerContext;
private static final class TupleEntryWriter implements EntryWriter { private static final class TupleEntryWriter implements EntryWriter {
Tuple tuple; Tuple tuple;
@ -131,9 +130,7 @@ public class ExportWriterStream extends TupleStream implements Expressible {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
if (writeOutputTimerContext != null) {
writeOutputTimerContext.stop();
}
exportBuffers = null; exportBuffers = null;
} }
@ -141,18 +138,19 @@ public class ExportWriterStream extends TupleStream implements Expressible {
public Tuple read() throws IOException { public Tuple read() throws IOException {
Tuple res = null; Tuple res = null;
if (pos < 0) { if (pos < 0) {
if (writeOutputTimerContext != null) {
writeOutputTimerContext.stop();
writeOutputTimerContext = null;
}
try { try {
buffer.outDocsIndex = ExportBuffers.Buffer.EMPTY; buffer.outDocsIndex = ExportBuffers.Buffer.EMPTY;
log.debug("--- ews exchange empty buffer {}", buffer); //log.debug("--- ews exchange empty buffer {}", buffer);
boolean exchanged = false; boolean exchanged = false;
while (!exchanged) { while (!exchanged) {
Timer.Context timerContext = exportBuffers.getWriterWaitTimer().time();
try { try {
long startExchangeBuffers = System.nanoTime();
exportBuffers.exchangeBuffers(); exportBuffers.exchangeBuffers();
long endExchangeBuffers = System.nanoTime();
if(log.isDebugEnabled()) {
log.debug("Waited for reader thread:{}", Long.toString(((endExchangeBuffers - startExchangeBuffers) / 1000000)));
}
exchanged = true; exchanged = true;
} catch (TimeoutException e) { } catch (TimeoutException e) {
log.debug("--- ews timeout loop"); log.debug("--- ews timeout loop");
@ -175,7 +173,6 @@ public class ExportWriterStream extends TupleStream implements Expressible {
} }
break; break;
} finally { } finally {
timerContext.stop();
} }
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -196,6 +193,7 @@ public class ExportWriterStream extends TupleStream implements Expressible {
res = Tuple.EOF(); res = Tuple.EOF();
} else { } else {
pos = buffer.outDocsIndex; pos = buffer.outDocsIndex;
index = -1; //restart index.
log.debug("--- ews new pos={}", pos); log.debug("--- ews new pos={}", pos);
} }
} }
@ -205,15 +203,11 @@ public class ExportWriterStream extends TupleStream implements Expressible {
} }
if (res != null) { if (res != null) {
// only errors or EOF assigned result so far // only errors or EOF assigned result so far
if (writeOutputTimerContext != null) {
writeOutputTimerContext.stop();
}
return res; return res;
} }
if (writeOutputTimerContext == null) {
writeOutputTimerContext = exportBuffers.getWriteOutputBufferTimer().time(); SortDoc sortDoc = buffer.outDocs[++index];
}
SortDoc sortDoc = buffer.outDocs[pos];
tupleEntryWriter.tuple = new Tuple(); tupleEntryWriter.tuple = new Tuple();
exportBuffers.exportWriter.writeDoc(sortDoc, exportBuffers.leaves, tupleEntryWriter, exportBuffers.exportWriter.fieldWriters); exportBuffers.exportWriter.writeDoc(sortDoc, exportBuffers.leaves, tupleEntryWriter, exportBuffers.exportWriter.fieldWriters);
pos--; pos--;

View File

@ -19,9 +19,9 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
abstract class FieldWriter { abstract class FieldWriter {
public abstract boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException; public abstract boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException;
} }

View File

@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
class FloatFieldWriter extends FieldWriter { class FloatFieldWriter extends FieldWriter {
private String field; private String field;
private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public FloatFieldWriter(String field) { public FloatFieldWriter(String field) {
this.field = field; this.field = field;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field); SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) { if (sortValue != null) {
if (sortValue.isPresent()) { if (sortValue.isPresent()) {
@ -43,7 +45,21 @@ class FloatFieldWriter extends FieldWriter {
} }
} else { } else {
// field is not part of 'sort' param, but part of 'fl' param // field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field); int readerOrd = readerContext.ord;
NumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
if(numericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = numericDocValues;
}
}
if(vals == null) {
vals = DocValues.getNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) == sortDoc.docId) { if (vals.advance(sortDoc.docId) == sortDoc.docId) {
int val = (int) vals.longValue(); int val = (int) vals.longValue();
ew.put(this.field, Float.intBitsToFloat(val)); ew.put(this.field, Float.intBitsToFloat(val));

View File

@ -44,6 +44,10 @@ class FloatValue implements SortValue {
return currentValue; return currentValue;
} }
// No-op for float values: the body is empty, so the current value is left untouched.
// NOTE(review): presumably only ordinal-based (string) sort values need a
// conversion to a global value - confirm against the string implementation.
public void toGlobalValue(SortValue previousValue) {
}
public String getField() { public String getField() {
return field; return field;
} }
@ -81,7 +85,7 @@ class FloatValue implements SortValue {
} }
public void setCurrentValue(SortValue sv) { public void setCurrentValue(SortValue sv) {
FloatValue fv = (FloatValue)sv; FloatValue fv = (FloatValue) sv;
this.currentValue = fv.currentValue; this.currentValue = fv.currentValue;
this.present = fv.present; this.present = fv.present;
} }
@ -92,7 +96,7 @@ class FloatValue implements SortValue {
} }
public int compareTo(SortValue o) { public int compareTo(SortValue o) {
FloatValue fv = (FloatValue)o; FloatValue fv = (FloatValue) o;
return comp.compare(currentValue, fv.currentValue); return comp.compare(currentValue, fv.currentValue);
} }
} }

View File

@ -19,19 +19,21 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
class IntFieldWriter extends FieldWriter { class IntFieldWriter extends FieldWriter {
private String field; private String field;
private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public IntFieldWriter(String field) { public IntFieldWriter(String field) {
this.field = field; this.field = field;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
int val; int val;
SortValue sortValue = sortDoc.getSortValue(this.field); SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) { if (sortValue != null) {
@ -42,7 +44,21 @@ class IntFieldWriter extends FieldWriter {
} }
} else { } else {
// field is not part of 'sort' param, but part of 'fl' param // field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field); int readerOrd = readerContext.ord;
NumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
if(numericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = numericDocValues;
}
}
if(vals == null) {
vals = DocValues.getNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) == sortDoc.docId) { if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = (int) vals.longValue(); val = (int) vals.longValue();
} else { } else {

View File

@ -75,18 +75,22 @@ public class IntValue implements SortValue {
} }
} }
// No-op for int values: the body is empty, so the current value is left untouched.
// NOTE(review): presumably only ordinal-based (string) sort values need a
// conversion to a global value - confirm against the string implementation.
public void toGlobalValue(SortValue previousValue) {
}
@Override @Override
public boolean isPresent() { public boolean isPresent() {
return this.present; return this.present;
} }
public int compareTo(SortValue o) { public int compareTo(SortValue o) {
IntValue iv = (IntValue)o; IntValue iv = (IntValue) o;
return comp.compare(currentValue, iv.currentValue); return comp.compare(currentValue, iv.currentValue);
} }
public void setCurrentValue(SortValue sv) { public void setCurrentValue(SortValue sv) {
IntValue iv = (IntValue)sv; IntValue iv = (IntValue) sv;
this.currentValue = iv.currentValue; this.currentValue = iv.currentValue;
this.present = iv.present; this.present = iv.present;
} }

View File

@ -19,19 +19,22 @@ package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter;
class LongFieldWriter extends FieldWriter { class LongFieldWriter extends FieldWriter {
private String field; private String field;
private IntObjectHashMap<NumericDocValues> docValuesCache = new IntObjectHashMap<>();
public LongFieldWriter(String field) { public LongFieldWriter(String field) {
this.field = field; this.field = field;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
long val; long val;
SortValue sortValue = sortDoc.getSortValue(this.field); SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) { if (sortValue != null) {
@ -42,7 +45,21 @@ class LongFieldWriter extends FieldWriter {
} }
} else { } else {
// field is not part of 'sort' param, but part of 'fl' param // field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field); int readerOrd = readerContext.ord;
NumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
NumericDocValues numericDocValues = docValuesCache.get(readerOrd);
if(numericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = numericDocValues;
}
}
if(vals == null) {
vals = DocValues.getNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) == sortDoc.docId) { if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue(); val = vals.longValue();
} else { } else {

View File

@ -52,6 +52,10 @@ public class LongValue implements SortValue {
return new LongValue(field, comp); return new LongValue(field, comp);
} }
// No-op for long values: the body is empty, so the current value is left untouched.
// NOTE(review): presumably only ordinal-based (string) sort values need a
// conversion to a global value - confirm against the string implementation.
public void toGlobalValue(SortValue previousValue) {
}
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.vals = DocValues.getNumeric(context.reader(), field); this.vals = DocValues.getNumeric(context.reader(), field);
lastDocID = 0; lastDocID = 0;

View File

@ -21,11 +21,8 @@ import java.io.IOException;
import java.util.Date; import java.util.Date;
import java.util.function.LongFunction; import java.util.function.LongFunction;
import org.apache.lucene.index.DocValues; import com.carrotsearch.hppc.IntObjectHashMap;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.*;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
@ -41,6 +38,8 @@ class MultiFieldWriter extends FieldWriter {
private boolean numeric; private boolean numeric;
private CharsRefBuilder cref = new CharsRefBuilder(); private CharsRefBuilder cref = new CharsRefBuilder();
private final LongFunction<Object> bitsToValue; private final LongFunction<Object> bitsToValue;
private IntObjectHashMap<Object> docValuesCache = new IntObjectHashMap<>();
public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) { public MultiFieldWriter(String field, FieldType fieldType, SchemaField schemaField, boolean numeric) {
this.field = field; this.field = field;
@ -54,25 +53,59 @@ class MultiFieldWriter extends FieldWriter {
} }
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex) throws IOException {
if (this.fieldType.isPointField()) { if (this.fieldType.isPointField()) {
SortedNumericDocValues vals = DocValues.getSortedNumeric(reader, this.field); int readerOrd = readerContext.ord;
SortedNumericDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
SortedNumericDocValues sortedNumericDocValues = (SortedNumericDocValues) docValuesCache.get(readerOrd);
if(sortedNumericDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = sortedNumericDocValues;
}
}
if(vals == null) {
vals = DocValues.getSortedNumeric(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (!vals.advanceExact(sortDoc.docId)) return false; if (!vals.advanceExact(sortDoc.docId)) return false;
final SortedNumericDocValues docVals = vals;
out.put(this.field, out.put(this.field,
(IteratorWriter) w -> { (IteratorWriter) w -> {
for (int i = 0, count = vals.docValueCount(); i < count; i++) { for (int i = 0, count = docVals.docValueCount(); i < count; i++) {
w.add(bitsToValue.apply(vals.nextValue())); w.add(bitsToValue.apply(docVals.nextValue()));
} }
}); });
return true; return true;
} else { } else {
SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field); int readerOrd = readerContext.ord;
SortedSetDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
SortedSetDocValues sortedSetDocValues = (SortedSetDocValues) docValuesCache.get(readerOrd);
if(sortedSetDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = sortedSetDocValues;
}
}
if(vals == null) {
vals = DocValues.getSortedSet(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) != sortDoc.docId) return false; if (vals.advance(sortDoc.docId) != sortDoc.docId) return false;
final SortedSetDocValues docVals = vals;
out.put(this.field, out.put(this.field,
(IteratorWriter) w -> { (IteratorWriter) w -> {
long o; long o;
while((o = vals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { while((o = docVals.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
BytesRef ref = vals.lookupOrd(o); BytesRef ref = docVals.lookupOrd(o);
fieldType.indexedToReadable(ref, cref); fieldType.indexedToReadable(ref, cref);
IndexableField f = fieldType.createField(schemaField, cref.toString()); IndexableField f = fieldType.createField(schemaField, cref.toString());
if (f == null) w.add(cref.toString()); if (f == null) w.add(cref.toString());

View File

@ -25,6 +25,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
protected SortValue value4; protected SortValue value4;
@Override
public SortValue getSortValue(String field) { public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) { if (value1.getField().equals(field)) {
return value1; return value1;
@ -38,6 +39,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
return null; return null;
} }
@Override
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord; this.ord = context.ord;
this.docBase = context.docBase; this.docBase = context.docBase;
@ -47,6 +49,17 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.setNextReader(context); value4.setNextReader(context);
} }
/**
 * Asks each of the four sort values to convert itself to its global form,
 * passing the matching value of the previous document.
 *
 * @param previous the previous document; must be a {@code QuadValueSortDoc}
 */
@Override
public void setGlobalValues(SortDoc previous) {
  final QuadValueSortDoc prev = (QuadValueSortDoc) previous;
  value1.toGlobalValue(prev.value1);
  value2.toGlobalValue(prev.value2);
  value3.toGlobalValue(prev.value3);
  value4.toGlobalValue(prev.value4);
}
@Override
public void reset() { public void reset() {
this.docId = -1; this.docId = -1;
this.docBase = -1; this.docBase = -1;
@ -57,6 +70,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.reset(); value4.reset();
} }
@Override
public void setValues(int docId) throws IOException { public void setValues(int docId) throws IOException {
this.docId = docId; this.docId = docId;
value1.setCurrentValue(docId); value1.setCurrentValue(docId);
@ -65,6 +79,7 @@ class QuadValueSortDoc extends TripleValueSortDoc {
value4.setCurrentValue(docId); value4.setCurrentValue(docId);
} }
@Override
public void setValues(SortDoc sortDoc) { public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId; this.docId = sortDoc.docId;
this.ord = sortDoc.ord; this.ord = sortDoc.ord;
@ -80,53 +95,61 @@ class QuadValueSortDoc extends TripleValueSortDoc {
this.value4 = value4; this.value4 = value4;
} }
@Override
public SortDoc copy() { public SortDoc copy() {
return new QuadValueSortDoc(value1.copy(), value2.copy(), value3.copy(), value4.copy()); return new QuadValueSortDoc(value1.copy(), value2.copy(), value3.copy(), value4.copy());
} }
@Override
public boolean lessThan(Object o) { public boolean lessThan(Object o) {
QuadValueSortDoc sd = (QuadValueSortDoc)o; QuadValueSortDoc sd = (QuadValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value3.compareTo(sd.value3); comp = value3.compareTo(sd.value3);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value4.compareTo(sd.value4); comp = value4.compareTo(sd.value4);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
return docId+docBase > sd.docId+sd.docBase; return docId + docBase > sd.docId + sd.docBase;
} }
} }
} }
} }
} }
public int compareTo(Object o) { @Override
QuadValueSortDoc sd = (QuadValueSortDoc)o; public int compareTo(SortDoc o) {
QuadValueSortDoc sd = (QuadValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if(comp == 0) { if (comp == 0) {
comp = value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if(comp == 0) { if (comp == 0) {
comp = value3.compareTo(sd.value3); comp = value3.compareTo(sd.value3);
if(comp == 0) { if (comp == 0) {
return value4.compareTo(sd.value4); comp = value4.compareTo(sd.value4);
if (comp == 0) {
return (sd.docId + sd.docBase) - (docId + docBase);
} else {
return comp;
}
} else { } else {
return comp; return comp;
} }

View File

@ -25,6 +25,7 @@ class SingleValueSortDoc extends SortDoc {
protected SortValue value1; protected SortValue value1;
@Override
public SortValue getSortValue(String field) { public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) { if (value1.getField().equals(field)) {
return value1; return value1;
@ -32,12 +33,14 @@ class SingleValueSortDoc extends SortDoc {
return null; return null;
} }
@Override
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord; this.ord = context.ord;
this.docBase = context.docBase; this.docBase = context.docBase;
value1.setNextReader(context); value1.setNextReader(context);
} }
@Override
public void reset() { public void reset() {
this.docId = -1; this.docId = -1;
this.docBase = -1; this.docBase = -1;
@ -45,16 +48,18 @@ class SingleValueSortDoc extends SortDoc {
this.value1.reset(); this.value1.reset();
} }
@Override
public void setValues(int docId) throws IOException { public void setValues(int docId) throws IOException {
this.docId = docId; this.docId = docId;
value1.setCurrentValue(docId); value1.setCurrentValue(docId);
} }
@Override
public void setValues(SortDoc sortDoc) { public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId; this.docId = sortDoc.docId;
this.ord = sortDoc.ord; this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase; this.docBase = sortDoc.docBase;
value1.setCurrentValue(((SingleValueSortDoc)sortDoc).value1); value1.setCurrentValue(((SingleValueSortDoc) sortDoc).value1);
} }
public SingleValueSortDoc(SortValue value1) { public SingleValueSortDoc(SortValue value1) {
@ -62,25 +67,39 @@ class SingleValueSortDoc extends SortDoc {
this.value1 = value1; this.value1 = value1;
} }
/**
 * Asks the single sort value to convert itself to its global form, passing
 * the value of the previous document.
 *
 * @param previous the previous document; must be a {@code SingleValueSortDoc}
 */
@Override
public void setGlobalValues(SortDoc previous) {
  value1.toGlobalValue(((SingleValueSortDoc) previous).value1);
}
@Override
public SortDoc copy() { public SortDoc copy() {
return new SingleValueSortDoc(value1.copy()); return new SingleValueSortDoc(value1.copy());
} }
@Override
public boolean lessThan(Object o) { public boolean lessThan(Object o) {
SingleValueSortDoc sd = (SingleValueSortDoc)o; SingleValueSortDoc sd = (SingleValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
return docId+docBase > sd.docId+sd.docBase; return docId + docBase > sd.docId + sd.docBase;
} }
} }
public int compareTo(Object o) { @Override
SingleValueSortDoc sd = (SingleValueSortDoc)o; public int compareTo(SortDoc o) {
return value1.compareTo(sd.value1); SingleValueSortDoc sd = (SingleValueSortDoc) o;
int comp = value1.compareTo(sd.value1);
if (comp == 0) {
return (sd.docId + sd.docBase) - (docId + docBase);
} else {
return comp;
}
} }
public String toString() { public String toString() {

View File

@ -18,10 +18,11 @@
package org.apache.solr.handler.export; package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
class SortDoc { class SortDoc implements Comparable<SortDoc> {
protected int docId = -1; protected int docId = -1;
protected int ord = -1; protected int ord = -1;
@ -34,6 +35,21 @@ class SortDoc {
} }
public SortDoc() { public SortDoc() {
}
/**
 * Two sort docs are equal when they are of exactly the same class and
 * {@code compareTo} reports no difference between them.
 *
 * @param obj the object to compare with; may be {@code null}
 * @return {@code true} only for a non-null object of the same class that compares as 0
 */
@Override
public boolean equals(Object obj) {
  // equals(null) must answer false; without this guard obj.getClass() throws a NullPointerException.
  if (obj == null) {
    return false;
  }
  // subclasses are not equal
  if (!obj.getClass().equals(getClass())) {
    return false;
  }
  return compareTo((SortDoc) obj) == 0;
}
// Hashes the document's docId, ord and docBase.
// NOTE(review): equals() is defined through compareTo(), which ties on the
// sum docId + docBase after the sort values; two docs with the same sum but
// different docId/docBase/ord would be equal yet hash differently - confirm
// whether that can occur, or hash on (docId + docBase) instead.
@Override
public int hashCode() {
return Objects.hash(docId, ord, docBase);
} }
public SortValue getSortValue(String field) { public SortValue getSortValue(String field) {
@ -69,6 +85,13 @@ class SortDoc {
} }
} }
/**
 * Asks every sort value to convert itself to its global form, passing the
 * value at the same position in the previous document.
 *
 * @param previous the previous document; its sort values are read position by position
 */
public void setGlobalValues(SortDoc previous) {
  final SortValue[] prior = previous.sortValues;
  int position = 0;
  for (SortValue current : sortValues) {
    current.toGlobalValue(prior[position++]);
  }
}
public void setValues(SortDoc sortDoc) { public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId; this.docId = sortDoc.docId;
this.ord = sortDoc.ord; this.ord = sortDoc.ord;
@ -84,7 +107,6 @@ class SortDoc {
for (int i = 0; i < sortValues.length; i++) { for (int i = 0; i < sortValues.length; i++) {
svs[i] = sortValues[i].copy(); svs[i] = sortValues[i].copy();
} }
return new SortDoc(svs); return new SortDoc(svs);
} }
@ -92,7 +114,7 @@ class SortDoc {
if (docId == -1) { if (docId == -1) {
return true; return true;
} }
SortDoc sd = (SortDoc)o; SortDoc sd = (SortDoc) o;
SortValue[] sortValues1 = sd.sortValues; SortValue[] sortValues1 = sd.sortValues;
for (int i = 0; i < sortValues.length; i++) { for (int i = 0; i < sortValues.length; i++) {
int comp = sortValues[i].compareTo(sortValues1[i]); int comp = sortValues[i].compareTo(sortValues1[i]);
@ -105,18 +127,17 @@ class SortDoc {
return docId + docBase > sd.docId + sd.docBase; //index order return docId + docBase > sd.docId + sd.docBase; //index order
} }
public int compareTo(Object o) { @Override
SortDoc sd = (SortDoc)o; public int compareTo(SortDoc sd) {
for (int i = 0; i < sortValues.length; i++) { for (int i = 0; i < sortValues.length; i++) {
int comp = sortValues[i].compareTo(sd.sortValues[i]); int comp = sortValues[i].compareTo(sd.sortValues[i]);
if (comp != 0) { if (comp != 0) {
return comp; return comp;
} }
} }
return 0; return (sd.docId + sd.docBase) - (docId + docBase);
} }
public String toString() { public String toString() {
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
builder.append(ord).append(':').append(docBase).append(':').append(docId).append("; "); builder.append(ord).append(':').append(docBase).append(':').append(docId).append("; ");

View File

@ -21,18 +21,19 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
public interface SortValue extends Comparable<SortValue> { interface SortValue extends Comparable<SortValue> {
public void setCurrentValue(int docId) throws IOException; void setCurrentValue(int docId) throws IOException;
public void setNextReader(LeafReaderContext context) throws IOException; void setNextReader(LeafReaderContext context) throws IOException;
public void setCurrentValue(SortValue value); void setCurrentValue(SortValue value);
public void reset(); void toGlobalValue(SortValue previousValue);
public SortValue copy(); void reset();
public Object getCurrentValue() throws IOException; SortValue copy();
public String getField(); Object getCurrentValue() throws IOException;
String getField();
/** /**
* *
* @return true if document has a value for the specified field * @return true if document has a value for the specified field
*/ */
public boolean isPresent(); boolean isPresent();
} }

View File

@ -18,11 +18,9 @@
package org.apache.solr.handler.export; package org.apache.solr.handler.export;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import com.carrotsearch.hppc.IntObjectHashMap;
import java.util.Map;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
@ -32,10 +30,13 @@ import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
class StringFieldWriter extends FieldWriter { class StringFieldWriter extends FieldWriter {
private String field; protected String field;
private FieldType fieldType; private FieldType fieldType;
private Map<Integer, SortedDocValues> lastDocValues = new HashMap<>(); private BytesRef lastRef;
private CharsRefBuilder cref = new CharsRefBuilder(); private int lastOrd = -1;
private IntObjectHashMap<SortedDocValues> docValuesCache = new IntObjectHashMap<>();
protected CharsRefBuilder cref = new CharsRefBuilder();
final ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(new byte[0], 0, 0) { final ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(new byte[0], 0, 0) {
@Override @Override
public String toString() { public String toString() {
@ -53,48 +54,69 @@ class StringFieldWriter extends FieldWriter {
this.fieldType = fieldType; this.fieldType = fieldType;
} }
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException { public boolean write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
BytesRef ref; StringValue stringValue = (StringValue) sortDoc.getSortValue(this.field);
SortValue sortValue = sortDoc.getSortValue(this.field); BytesRef ref = null;
if (sortValue != null) {
if (sortValue.isPresent()) { if (stringValue != null) {
ref = (BytesRef) sortValue.getCurrentValue(); /*
} else { //empty-value We already have the top level ordinal used for sorting.
Now let's use it for caching the BytesRef so we don't have to look it up.
When we have long runs of repeated values due to the sort order of the docs, this is a huge win.
*/
if(stringValue.currentOrd == -1) {
//Null sort value
return false; return false;
} }
} else {
// field is not part of 'sort' param, but part of 'fl' param if (this.lastOrd == stringValue.currentOrd) {
SortedDocValues vals = lastDocValues.get(sortDoc.ord); ref = lastRef;
if (vals == null || vals.docID() >= sortDoc.docId) {
vals = DocValues.getSorted(reader, this.field);
lastDocValues.put(sortDoc.ord, vals);
} }
this.lastOrd = stringValue.currentOrd;
}
if (ref == null) {
//Reuse the last DocValues object if possible
int readerOrd = readerContext.ord;
SortedDocValues vals = null;
if(docValuesCache.containsKey(readerOrd)) {
SortedDocValues sortedDocValues = docValuesCache.get(readerOrd);
if(sortedDocValues.docID() < sortDoc.docId) {
//We have not advanced beyond the current docId so we can use this docValues.
vals = sortedDocValues;
}
}
if(vals == null) {
vals = DocValues.getSorted(readerContext.reader(), this.field);
docValuesCache.put(readerOrd, vals);
}
if (vals.advance(sortDoc.docId) != sortDoc.docId) { if (vals.advance(sortDoc.docId) != sortDoc.docId) {
return false; return false;
} }
int ord = vals.ordValue(); int ord = vals.ordValue();
ref = vals.lookupOrd(ord); ref = vals.lookupOrd(ord);
if(stringValue != null) {
//Don't need to set the lastRef if it's not a sort value.
lastRef = ref.clone();
}
} }
writeBytes(ew, ref, fieldType);
return true;
}
protected void writeBytes(MapWriter.EntryWriter ew, BytesRef ref, FieldType fieldType) throws IOException {
if (ew instanceof JavaBinCodec.BinEntryWriter) { if (ew instanceof JavaBinCodec.BinEntryWriter) {
ew.put(this.field, utf8.reset(ref.bytes, ref.offset, ref.length, null)); ew.put(this.field, utf8.reset(ref.bytes, ref.offset, ref.length, null));
} else { } else {
String v = null; fieldType.indexedToReadable(ref, cref);
if (sortValue != null) { ew.put(this.field, cref.toString());
v = ((StringValue) sortValue).getLastString();
if (v == null) {
fieldType.indexedToReadable(ref, cref);
v = cref.toString();
((StringValue) sortValue).setLastString(v);
}
} else {
fieldType.indexedToReadable(ref, cref);
v = cref.toString();
}
ew.put(this.field, v);
} }
return true;
} }
} }

View File

@ -38,13 +38,14 @@ class StringValue implements SortValue {
protected LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN; protected LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN;
protected SortedDocValues docValues; protected SortedDocValues docValues;
protected int currentOrd; public int currentOrd;
protected int lastDocID; protected int lastDocID;
private boolean present; private boolean present;
private BytesRef lastBytes; private BytesRef lastBytes;
private String lastString; private String lastString;
private int lastOrd = -1; private int lastOrd = -1;
private int leafOrd = -1;
public StringValue(SortedDocValues globalDocValues, String field, IntComp comp) { public StringValue(SortedDocValues globalDocValues, String field, IntComp comp) {
this.globalDocValues = globalDocValues; this.globalDocValues = globalDocValues;
@ -74,18 +75,22 @@ class StringValue implements SortValue {
} }
public void setCurrentValue(int docId) throws IOException { public void setCurrentValue(int docId) throws IOException {
//System.out.println(docId +":"+lastDocID);
/*
if (docId < lastDocID) { if (docId < lastDocID) {
throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + docId); throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + docId);
} }
lastDocID = docId; lastDocID = docId;
*/
if (docId > docValues.docID()) { if (docId > docValues.docID()) {
docValues.advance(docId); docValues.advance(docId);
} }
if (docId == docValues.docID()) { if (docId == docValues.docID()) {
present = true; present = true;
currentOrd = (int) toGlobal.get(docValues.ordValue()); currentOrd = docValues.ordValue();
} else { } else {
present = false; present = false;
currentOrd = -1; currentOrd = -1;
@ -98,9 +103,12 @@ class StringValue implements SortValue {
} }
public void setCurrentValue(SortValue sv) { public void setCurrentValue(SortValue sv) {
StringValue v = (StringValue)sv; StringValue v = (StringValue) sv;
this.currentOrd = v.currentOrd; this.currentOrd = v.currentOrd;
this.present = v.present; this.present = v.present;
this.leafOrd = v.leafOrd;
this.lastOrd = v.lastOrd;
this.toGlobal = v.toGlobal;
} }
public Object getCurrentValue() throws IOException { public Object getCurrentValue() throws IOException {
@ -113,11 +121,27 @@ class StringValue implements SortValue {
return lastBytes; return lastBytes;
} }
/**
 * Rewrites {@code currentOrd} using either the value held by {@code previousValue} or the
 * {@code toGlobal} mapping, after first remembering the incoming ordinal in {@code lastOrd}.
 *
 * <p>If {@code previousValue.lastOrd} equals the incoming ordinal, {@code previousValue.currentOrd}
 * is copied instead of consulting {@code toGlobal}. Otherwise an incoming ordinal greater than -1
 * is translated through {@code toGlobal}. An ordinal of -1 is never changed.
 *
 * @param previousValue the value to compare against; must be a {@code StringValue}
 */
public void toGlobalValue(SortValue previousValue) {
  final int incomingOrd = currentOrd;
  lastOrd = incomingOrd;
  final StringValue prior = (StringValue) previousValue;
  if (prior.lastOrd == incomingOrd) {
    // Take the global ord from the previous value, unless we are a -1,
    // which is the same in both global and leaf ordinals.
    if (incomingOrd != -1) {
      currentOrd = prior.currentOrd;
    }
  } else if (incomingOrd > -1) {
    // No match with the previous value: translate through the toGlobal mapping.
    currentOrd = (int) toGlobal.get(incomingOrd);
  }
}
public String getField() { public String getField() {
return field; return field;
} }
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
leafOrd = context.ord;
if (ordinalMap != null) { if (ordinalMap != null) {
toGlobal = ordinalMap.getGlobalOrds(context.ord); toGlobal = ordinalMap.getGlobalOrds(context.ord);
} }
@ -128,6 +152,7 @@ class StringValue implements SortValue {
public void reset() { public void reset() {
this.currentOrd = comp.resetValue(); this.currentOrd = comp.resetValue();
this.present = false; this.present = false;
lastDocID = 0;
} }
public int compareTo(SortValue o) { public int compareTo(SortValue o) {

View File

@ -25,6 +25,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
protected SortValue value3; protected SortValue value3;
@Override
public SortValue getSortValue(String field) { public SortValue getSortValue(String field) {
if (value1.getField().equals(field)) { if (value1.getField().equals(field)) {
return value1; return value1;
@ -36,6 +37,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
return null; return null;
} }
@Override
public void setNextReader(LeafReaderContext context) throws IOException { public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord; this.ord = context.ord;
this.docBase = context.docBase; this.docBase = context.docBase;
@ -44,6 +46,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.setNextReader(context); value3.setNextReader(context);
} }
@Override
public void reset() { public void reset() {
this.docId = -1; this.docId = -1;
this.docBase = -1; this.docBase = -1;
@ -53,6 +56,7 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.reset(); value3.reset();
} }
@Override
public void setValues(int docId) throws IOException { public void setValues(int docId) throws IOException {
this.docId = docId; this.docId = docId;
value1.setCurrentValue(docId); value1.setCurrentValue(docId);
@ -60,13 +64,22 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
value3.setCurrentValue(docId); value3.setCurrentValue(docId);
} }
/**
 * Calls {@code toGlobalValue} on {@code value1}, {@code value2} and {@code value3}, in that
 * order, passing each the same-numbered value taken from {@code previous}.
 *
 * @param previous the doc supplying the comparison values; must be a {@code TripleValueSortDoc}
 */
@Override
public void setGlobalValues(SortDoc previous) {
  // Cast once up front; a non-TripleValueSortDoc argument fails here before any value is touched.
  final TripleValueSortDoc prior = (TripleValueSortDoc) previous;
  this.value1.toGlobalValue(prior.value1);
  this.value2.toGlobalValue(prior.value2);
  this.value3.toGlobalValue(prior.value3);
}
@Override
public void setValues(SortDoc sortDoc) { public void setValues(SortDoc sortDoc) {
this.docId = sortDoc.docId; this.docId = sortDoc.docId;
this.ord = sortDoc.ord; this.ord = sortDoc.ord;
this.docBase = sortDoc.docBase; this.docBase = sortDoc.docBase;
value1.setCurrentValue(((TripleValueSortDoc)sortDoc).value1); value1.setCurrentValue(((TripleValueSortDoc) sortDoc).value1);
value2.setCurrentValue(((TripleValueSortDoc)sortDoc).value2); value2.setCurrentValue(((TripleValueSortDoc) sortDoc).value2);
value3.setCurrentValue(((TripleValueSortDoc)sortDoc).value3); value3.setCurrentValue(((TripleValueSortDoc) sortDoc).value3);
} }
public TripleValueSortDoc(SortValue value1, SortValue value2, SortValue value3) { public TripleValueSortDoc(SortValue value1, SortValue value2, SortValue value3) {
@ -74,44 +87,51 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
this.value3 = value3; this.value3 = value3;
} }
@Override
public SortDoc copy() { public SortDoc copy() {
return new TripleValueSortDoc(value1.copy(), value2.copy(), value3.copy()); return new TripleValueSortDoc(value1.copy(), value2.copy(), value3.copy());
} }
@Override
public boolean lessThan(Object o) { public boolean lessThan(Object o) {
TripleValueSortDoc sd = (TripleValueSortDoc) o;
TripleValueSortDoc sd = (TripleValueSortDoc)o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
comp = value3.compareTo(sd.value3); comp = value3.compareTo(sd.value3);
if(comp == -1) { if (comp == -1) {
return true; return true;
} else if (comp == 1) { } else if (comp == 1) {
return false; return false;
} else { } else {
return docId+docBase > sd.docId+sd.docBase; return docId + docBase > sd.docId + sd.docBase;
} }
} }
} }
} }
public int compareTo(Object o) { @Override
TripleValueSortDoc sd = (TripleValueSortDoc)o; public int compareTo(SortDoc o) {
TripleValueSortDoc sd = (TripleValueSortDoc) o;
int comp = value1.compareTo(sd.value1); int comp = value1.compareTo(sd.value1);
if (comp == 0) { if (comp == 0) {
comp = value2.compareTo(sd.value2); comp = value2.compareTo(sd.value2);
if (comp == 0) { if (comp == 0) {
return value3.compareTo(sd.value3); comp = value3.compareTo(sd.value3);
if (comp == 0) {
return (sd.docId + sd.docBase) - (docId + docBase);
} else {
return comp;
}
} else { } else {
return comp; return comp;
} }

View File

@ -503,55 +503,60 @@ public class SolrLogPostTool {
private void addParams(SolrInputDocument doc, String params) { private void addParams(SolrInputDocument doc, String params) {
String[] pairs = params.split("&"); String[] pairs = params.split("&");
for(String pair : pairs) { for (String pair : pairs) {
String[] parts = pair.split("="); String[] parts = pair.split("=");
if(parts.length == 2 && parts[0].equals("q")) { if (parts.length == 2 && parts[0].equals("q")) {
String dq = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dq = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "q_s", dq); setFieldIfUnset(doc, "q_s", dq);
setFieldIfUnset(doc, "q_t", dq); setFieldIfUnset(doc, "q_t", dq);
} }
if(parts[0].equals("rows")) { if (parts[0].equals("rows")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "rows_i", dr); setFieldIfUnset(doc, "rows_i", dr);
} }
if(parts[0].equals("distrib")) { if (parts[0].equals("start")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "start_i", dr);
}
if (parts[0].equals("distrib")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "distrib_s", dr); setFieldIfUnset(doc, "distrib_s", dr);
} }
if(parts[0].equals("shards")) { if (parts[0].equals("shards")) {
setFieldIfUnset(doc, "shards_s", "true"); setFieldIfUnset(doc, "shards_s", "true");
} }
if(parts[0].equals("ids") && !isRTGRequest(doc)) { if (parts[0].equals("ids") && !isRTGRequest(doc)) {
setFieldIfUnset(doc, "ids_s", "true"); setFieldIfUnset(doc, "ids_s", "true");
} }
if(parts[0].equals("isShard")) { if (parts[0].equals("isShard")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "isShard_s", dr); setFieldIfUnset(doc, "isShard_s", dr);
} }
if(parts[0].equals("wt")) { if (parts[0].equals("wt")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "wt_s", dr); setFieldIfUnset(doc, "wt_s", dr);
} }
if(parts[0].equals("facet")) { if (parts[0].equals("facet")) {
String dr = URLDecoder.decode(parts[1], Charset.defaultCharset()); String dr = URLDecoder.decode(parts[1], Charset.defaultCharset());
setFieldIfUnset(doc, "facet_s", dr); setFieldIfUnset(doc, "facet_s", dr);
} }
if(parts[0].equals("shards.purpose")) { if (parts[0].equals("shards.purpose")) {
try { try {
int purpose = Integer.parseInt(parts[1]); int purpose = Integer.parseInt(parts[1]);
String[] purposes = getRequestPurposeNames(purpose); String[] purposes = getRequestPurposeNames(purpose);
for (String p : purposes) { for (String p : purposes) {
doc.addField("purpose_ss", p); doc.addField("purpose_ss", p);
} }
} catch(Throwable e) { } catch (Throwable e) {
//We'll just sit on this for now and not interrupt the load for this one field. //We'll just sit on this for now and not interrupt the load for this one field.
} }
} }