SOLR-12572: While exporting documents using the export writer, if a field is specified as a sort parameter and also in the fl (field list) parameter, we save on one doc-value lookup. This can bring performance improvements of 15% and upwards depending on how many fields are in common

This commit is contained in:
Varun Thacker 2018-08-23 10:27:15 -07:00
parent 025350ea12
commit dfd2801cd2
23 changed files with 316 additions and 54 deletions

View File

@ -280,6 +280,10 @@ Optimizations
* SOLR-12616: Optimize Export writer up to 4 sort fields to get better performance.
This was removed in SOLR-11598 but brought back in the same version (Amrit Sarkar, Varun Thacker)
* SOLR-12572: While exporting documents using the export writer, if a field is specified as a sort parameter and also
in the fl (field list) parameter, we save on one doc-value lookup. This can bring performance improvements of 15%
and upwards depending on how many fields are in common. (Amrit Sarkar, Varun Thacker)
Other Changes
----------------------

View File

@ -37,14 +37,25 @@ class BoolFieldWriter extends FieldWriter {
this.fieldType = fieldType;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
BytesRef ref;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
ref = (BytesRef) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) {
if (vals.advance(sortDoc.docId) != sortDoc.docId) {
return false;
}
int ord = vals.ordValue();
ref = vals.lookupOrd(ord);
}
BytesRef ref = vals.lookupOrd(ord);
fieldType.indexedToReadable(ref, cref);
ew.put(this.field, "true".equals(cref.toString()));
return true;

View File

@ -32,14 +32,24 @@ class DateFieldWriter extends FieldWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
Long val;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
val = (long) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val;
if (vals.advance(docId) == docId) {
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue();
} else {
return false;
}
}
ew.put(this.field, new Date(val));
return true;
}

View File

@ -31,15 +31,26 @@ class DoubleFieldWriter extends FieldWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
double val = (double) sortValue.getCurrentValue();
ew.put(this.field, val);
return true;
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
long val;
if (vals.advance(docId) == docId) {
val = vals.longValue();
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
long val = vals.longValue();
ew.put(this.field, Double.longBitsToDouble(val));
return true;
} else {
return false;
}
ew.put(this.field, Double.longBitsToDouble(val));
return true;
}
}
}

View File

@ -32,11 +32,22 @@ class DoubleValue implements SortValue {
protected DoubleComp comp;
private int lastDocID;
private LeafReader reader;
private boolean present;
public DoubleValue(String field, DoubleComp comp) {
this.field = field;
this.comp = comp;
this.currentValue = comp.resetValue();
this.present = false;
}
/**
 * Returns the double held for the current document, boxed as an {@code Object}.
 * An assertion checks that a value is present, so callers should consult
 * {@code isPresent()} before calling this.
 *
 * @return the current value as a {@code Double}
 */
public Object getCurrentValue() {
  assert present;
  return Double.valueOf(currentValue);
}
public String getField() {
return field;
}
public DoubleValue copy() {
@ -59,19 +70,28 @@ class DoubleValue implements SortValue {
curDocID = vals.advance(docId);
}
if (docId == curDocID) {
present = true;
currentValue = Double.longBitsToDouble(vals.longValue());
} else {
present = false;
currentValue = 0f;
}
}
@Override
public boolean isPresent() {
return present;
}
/**
 * Copies the state of another {@code DoubleValue} into this one: both the
 * value and the flag recording whether the document had a value.
 *
 * @param sv the value to copy from; cast to {@code DoubleValue}
 */
public void setCurrentValue(SortValue sv) {
  final DoubleValue source = (DoubleValue) sv;
  this.present = source.present;
  this.currentValue = source.currentValue;
}
public void reset() {
this.currentValue = comp.resetValue();
this.present = false;
}
public int compareTo(SortValue o) {

View File

@ -25,6 +25,15 @@ class DoubleValueSortDoc extends SingleValueSortDoc {
protected SortValue value2;
/**
 * Looks up the sort value that belongs to the given field among this doc's
 * two sort values. {@code value1} is checked before {@code value2}.
 *
 * @param field name of the field to look up
 * @return the sort value whose field equals {@code field}, or {@code null}
 *         when neither sort value is for that field
 */
@Override
public SortValue getSortValue(String field) {
  if (value1.getField().equals(field)) {
    return value1;
  }
  if (value2.getField().equals(field)) {
    return value2;
  }
  return null;
}
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;

View File

@ -290,7 +290,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0;
for (FieldWriter fieldWriter : fieldWriters) {
if (fieldWriter.write(sortDoc.docId, context.reader(), ew, fieldIndex)) {
if (fieldWriter.write(sortDoc, context.reader(), ew, fieldIndex)) {
++fieldIndex;
}
}

View File

@ -23,5 +23,5 @@ import org.apache.lucene.index.LeafReader;
import org.apache.solr.common.MapWriter;
abstract class FieldWriter {
public abstract boolean write(int docId, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException;
public abstract boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException;
}

View File

@ -31,15 +31,26 @@ class FloatFieldWriter extends FieldWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
float val = (float) sortValue.getCurrentValue();
ew.put(this.field, val);
return true;
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
int val;
if (vals.advance(docId) == docId) {
val = (int)vals.longValue();
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
int val = (int) vals.longValue();
ew.put(this.field, Float.intBitsToFloat(val));
return true;
} else {
return false;
}
ew.put(this.field, Float.intBitsToFloat(val));
return true;
}
}
}

View File

@ -30,11 +30,22 @@ class FloatValue implements SortValue {
protected float currentValue;
protected FloatComp comp;
private int lastDocID;
private boolean present;
public FloatValue(String field, FloatComp comp) {
this.field = field;
this.comp = comp;
this.currentValue = comp.resetValue();
this.present = false;
}
/**
 * Returns the float held for the current document, boxed as an {@code Object}.
 * An assertion checks that a value is present, so callers should consult
 * {@code isPresent()} before calling this.
 *
 * @return the current value as a {@code Float}
 */
public Object getCurrentValue() {
  assert present;
  return Float.valueOf(currentValue);
}
public String getField() {
return field;
}
public FloatValue copy() {
@ -56,19 +67,28 @@ class FloatValue implements SortValue {
curDocID = vals.advance(docId);
}
if (docId == curDocID) {
present = true;
currentValue = Float.intBitsToFloat((int)vals.longValue());
} else {
present = false;
currentValue = 0f;
}
}
@Override
public boolean isPresent() {
return present;
}
/**
 * Copies the state of another {@code FloatValue} into this one: both the
 * value and the flag recording whether the document had a value.
 *
 * @param sv the value to copy from; cast to {@code FloatValue}
 */
public void setCurrentValue(SortValue sv) {
  final FloatValue source = (FloatValue) sv;
  this.present = source.present;
  this.currentValue = source.currentValue;
}
public void reset() {
this.currentValue = comp.resetValue();
this.present = false;
}
public int compareTo(SortValue o) {

View File

@ -31,14 +31,24 @@ class IntFieldWriter extends FieldWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
int val;
if (vals.advance(docId) == docId) {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
val = (int) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = (int) vals.longValue();
} else {
return false;
}
}
ew.put(this.field, val);
return true;
}

View File

@ -30,6 +30,16 @@ public class IntValue implements SortValue {
protected int currentValue;
protected IntComp comp;
private int lastDocID;
protected boolean present;
/**
 * Returns the int held for the current document, boxed as an {@code Object}.
 * An assertion checks that a value is present, so callers should consult
 * {@code isPresent()} before calling this.
 *
 * @return the current value as an {@code Integer}
 */
public Object getCurrentValue() {
  assert present;
  return Integer.valueOf(currentValue);
}
public String getField() {
return field;
}
public IntValue copy() {
return new IntValue(field, comp);
@ -39,6 +49,7 @@ public class IntValue implements SortValue {
this.field = field;
this.comp = comp;
this.currentValue = comp.resetValue();
this.present = false;
}
public void setNextReader(LeafReaderContext context) throws IOException {
@ -56,22 +67,32 @@ public class IntValue implements SortValue {
curDocID = vals.advance(docId);
}
if (docId == curDocID) {
present = true;
currentValue = (int) vals.longValue();
} else {
present = false;
currentValue = 0;
}
}
@Override
public boolean isPresent() {
return this.present;
}
public int compareTo(SortValue o) {
IntValue iv = (IntValue)o;
return comp.compare(currentValue, iv.currentValue);
}
public void setCurrentValue (SortValue value) {
currentValue = ((IntValue)value).currentValue;
public void setCurrentValue(SortValue sv) {
IntValue iv = (IntValue)sv;
this.currentValue = iv.currentValue;
this.present = iv.present;
}
public void reset() {
currentValue = comp.resetValue();
this.present = false;
}
}

View File

@ -31,14 +31,24 @@ class LongFieldWriter extends FieldWriter {
this.field = field;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
long val;
if (vals.advance(docId) == docId) {
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
val = (long) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
NumericDocValues vals = DocValues.getNumeric(reader, this.field);
if (vals.advance(sortDoc.docId) == sortDoc.docId) {
val = vals.longValue();
} else {
return false;
}
}
ew.put(field, val);
return true;
}

View File

@ -30,11 +30,22 @@ public class LongValue implements SortValue {
protected long currentValue;
protected LongComp comp;
private int lastDocID;
private boolean present;
public LongValue(String field, LongComp comp) {
this.field = field;
this.comp = comp;
this.currentValue = comp.resetValue();
this.present = false;
}
/**
 * Returns the long held for the current document, boxed as an {@code Object}.
 * An assertion checks that a value is present, so callers should consult
 * {@code isPresent()} before calling this.
 *
 * @return the current value as a {@code Long}
 */
public Object getCurrentValue() {
  assert present;
  return Long.valueOf(currentValue);
}
public String getField() {
return field;
}
public LongValue copy() {
@ -56,15 +67,23 @@ public class LongValue implements SortValue {
curDocID = vals.advance(docId);
}
if (docId == curDocID) {
present = true;
currentValue = vals.longValue();
} else {
present = false;
currentValue = 0;
}
}
@Override
public boolean isPresent() {
return present;
}
/**
 * Copies the state of another {@code LongValue} into this one: both the
 * value and the flag recording whether the document had a value.
 *
 * @param sv the value to copy from; cast to {@code LongValue}
 */
public void setCurrentValue(SortValue sv) {
  final LongValue source = (LongValue) sv;
  this.present = source.present;
  this.currentValue = source.currentValue;
}
public int compareTo(SortValue o) {
@ -74,5 +93,6 @@ public class LongValue implements SortValue {
public void reset() {
this.currentValue = comp.resetValue();
this.present = false;
}
}

View File

@ -54,10 +54,10 @@ class MultiFieldWriter extends FieldWriter {
}
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter out, int fieldIndex) throws IOException {
if (this.fieldType.isPointField()) {
SortedNumericDocValues vals = DocValues.getSortedNumeric(reader, this.field);
if (!vals.advanceExact(docId)) return false;
if (!vals.advanceExact(sortDoc.docId)) return false;
out.put(this.field,
(IteratorWriter) w -> {
for (int i = 0; i < vals.docValueCount(); i++) {
@ -67,7 +67,7 @@ class MultiFieldWriter extends FieldWriter {
return true;
} else {
SortedSetDocValues vals = DocValues.getSortedSet(reader, this.field);
if (vals.advance(docId) != docId) return false;
if (vals.advance(sortDoc.docId) != sortDoc.docId) return false;
out.put(this.field,
(IteratorWriter) w -> {
long o;

View File

@ -25,6 +25,19 @@ class QuadValueSortDoc extends TripleValueSortDoc {
protected SortValue value4;
/**
 * Looks up the sort value that belongs to the given field among this doc's
 * four sort values. They are checked in order, {@code value1} through
 * {@code value4}, and the first match wins.
 *
 * @param field name of the field to look up
 * @return the sort value whose field equals {@code field}, or {@code null}
 *         when none of the four sort values is for that field
 */
@Override
public SortValue getSortValue(String field) {
  if (value1.getField().equals(field)) {
    return value1;
  }
  if (value2.getField().equals(field)) {
    return value2;
  }
  if (value3.getField().equals(field)) {
    return value3;
  }
  if (value4.getField().equals(field)) {
    return value4;
  }
  return null;
}
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;

View File

@ -25,6 +25,13 @@ class SingleValueSortDoc extends SortDoc {
protected SortValue value1;
/**
 * Returns this doc's single sort value when it is for the given field.
 *
 * @param field name of the field to look up
 * @return {@code value1} if its field equals {@code field}, otherwise {@code null}
 */
@Override
public SortValue getSortValue(String field) {
  return value1.getField().equals(field) ? value1 : null;
}
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;

View File

@ -32,9 +32,19 @@ class SortDoc {
public SortDoc(SortValue[] sortValues) {
this.sortValues = sortValues;
}
public SortDoc() {
}
/**
 * Scans this doc's sort values for the one that belongs to the given field.
 *
 * @param field name of the field to look up
 * @return the first sort value whose field equals {@code field}, or {@code null}
 *         when the field is not one of the sort values (or no sort values are set)
 */
public SortValue getSortValue(String field) {
  // The no-arg constructor does not assign sortValues, so it may be unset here;
  // treat that the same as "field is not a sort field" rather than failing.
  if (sortValues == null) {
    return null;
  }
  for (SortValue candidate : sortValues) {
    if (candidate.getField().equals(field)) {
      return candidate;
    }
  }
  return null;
}
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;

View File

@ -27,4 +27,12 @@ public interface SortValue extends Comparable<SortValue> {
public void setCurrentValue(SortValue value);
public void reset();
public SortValue copy();
public Object getCurrentValue() throws IOException;
public String getField();
/**
 * Reports whether the current document has a value for this sort field.
 *
 * @return true if document has a value for the specified field
 */
public boolean isPresent();
}

View File

@ -37,14 +37,24 @@ class StringFieldWriter extends FieldWriter {
this.fieldType = fieldType;
}
public boolean write(int docId, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
public boolean write(SortDoc sortDoc, LeafReader reader, MapWriter.EntryWriter ew, int fieldIndex) throws IOException {
BytesRef ref;
SortValue sortValue = sortDoc.getSortValue(this.field);
if (sortValue != null) {
if (sortValue.isPresent()) {
ref = (BytesRef) sortValue.getCurrentValue();
} else { //empty-value
return false;
}
} else {
// field is not part of 'sort' param, but part of 'fl' param
SortedDocValues vals = DocValues.getSorted(reader, this.field);
if (vals.advance(docId) != docId) {
if (vals.advance(sortDoc.docId) != sortDoc.docId) {
return false;
}
int ord = vals.ordValue();
BytesRef ref = vals.lookupOrd(ord);
ref = vals.lookupOrd(ord);
}
fieldType.indexedToReadable(ref, cref);
ew.put(this.field, cref.toString());
return true;

View File

@ -38,6 +38,7 @@ class StringValue implements SortValue {
protected int currentOrd;
protected IntComp comp;
protected int lastDocID;
private boolean present;
public StringValue(SortedDocValues globalDocValues, String field, IntComp comp) {
this.globalDocValues = globalDocValues;
@ -48,6 +49,7 @@ class StringValue implements SortValue {
this.field = field;
this.comp = comp;
this.currentOrd = comp.resetValue();
this.present = false;
}
public StringValue copy() {
@ -65,15 +67,32 @@ class StringValue implements SortValue {
docValues.advance(docId);
}
if (docId == docValues.docID()) {
present = true;
currentOrd = (int) toGlobal.get(docValues.ordValue());
} else {
present = false;
currentOrd = -1;
}
}
@Override
public boolean isPresent() {
return present;
}
/**
 * Copies the state of another {@code StringValue} into this one: both the
 * current ordinal and the flag recording whether the document had a value.
 *
 * @param sv the value to copy from; cast to {@code StringValue}
 */
public void setCurrentValue(SortValue sv) {
  final StringValue source = (StringValue) sv;
  this.present = source.present;
  this.currentOrd = source.currentOrd;
}
/**
 * Resolves the current ordinal through {@code docValues.lookupOrd} and returns the result.
 * An assertion checks that a value is present, so callers should consult
 * {@code isPresent()} before calling this.
 *
 * NOTE(review): where currentOrd is set for a document it is stored as
 * {@code toGlobal.get(docValues.ordValue())}, i.e. after ordinal mapping, while the lookup
 * here goes through {@code docValues} - confirm both refer to the same ordinal space.
 *
 * @return the value that {@code docValues} reports for {@code currentOrd}
 * @throws IOException declared by this method; presumably raised by the ordinal lookup
 */
public Object getCurrentValue() throws IOException {
assert present == true;
return docValues.lookupOrd(currentOrd);
}
public String getField() {
return field;
}
public void setNextReader(LeafReaderContext context) throws IOException {
@ -86,6 +105,7 @@ class StringValue implements SortValue {
public void reset() {
this.currentOrd = comp.resetValue();
this.present = false;
}
public int compareTo(SortValue o) {

View File

@ -25,6 +25,17 @@ class TripleValueSortDoc extends DoubleValueSortDoc {
protected SortValue value3;
/**
 * Looks up the sort value that belongs to the given field among this doc's
 * three sort values. They are checked in order, {@code value1} through
 * {@code value3}, and the first match wins.
 *
 * @param field name of the field to look up
 * @return the sort value whose field equals {@code field}, or {@code null}
 *         when none of the three sort values is for that field
 */
@Override
public SortValue getSortValue(String field) {
  if (value1.getField().equals(field)) {
    return value1;
  }
  if (value2.getField().equals(field)) {
    return value2;
  }
  if (value3.getField().equals(field)) {
    return value3;
  }
  return null;
}
public void setNextReader(LeafReaderContext context) throws IOException {
this.ord = context.ord;
this.docBase = context.docBase;

View File

@ -58,6 +58,32 @@ public class TestExportWriter extends SolrTestCaseJ4 {
}
@Test
public void testEmptyValues() throws Exception {
// Regression test for SOLR-12572 (potential NPEs when a sort field has no value).
// Index two documents, one of which has no value for field2_i_p, then export
// with field2_i_p used both as the sort field and in the field list.
assertU(delQ("*:*"));
assertU(commit());
assertU(adoc("id","1", "field2_i_p","1"));
assertU(adoc("id","2"));
assertU(commit());
String resp = h.query(req("q", "*:*", "qt", "/export", "fl", "id,field2_i_p", "sort", "field2_i_p asc"));
// Expected: the document without a value comes first and its output omits field2_i_p.
assertJsonEquals(resp, "{\n" +
" \"responseHeader\":{\"status\":0},\n" +
" \"response\":{\n" +
" \"numFound\":2,\n" +
" \"docs\":[{\n" +
" \"id\":\"2\"}\n" +
" ,{\n" +
" \"id\":\"1\",\n" +
" \"field2_i_p\":1}]}}");
}
public static void createIndex() {
assertU(adoc("id","1",
"floatdv","2.1",
@ -569,7 +595,7 @@ public class TestExportWriter extends SolrTestCaseJ4 {
assertU(delQ("*:*"));
assertU(commit());
int numDocs = 1000;
int numDocs = 1000*40;
//10 unique values
String[] str_vals = new String[10];