SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor

This commit is contained in:
yonik 2016-12-07 11:08:33 -05:00
parent 10500c894d
commit ca5e736db1
5 changed files with 270 additions and 75 deletions

View File

@ -62,6 +62,11 @@ Optimizations
* SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the
creation of a Lucene FieldType every time a field is indexed. (John Call, yonik)
* SOLR-9822: JSON Facet API: Recover performance lost due to the DocValues transition to
an iterator API (LUCENE-7407). This only fixes calculating counts for single-valued
string fields from the FieldCache, resulting in up to 56% better throughput for those cases.
(yonik)
================== 6.4.0 ==================

View File

@ -33,6 +33,7 @@ import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Filter;
import org.apache.solr.uninverting.FieldCacheImpl;
/**
* Grabs values from {@link DocValues}.
@ -184,15 +185,33 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
int segMax = singleDv.getValueCount() + 1;
final int[] counts = getCountArr( segMax );
/** alternate trial implementations
// ord
// FieldUtil.visitOrds(singleDv, disi, (doc,ord)->{counts[ord+1]++;} );
FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
counts[ ordValues.getOrd() + 1]++;
}
**/
// calculate segment-local counts
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (singleDv.advanceExact(doc)) {
counts[ singleDv.ordValue() + 1 ]++;
} else {
counts[ 0 ]++;
if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
counts[fc.getOrd(doc) + 1]++;
}
} else {
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (singleDv.advanceExact(doc)) {
counts[singleDv.ordValue() + 1]++;
}
}
}
// convert segment-local counts to global counts
for (int i=1; i<segMax; i++) {
int segCount = counts[i];
if (segCount > 0) {
@ -250,12 +269,26 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (singleDv.advanceExact(doc)) {
int segOrd = singleDv.ordValue();
if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter)singleDv;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int segOrd = fc.getOrd(doc);
if (segOrd < 0) continue;
int ord = (int)toGlobal.get(segOrd);
countAcc.incrementCount(ord, 1);
}
} else {
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (singleDv.advanceExact(doc)) {
int segOrd = singleDv.ordValue();
int ord = (int) toGlobal.get(segOrd);
countAcc.incrementCount(ord, 1);
}
}
}
}

View File

@ -21,10 +21,13 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.uninverting.FieldCacheImpl;
/** @lucene.internal
* Porting helper... may be removed if it offers no value in the future.
@ -52,4 +55,148 @@ public class FieldUtil {
return si == null ? DocValues.emptySortedSet() : si;
}
/** The following ord visitors and wrappers are a work in progress and experimental
* @lucene.experimental */
@FunctionalInterface
public interface OrdFunc {
void handleOrd(int docid, int ord); // TODO: throw exception?
}
public static boolean isFieldCache(SortedDocValues singleDv) {
return singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter;
}
public static void visitOrds(SortedDocValues singleDv, DocIdSetIterator disi, OrdFunc ordFunc) throws IOException {
int doc;
if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
ordFunc.handleOrd(doc, fc.getOrd(doc));
}
} else {
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (singleDv.advanceExact(doc)) {
ordFunc.handleOrd(doc, singleDv.ordValue());
} else {
// TODO: optionally pass in missingOrd?
}
}
}
}
public static OrdValues getOrdValues(SortedDocValues singleDv, DocIdSetIterator disi) {
if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
return new FCOrdValues(fc, disi);
}
return new DVOrdValues(singleDv, disi);
}
public static abstract class OrdValues extends SortedDocValues {
int doc;
int ord;
public int getOrd() {
return ord;
}
@Override
public int docID() {
return doc;
}
@Override
public abstract int nextDoc() throws IOException;
@Override
public int advance(int target) throws IOException {
return 0; // TODO
}
@Override
public long cost() {
return 0;
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
}
public static class FCOrdValues extends OrdValues {
FieldCacheImpl.SortedDocValuesImpl.Iter vals;
DocIdSetIterator disi;
public FCOrdValues(FieldCacheImpl.SortedDocValuesImpl.Iter iter, DocIdSetIterator disi) {
this.vals = iter;
this.disi = disi;
}
@Override
public int nextDoc() throws IOException {
doc = disi.nextDoc();
if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
ord = vals.getOrd(doc); // todo: loop until a hit?
return doc;
}
@Override
public boolean advanceExact(int target) throws IOException {
return false;
}
@Override
public int ordValue() {
return 0;
}
@Override
public BytesRef lookupOrd(int ord) throws IOException {
return null;
}
}
public static class DVOrdValues extends OrdValues {
SortedDocValues vals;
DocIdSetIterator disi;
int valDoc;
public DVOrdValues(SortedDocValues vals, DocIdSetIterator disi) {
this.vals = vals;
this.disi = disi;
}
@Override
public int nextDoc() throws IOException {
for (;;) {
// todo - use skipping when appropriate
doc = disi.nextDoc();
if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
boolean match = vals.advanceExact(doc);
if (match) {
ord = vals.ordValue();
return doc;
}
}
}
@Override
public boolean advanceExact(int target) throws IOException {
return false;
}
@Override
public int ordValue() {
return 0;
}
@Override
public BytesRef lookupOrd(int ord) throws IOException {
return null;
}
}
}

View File

@ -45,7 +45,7 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* @lucene.internal
*/
interface FieldCache {
public interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.

View File

@ -58,9 +58,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
* Expert: The default cache implementation, storing all values in memory.
* A WeakHashMap is used for storage.
*
* @since lucene 1.4
* @lucene.internal
*/
class FieldCacheImpl implements FieldCache {
public class FieldCacheImpl implements FieldCache {
private Map<Class<?>,Cache> caches;
FieldCacheImpl() {
@ -786,79 +786,89 @@ class FieldCacheImpl implements FieldCache {
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.numOrd = numOrd;
}
public SortedDocValues iterator() {
final BytesRef term = new BytesRef();
return new SortedDocValues() {
private int docID = -1;
return new Iter();
}
@Override
public int docID() {
return docID;
}
public class Iter extends SortedDocValues {
private int docID = -1;
private final BytesRef term = new BytesRef();
@Override
public int nextDoc() {
while (true) {
docID++;
if (docID >= docToTermOrd.size()) {
docID = NO_MORE_DOCS;
return docID;
}
if (docToTermOrd.get(docID) != 0) {
return docID;
}
}
}
/** @lucene.internal Specific to this implementation and subject to change. For internal optimization only. */
public int getOrd(int docID) {
// Subtract 1, matching the 1+ord we did when
// storing, so that missing values, which are 0 in the
// packed ints, are returned as -1 ord:
return (int) docToTermOrd.get(docID)-1;
}
@Override
public int advance(int target) {
if (target < docToTermOrd.size()) {
docID = target;
if (docToTermOrd.get(docID) != 0) {
return docID;
} else{
return nextDoc();
}
} else {
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() {
while (true) {
docID++;
if (docID >= docToTermOrd.size()) {
docID = NO_MORE_DOCS;
return docID;
}
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return docToTermOrd.get(docID) != 0;
}
@Override
public long cost() {
return 0;
}
@Override
public int ordValue() {
// Subtract 1, matching the 1+ord we did when
// storing, so that missing values, which are 0 in the
// packed ints, are returned as -1 ord:
return (int) docToTermOrd.get(docID)-1;
}
@Override
public int getValueCount() {
return numOrd;
}
@Override
public BytesRef lookupOrd(int ord) {
if (ord < 0) {
throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
if (docToTermOrd.get(docID) != 0) {
return docID;
}
bytes.fill(term, termOrdToBytesOffset.get(ord));
return term;
}
};
}
@Override
public int advance(int target) {
if (target < docToTermOrd.size()) {
docID = target;
if (docToTermOrd.get(docID) != 0) {
return docID;
} else{
return nextDoc();
}
} else {
docID = NO_MORE_DOCS;
return docID;
}
}
@Override
public boolean advanceExact(int target) throws IOException {
docID = target;
return docToTermOrd.get(docID) != 0;
}
@Override
public long cost() {
return 0;
}
@Override
public int ordValue() {
// Subtract 1, matching the 1+ord we did when
// storing, so that missing values, which are 0 in the
// packed ints, are returned as -1 ord:
return (int) docToTermOrd.get(docID)-1;
}
@Override
public int getValueCount() {
return numOrd;
}
@Override
public BytesRef lookupOrd(int ord) {
if (ord < 0) {
throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
}
bytes.fill(term, termOrdToBytesOffset.get(ord));
return term;
}
}
@Override