cut over collector

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444993 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-02-11 23:40:13 +00:00
parent d2d70135c2
commit be5990ceeb
2 changed files with 65 additions and 57 deletions

View File

@ -850,7 +850,11 @@ public class DocTermOrds {
/** Returns a SortedSetDocValues view of this instance */ /** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException { public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
return new Iterator(termsEnum); if (isEmpty()) {
return SortedSetDocValues.EMPTY;
} else {
return new Iterator(termsEnum);
}
} }
// nocommit: make private (just public to enable hack to cutover gradually) // nocommit: make private (just public to enable hack to cutover gradually)

View File

@ -25,6 +25,8 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocTermOrds; import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedDocValuesTermsEnum; import org.apache.lucene.index.SortedDocValuesTermsEnum;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractGroupFacetCollector; import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
@ -196,9 +198,10 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
// Implementation for multi valued facet fields. // Implementation for multi valued facet fields.
static class MV extends TermGroupFacetCollector { static class MV extends TermGroupFacetCollector {
private DocTermOrds facetFieldDocTermOrds; private SortedSetDocValues facetFieldDocTermOrds;
private TermsEnum facetOrdTermsEnum; private TermsEnum facetOrdTermsEnum;
private DocTermOrds.TermOrdsIterator reuse; private int facetFieldNumTerms;
private final BytesRef scratch = new BytesRef();
MV(String groupField, String facetField, BytesRef facetPrefix, int initialSize) { MV(String groupField, String facetField, BytesRef facetPrefix, int initialSize) {
super(groupField, facetField, facetPrefix, initialSize); super(groupField, facetField, facetPrefix, initialSize);
@ -207,14 +210,14 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
int groupOrd = groupFieldTermsIndex.getOrd(doc); int groupOrd = groupFieldTermsIndex.getOrd(doc);
if (facetFieldDocTermOrds.isEmpty()) { if (facetFieldNumTerms == 0) {
int segmentGroupedFacetsIndex = groupOrd * (facetFieldDocTermOrds.numTerms() + 1); int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1);
if (facetPrefix != null || segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) { if (facetPrefix != null || segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
return; return;
} }
segmentTotalCount++; segmentTotalCount++;
segmentFacetCounts[facetFieldDocTermOrds.numTerms()]++; segmentFacetCounts[facetFieldNumTerms]++;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
BytesRef groupKey; BytesRef groupKey;
@ -228,51 +231,50 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
return; return;
} }
if (facetOrdTermsEnum != null) { facetFieldDocTermOrds.setDocument(doc);
reuse = facetFieldDocTermOrds.lookup(doc, reuse); long ord;
boolean empty = true;
while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
process(groupOrd, (int) ord);
empty = false;
}
if (empty) {
process(groupOrd, facetFieldNumTerms); // this facet ord is reserved for docs not containing facet field.
}
}
private void process(int groupOrd, int facetOrd) {
if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
return;
} }
int chunk;
boolean first = true;
int[] buffer = new int[5];
do {
chunk = reuse != null ? reuse.read(buffer) : 0;
if (first && chunk == 0) {
chunk = 1;
buffer[0] = facetFieldDocTermOrds.numTerms(); // this facet ord is reserved for docs not containing facet field.
}
first = false;
for (int pos = 0; pos < chunk; pos++) { int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
int facetOrd = buffer[pos]; if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) { return;
continue; }
}
int segmentGroupedFacetsIndex = groupOrd * (facetFieldDocTermOrds.numTerms() + 1) + facetOrd; segmentTotalCount++;
if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) { segmentFacetCounts[facetOrd]++;
continue;
}
segmentTotalCount++; segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
segmentFacetCounts[facetOrd]++;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); BytesRef groupKey;
if (groupOrd == -1) {
groupKey = null;
} else {
groupKey = new BytesRef();
groupFieldTermsIndex.lookupOrd(groupOrd, groupKey);
}
BytesRef groupKey; final BytesRef facetValue;
if (groupOrd == -1) { if (facetOrd == facetFieldNumTerms) {
groupKey = null; facetValue = null;
} else { } else {
groupKey = new BytesRef(); facetFieldDocTermOrds.lookupOrd(facetOrd, scratch);
groupFieldTermsIndex.lookupOrd(groupOrd, groupKey); facetValue = BytesRef.deepCopyOf(scratch); // must we?
} }
groupedFacetHits.add(new GroupedFacetHit(groupKey, facetValue));
groupedFacetHits.add(
new GroupedFacetHit(groupKey,
facetOrd == facetFieldDocTermOrds.numTerms() ? null : BytesRef.deepCopyOf(facetFieldDocTermOrds.lookupTerm(facetOrdTermsEnum, facetOrd))
)
);
}
} while (chunk >= buffer.length);
} }
@Override @Override
@ -281,14 +283,16 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
segmentResults.add(createSegmentResult()); segmentResults.add(createSegmentResult());
} }
reuse = null;
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField); groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
// nocommit: cut over facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField); facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
facetFieldDocTermOrds = iterator.getParent(); if (facetFieldNumTerms == 0) {
facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader()); facetOrdTermsEnum = null;
// [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field } else {
segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1]; facetOrdTermsEnum = new SortedSetDocValuesTermsEnum(facetFieldDocTermOrds);
}
// [facetFieldNumTerms + 1] for all possible facet values and docs not containing facet field
segmentFacetCounts = new int[facetFieldNumTerms + 1];
segmentTotalCount = 0; segmentTotalCount = 0;
segmentGroupedFacetHits.clear(); segmentGroupedFacetHits.clear();
@ -305,11 +309,11 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
} }
facetOrd = (int) facetOrdTermsEnum.ord(); facetOrd = (int) facetOrdTermsEnum.ord();
} else { } else {
facetOrd = facetFieldDocTermOrds.numTerms(); facetOrd = facetFieldNumTerms;
} }
// (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
int segmentGroupedFacetsIndex = groupOrd * (facetFieldDocTermOrds.numTerms() + 1) + facetOrd; int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
} }
@ -335,17 +339,17 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
if (seekStatus != TermsEnum.SeekStatus.END) { if (seekStatus != TermsEnum.SeekStatus.END) {
endFacetOrd = (int) facetOrdTermsEnum.ord(); endFacetOrd = (int) facetOrdTermsEnum.ord();
} else { } else {
endFacetOrd = facetFieldDocTermOrds.numTerms(); // Don't include null... endFacetOrd = facetFieldNumTerms; // Don't include null...
} }
} else { } else {
startFacetOrd = 0; startFacetOrd = 0;
endFacetOrd = facetFieldDocTermOrds.numTerms() + 1; endFacetOrd = facetFieldNumTerms + 1;
} }
} }
@Override @Override
protected SegmentResult createSegmentResult() throws IOException { protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldDocTermOrds.numTerms(), facetOrdTermsEnum, startFacetOrd, endFacetOrd); return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldNumTerms, facetOrdTermsEnum, startFacetOrd, endFacetOrd);
} }
private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult { private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {