fix facet tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436600 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2013-01-21 20:39:25 +00:00
parent 1801cac63e
commit 8ba93c7f02
4 changed files with 104 additions and 124 deletions

FacetsPayloadMigrationReader.java

@@ -30,6 +30,7 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -74,39 +75,37 @@ import org.apache.lucene.util.BytesRef;
*/
public class FacetsPayloadMigrationReader extends FilterAtomicReader {
private class PayloadMigratingDocValues extends DocValues {
private class PayloadMigratingBinaryDocValues extends BinaryDocValues {
private final DocsAndPositionsEnum dpe;
public PayloadMigratingDocValues(DocsAndPositionsEnum dpe) {
this.dpe = dpe;
}
private Fields fields;
private Term term;
private DocsAndPositionsEnum dpe;
private int curDocID = -1;
private int lastRequestedDocID;
@Override
protected Source loadDirectSource() throws IOException {
return new PayloadMigratingSource(getType(), dpe);
}
@Override
protected Source loadSource() throws IOException {
throw new UnsupportedOperationException("in-memory Source is not supported by this reader");
}
@Override
public Type getType() {
return Type.BYTES_VAR_STRAIGHT;
private DocsAndPositionsEnum getDPE() {
try {
DocsAndPositionsEnum dpe = null;
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
TermsEnum te = terms.iterator(null); // no use for reusing
if (te.seekExact(term.bytes(), true)) {
// we're not expected to be called for deleted documents
dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
}
}
}
return dpe;
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
private class PayloadMigratingSource extends Source {
private final DocsAndPositionsEnum dpe;
private int curDocID;
protected PayloadMigratingSource(Type type, DocsAndPositionsEnum dpe) {
super(type);
this.dpe = dpe;
protected PayloadMigratingBinaryDocValues(Fields fields, Term term) {
this.fields = fields;
this.term = term;
this.dpe = getDPE();
if (dpe == null) {
curDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
@@ -119,31 +118,41 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
if (curDocID > docID) {
// document does not exist
ref.length = 0;
return ref;
}
public void get(int docID, BytesRef result) {
try {
// If caller is moving backwards (eg, during merge,
// the consuming DV format is free to iterate over
// our values as many times as it wants), we must
// re-init the dpe:
if (docID <= lastRequestedDocID) {
dpe = getDPE();
if (dpe == null) {
curDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
curDocID = dpe.nextDoc();
}
}
lastRequestedDocID = docID;
if (curDocID > docID) {
// document does not exist
result.length = 0;
return;
}
if (curDocID < docID) {
curDocID = dpe.advance(docID);
if (curDocID != docID) { // requested document does not have a payload
ref.length = 0;
return ref;
result.length = 0;
return;
}
}
// we're on the document
dpe.nextPosition();
ref.copyBytes(dpe.getPayload());
return ref;
result.copyBytes(dpe.getPayload());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
/** The {@link Term} text of the ordinals payload. */
@@ -202,26 +211,14 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}
@Override
public DocValues docValues(String field) throws IOException {
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
Term term = fieldTerms.get(field);
if (term == null) {
return super.docValues(field);
return super.getBinaryDocValues(field);
} else {
DocsAndPositionsEnum dpe = null;
Fields fields = fields();
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
TermsEnum te = terms.iterator(null); // no use for reusing
if (te.seekExact(term.bytes(), true)) {
// we're not expected to be called for deleted documents
dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
}
}
}
// we shouldn't return null, even if the term does not exist or has no
// payloads, since we already marked the field as having DocValues.
return new PayloadMigratingDocValues(dpe);
return new PayloadMigratingBinaryDocValues(fields(), term);
}
}
@@ -238,7 +235,7 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
// mark this field as having a DocValues
infos.add(new FieldInfo(info.name, true, info.number,
info.hasVectors(), info.omitsNorms(), info.hasPayloads(),
info.getIndexOptions(), Type.BYTES_VAR_STRAIGHT,
info.getIndexOptions(), DocValuesType.BINARY,
info.getNormType(), info.attributes()));
leftoverFields.remove(info.name);
} else {
@@ -248,9 +245,8 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}
for (String field : leftoverFields) {
infos.add(new FieldInfo(field, false, ++number, false, false, false,
null, Type.BYTES_VAR_STRAIGHT, null, null));
null, DocValuesType.BINARY, null, null));
}
return new FieldInfos(infos.toArray(new FieldInfo[infos.size()]));
}
}
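
The net effect of this file's changes: the migration reader now exposes the ordinals payload through the 4.1-style BinaryDocValues API, where a caller fetches each document's bytes with a single get(docID, BytesRef) call, replacing the removed DocValues/Source indirection. Below is a minimal consumer-side sketch of that API; the class name, the field name "$facets", and the dump helper are illustrative assumptions, not part of this commit:

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

class MigratedFacetsDumper {
  // Visits every document and fetches its encoded facet ordinals.
  static void dump(AtomicReader reader) throws IOException {
    BinaryDocValues dv = reader.getBinaryDocValues("$facets"); // assumed field name
    if (dv == null) {
      return; // field has no binary doc values
    }
    BytesRef scratch = new BytesRef();
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
      dv.get(docID, scratch); // fills scratch; length == 0 means no facets for this doc
      if (scratch.length > 0) {
        // decode the ordinals from scratch here
      }
    }
  }
}

Because PayloadMigratingBinaryDocValues re-creates its DocsAndPositionsEnum whenever a caller moves backwards, a consumer such as a merging doc values format may iterate the values more than once.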

OrdinalMappingAtomicReader.java

@@ -25,9 +25,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@@ -92,8 +90,8 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
}
@Override
public DocValues docValues(String field) throws IOException {
DocValues inner = super.docValues(field);
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
BinaryDocValues inner = super.getBinaryDocValues(field);
if (inner == null) {
return inner;
}
@@ -102,46 +100,19 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
if (clp == null) {
return inner;
} else {
return new OrdinalMappingDocValues(inner, clp);
return new OrdinalMappingBinaryDocValues(clp, inner);
}
}
private class OrdinalMappingDocValues extends DocValues {
private final CategoryListParams clp;
private final DocValues delegate;
public OrdinalMappingDocValues(DocValues delegate, CategoryListParams clp) {
this.delegate = delegate;
this.clp = clp;
}
@Override
protected Source loadSource() throws IOException {
return new OrdinalMappingSource(getType(), clp, delegate.getSource());
}
@Override
protected Source loadDirectSource() throws IOException {
return new OrdinalMappingSource(getType(), clp, delegate.getDirectSource());
}
@Override
public Type getType() {
return Type.BYTES_VAR_STRAIGHT;
}
}
private class OrdinalMappingSource extends Source {
private class OrdinalMappingBinaryDocValues extends BinaryDocValues {
private final IntEncoder encoder;
private final IntDecoder decoder;
private final IntsRef ordinals = new IntsRef(32);
private final Source delegate;
private final BinaryDocValues delegate;
private final BytesRef scratch = new BytesRef();
protected OrdinalMappingSource(Type type, CategoryListParams clp, Source delegate) {
super(type);
protected OrdinalMappingBinaryDocValues(CategoryListParams clp, BinaryDocValues delegate) {
this.delegate = delegate;
encoder = clp.createEncoder();
decoder = encoder.createMatchingDecoder();
@@ -149,23 +120,26 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
@SuppressWarnings("synthetic-access")
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref = delegate.getBytes(docID, ref);
if (ref == null || ref.length == 0) {
return ref;
} else {
decoder.decode(ref, ordinals);
public void get(int docID, BytesRef result) {
// NOTE: this isn't quite kosher, because in general
// multiple threads can call BinaryDV.get which would
// then conflict on the single scratch instance, but
// because this impl is only used for merging, we know
// only 1 thread calls us:
delegate.get(docID, scratch);
if (scratch.length > 0) {
// We must use scratch (and not re-use result) here,
// else encoder may overwrite the DV provider's
// private byte[]:
decoder.decode(scratch, ordinals);
// map the ordinals
for (int i = 0; i < ordinals.length; i++) {
ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
}
encoder.encode(ordinals, ref);
return ref;
encoder.encode(ordinals, result);
}
}
}
}
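
The rewritten OrdinalMappingBinaryDocValues keeps the old Source's decode, remap, encode round trip, now behind the per-document get(docID, BytesRef) entry point. A hedged usage sketch of the enclosing reader in the common addIndexes migration flow follows; the two-argument constructor, the directories, and the writer variables are illustrative assumptions:

import java.util.List;
import org.apache.lucene.facet.index.OrdinalMappingAtomicReader; // assumed package
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

class MergeWithOrdinalMapping {
  // Merges a source index + taxonomy pair into a destination pair,
  // remapping each document's facet ordinals to the destination taxonomy.
  static void merge(Directory srcIndexDir, Directory srcTaxoDir,
      IndexWriter destWriter, DirectoryTaxonomyWriter destTaxoWriter) throws Exception {
    // Adding the source taxonomy fills the OrdinalMap (old ordinal -> new ordinal).
    DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
    destTaxoWriter.addTaxonomy(srcTaxoDir, map);
    int[] ordinalMap = map.getMap();

    DirectoryReader reader = DirectoryReader.open(srcIndexDir);
    try {
      List<AtomicReaderContext> leaves = reader.leaves();
      AtomicReader[] wrapped = new AtomicReader[leaves.size()];
      for (int i = 0; i < leaves.size(); i++) {
        wrapped[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap);
      }
      destWriter.addIndexes(wrapped); // single consumer, so get() is called by one thread
    } finally {
      reader.close();
    }
  }
}

A single-threaded addIndexes consumer is exactly what makes the shared scratch instance in the code above safe, as its NOTE comment explains.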

CountingFacetsCollector.java

@@ -11,17 +11,16 @@ import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
@@ -89,9 +88,9 @@ public class CountingFacetsCollector extends FacetsCollector {
private final int[] counts;
private final String facetsField;
private final boolean useDirectSource;
private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
private final HashMap<BinaryDocValues,FixedBitSet> matchingDocs = new HashMap<BinaryDocValues,FixedBitSet>();
private DocValues facetsValues;
private BinaryDocValues facetsValues;
private FixedBitSet bits;
public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
@@ -158,11 +157,10 @@ public class CountingFacetsCollector extends FacetsCollector {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
facetsValues = context.reader().docValues(facetsField);
facetsValues = context.reader().getBinaryDocValues(facetsField);
if (facetsValues != null) {
Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
bits = new FixedBitSet(context.reader().maxDoc());
matchingDocs.put(facetSource, bits);
matchingDocs.put(facetsValues, bits);
}
}
@@ -176,13 +174,13 @@ public class CountingFacetsCollector extends FacetsCollector {
}
private void countFacets() {
for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
Source facetsSource = entry.getKey();
for (Entry<BinaryDocValues,FixedBitSet> entry : matchingDocs.entrySet()) {
BinaryDocValues facetsSource = entry.getKey();
FixedBitSet bits = entry.getValue();
int doc = 0;
int length = bits.length();
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
facetsSource.getBytes(doc, buf);
facetsSource.get(doc, buf);
if (buf.length > 0) {
// this document has facets
int upto = buf.offset + buf.length;
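
(The hunk above is cut off; the full loop must advance doc after each hit, or nextSetBit would keep returning the same document.) The collector now keys its per-segment bitset by the segment's BinaryDocValues instance instead of a Source. A stand-alone sketch of just that bookkeeping pattern, with illustrative class and field names:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.FixedBitSet;

class MatchingDocsCollector extends Collector {
  private final String facetsField;
  private final Map<BinaryDocValues,FixedBitSet> matchingDocs =
      new HashMap<BinaryDocValues,FixedBitSet>();
  private FixedBitSet bits;

  MatchingDocsCollector(String facetsField) {
    this.facetsField = facetsField;
  }

  @Override
  public void setScorer(Scorer scorer) {
    // scores are not needed for counting
  }

  @Override
  public void setNextReader(AtomicReaderContext context) throws IOException {
    BinaryDocValues dv = context.reader().getBinaryDocValues(facetsField);
    if (dv != null) {
      bits = new FixedBitSet(context.reader().maxDoc());
      matchingDocs.put(dv, bits); // one bitset per segment, keyed by its DV
    } else {
      bits = null; // this segment has no facets field
    }
  }

  @Override
  public void collect(int doc) {
    if (bits != null) {
      bits.set(doc);
    }
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return true; // we only record doc IDs in a bitset
  }
}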

SlowRAMDirectory.java

@@ -33,7 +33,7 @@ public class SlowRAMDirectory extends RAMDirectory {
private static final int IO_SLEEP_THRESHOLD = 50;
private Random random;
Random random;
private int sleepMillis;
public void setSleepMillis(int sleepMillis) {
@@ -62,7 +62,7 @@ public class SlowRAMDirectory extends RAMDirectory {
return super.openInput(name, context);
}
void doSleep(int length) {
void doSleep(Random random, int length) {
int sTime = length<10 ? sleepMillis : (int) (sleepMillis * Math.log(length));
if (random!=null) {
sTime = random.nextInt(sTime);
@@ -74,6 +74,14 @@ public class SlowRAMDirectory extends RAMDirectory {
}
}
/** Make a private random. */
Random forkRandom() {
if (random == null) {
return null;
}
return new Random(random.nextLong());
}
/**
* Delegate class to wrap an IndexInput and delay reading bytes by some
* specified time.
@@ -81,16 +89,18 @@ public class SlowRAMDirectory extends RAMDirectory {
private class SlowIndexInput extends IndexInput {
private IndexInput ii;
private int numRead = 0;
private Random random;
public SlowIndexInput(IndexInput ii) {
super("SlowIndexInput(" + ii + ")");
this.random = forkRandom();
this.ii = ii;
}
@Override
public byte readByte() throws IOException {
if (numRead >= IO_SLEEP_THRESHOLD) {
doSleep(0);
doSleep(random, 0);
numRead = 0;
}
++numRead;
@@ -100,7 +110,7 @@ public class SlowRAMDirectory extends RAMDirectory {
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
if (numRead >= IO_SLEEP_THRESHOLD) {
doSleep(len);
doSleep(random, len);
numRead = 0;
}
numRead += len;
@@ -125,15 +135,17 @@ public class SlowRAMDirectory extends RAMDirectory {
private IndexOutput io;
private int numWrote;
private final Random random;
public SlowIndexOutput(IndexOutput io) {
this.io = io;
this.random = forkRandom();
}
@Override
public void writeByte(byte b) throws IOException {
if (numWrote >= IO_SLEEP_THRESHOLD) {
doSleep(0);
doSleep(random, 0);
numWrote = 0;
}
++numWrote;
@@ -143,7 +155,7 @@ public class SlowRAMDirectory extends RAMDirectory {
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
if (numWrote >= IO_SLEEP_THRESHOLD) {
doSleep(length);
doSleep(random, length);
numWrote = 0;
}
numWrote += length;
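
The SlowRAMDirectory changes fix a sharing problem: previously every SlowIndexInput and SlowIndexOutput slept using the directory's single Random, so concurrent streams contended on it and perturbed each other's sequences. Each stream now forks a private Random at construction. A stripped-down sketch of the pattern, with illustrative names:

import java.util.Random;

final class RandomForker {
  private final Random root; // seeded once, e.g. from the test's master seed

  RandomForker(long seed) {
    this.root = new Random(seed);
  }

  // Each stream receives an independent child Random; deriving it from a
  // single nextLong() keeps every child fully reproducible for a fixed seed.
  synchronized Random fork() {
    return new Random(root.nextLong());
  }
}

Passing the forked Random explicitly into doSleep, as the diff does, also spares the streams from reaching back into the directory's shared state on every read or write.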