mirror of https://github.com/apache/lucene.git
fix facet tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436600 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 1801cac63e
commit 8ba93c7f02
@@ -30,6 +30,7 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -74,39 +75,37 @@ import org.apache.lucene.util.BytesRef;
*/
public class FacetsPayloadMigrationReader extends FilterAtomicReader {

private class PayloadMigratingDocValues extends DocValues {
private class PayloadMigratingBinaryDocValues extends BinaryDocValues {

private final DocsAndPositionsEnum dpe;

public PayloadMigratingDocValues(DocsAndPositionsEnum dpe) {
this.dpe = dpe;
}
private Fields fields;
private Term term;
private DocsAndPositionsEnum dpe;
private int curDocID = -1;
private int lastRequestedDocID;

@Override
protected Source loadDirectSource() throws IOException {
return new PayloadMigratingSource(getType(), dpe);
}

@Override
protected Source loadSource() throws IOException {
throw new UnsupportedOperationException("in-memory Source is not supported by this reader");
}

@Override
public Type getType() {
return Type.BYTES_VAR_STRAIGHT;
private DocsAndPositionsEnum getDPE() {
try {
DocsAndPositionsEnum dpe = null;
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
TermsEnum te = terms.iterator(null); // no use for reusing
if (te.seekExact(term.bytes(), true)) {
// we're not expected to be called for deleted documents
dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
}
}
}
return dpe;
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}

}

private class PayloadMigratingSource extends Source {

private final DocsAndPositionsEnum dpe;
private int curDocID;

protected PayloadMigratingSource(Type type, DocsAndPositionsEnum dpe) {
super(type);
this.dpe = dpe;
protected PayloadMigratingBinaryDocValues(Fields fields, Term term) {
this.fields = fields;
this.term = term;
this.dpe = getDPE();
if (dpe == null) {
curDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
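The new PayloadMigratingBinaryDocValues no longer receives a ready-made DocsAndPositionsEnum; its constructor (and get(), further down) re-derive one from the Fields/Term pair via getDPE(). As a standalone, hedged illustration of that lookup, the sketch below repeats the same calls the hunk uses; the class and method names are invented for the example and are not part of this commit.

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

class PayloadEnumExample {

  /** Returns null when the field or term is absent from this segment. */
  static DocsAndPositionsEnum payloadsEnum(AtomicReader reader, Term term) throws IOException {
    Fields fields = reader.fields();
    if (fields == null) {
      return null;
    }
    Terms terms = fields.terms(term.field());
    if (terms == null) {
      return null;
    }
    TermsEnum te = terms.iterator(null); // no use for reusing
    if (!te.seekExact(term.bytes(), true)) {
      return null;
    }
    // FLAG_PAYLOADS asks the codec for an enum that exposes the payload at each position
    return te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
  }
}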
@@ -119,31 +118,41 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
if (curDocID > docID) {
// document does not exist
ref.length = 0;
return ref;
}

public void get(int docID, BytesRef result) {
try {
// If caller is moving backwards (eg, during merge,
// the consuming DV format is free to iterate over
// our values as many times as it wants), we must
// re-init the dpe:
if (docID <= lastRequestedDocID) {
dpe = getDPE();
if (dpe == null) {
curDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
curDocID = dpe.nextDoc();
}
}
lastRequestedDocID = docID;
if (curDocID > docID) {
// document does not exist
result.length = 0;
return;
}

if (curDocID < docID) {
curDocID = dpe.advance(docID);
if (curDocID != docID) { // requested document does not have a payload
ref.length = 0;
return ref;
result.length = 0;
return;
}
}

// we're on the document
dpe.nextPosition();
ref.copyBytes(dpe.getPayload());
return ref;
result.copyBytes(dpe.getPayload());
} catch (IOException e) {
throw new RuntimeException(e);
}
}

}

/** The {@link Term} text of the ordinals payload. */
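The rewritten get() above has to tolerate callers that move backwards over docIDs, because during a merge the consuming doc-values format may iterate the values more than once. Below is a compact sketch of that advance-or-restart control flow only; openEnum() is a hypothetical hook standing in for the reader's getDPE(), and the class is not part of the commit.

import java.io.IOException;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

abstract class PayloadBackedBinaryDocValues extends BinaryDocValues {

  /** Re-opens a positions enum over the single ordinals term; may return null. */
  protected abstract DocsAndPositionsEnum openEnum();

  private DocsAndPositionsEnum dpe;
  private int curDocID = DocIdSetIterator.NO_MORE_DOCS;
  private int lastRequestedDocID;

  @Override
  public void get(int docID, BytesRef result) {
    try {
      if (dpe == null || docID <= lastRequestedDocID) {
        // first call, or the caller moved backwards: restart the enum
        dpe = openEnum();
        curDocID = dpe == null ? DocIdSetIterator.NO_MORE_DOCS : dpe.nextDoc();
      }
      lastRequestedDocID = docID;
      if (curDocID < docID) {
        curDocID = dpe.advance(docID);
      }
      if (curDocID != docID) {
        result.length = 0; // requested document has no payload
        return;
      }
      dpe.nextPosition();                 // the single position carries the payload
      result.copyBytes(dpe.getPayload());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}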
@@ -202,26 +211,14 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}

@Override
public DocValues docValues(String field) throws IOException {
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
Term term = fieldTerms.get(field);
if (term == null) {
return super.docValues(field);
return super.getBinaryDocValues(field);
} else {
DocsAndPositionsEnum dpe = null;
Fields fields = fields();
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
TermsEnum te = terms.iterator(null); // no use for reusing
if (te.seekExact(term.bytes(), true)) {
// we're not expected to be called for deleted documents
dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
}
}
}
// we shouldn't return null, even if the term does not exist or has no
// payloads, since we already marked the field as having DocValues.
return new PayloadMigratingDocValues(dpe);
return new PayloadMigratingBinaryDocValues(fields(), term);
}
}
@@ -238,7 +235,7 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
// mark this field as having a DocValues
infos.add(new FieldInfo(info.name, true, info.number,
info.hasVectors(), info.omitsNorms(), info.hasPayloads(),
info.getIndexOptions(), Type.BYTES_VAR_STRAIGHT,
info.getIndexOptions(), DocValuesType.BINARY,
info.getNormType(), info.attributes()));
leftoverFields.remove(info.name);
} else {
@@ -248,9 +245,8 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
}
for (String field : leftoverFields) {
infos.add(new FieldInfo(field, false, ++number, false, false, false,
null, Type.BYTES_VAR_STRAIGHT, null, null));
null, DocValuesType.BINARY, null, null));
}
return new FieldInfos(infos.toArray(new FieldInfo[infos.size()]));
}

}
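With the hunks above, a segment wrapped by FacetsPayloadMigrationReader exposes the migrated ordinals through the regular binary doc-values API, and the synthetic FieldInfo advertises DocValuesType.BINARY. A hedged usage sketch follows; the class and the "$facets" field name are examples only, not taken from this commit.

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

class MigratedFieldUsage {

  /** Reads one document's encoded ordinals from a migrated (or native) binary DV field. */
  static BytesRef readOrdinals(AtomicReader reader, int docID) throws IOException {
    BinaryDocValues dv = reader.getBinaryDocValues("$facets"); // example field name
    if (dv == null) {
      return null; // field has no (migrated) doc values in this segment
    }
    BytesRef buf = new BytesRef();
    dv.get(docID, buf);
    // buf.length == 0 means this document carries no ordinals
    return buf;
  }
}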
@@ -25,9 +25,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@@ -92,8 +90,8 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
}

@Override
public DocValues docValues(String field) throws IOException {
DocValues inner = super.docValues(field);
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
BinaryDocValues inner = super.getBinaryDocValues(field);
if (inner == null) {
return inner;
}
@@ -102,46 +100,19 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
if (clp == null) {
return inner;
} else {
return new OrdinalMappingDocValues(inner, clp);
return new OrdinalMappingBinaryDocValues(clp, inner);
}
}

private class OrdinalMappingDocValues extends DocValues {

private final CategoryListParams clp;
private final DocValues delegate;

public OrdinalMappingDocValues(DocValues delegate, CategoryListParams clp) {
this.delegate = delegate;
this.clp = clp;
}

@Override
protected Source loadSource() throws IOException {
return new OrdinalMappingSource(getType(), clp, delegate.getSource());
}

@Override
protected Source loadDirectSource() throws IOException {
return new OrdinalMappingSource(getType(), clp, delegate.getDirectSource());
}

@Override
public Type getType() {
return Type.BYTES_VAR_STRAIGHT;
}

}

private class OrdinalMappingSource extends Source {
private class OrdinalMappingBinaryDocValues extends BinaryDocValues {

private final IntEncoder encoder;
private final IntDecoder decoder;
private final IntsRef ordinals = new IntsRef(32);
private final Source delegate;
private final BinaryDocValues delegate;
private final BytesRef scratch = new BytesRef();

protected OrdinalMappingSource(Type type, CategoryListParams clp, Source delegate) {
super(type);
protected OrdinalMappingBinaryDocValues(CategoryListParams clp, BinaryDocValues delegate) {
this.delegate = delegate;
encoder = clp.createEncoder();
decoder = encoder.createMatchingDecoder();
@@ -149,23 +120,26 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {

@SuppressWarnings("synthetic-access")
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref = delegate.getBytes(docID, ref);
if (ref == null || ref.length == 0) {
return ref;
} else {
decoder.decode(ref, ordinals);
public void get(int docID, BytesRef result) {
// NOTE: this isn't quite kosher, because in general
// multiple threads can call BinaryDV.get which would
// then conflict on the single scratch instance, but
// because this impl is only used for merging, we know
// only 1 thread calls us:
delegate.get(docID, scratch);
if (scratch.length > 0) {
// We must use scratch (and not re-use result) here,
// else encoder may overwrite the DV provider's
// private byte[]:
decoder.decode(scratch, ordinals);

// map the ordinals
for (int i = 0; i < ordinals.length; i++) {
ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
}

encoder.encode(ordinals, ref);
return ref;
encoder.encode(ordinals, result);
}
}

}

}
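OrdinalMappingBinaryDocValues thus decodes the delegate's bytes into a reusable IntsRef, maps each ordinal through ordinalMap, and re-encodes into the caller's BytesRef. The sketch below isolates that decode/remap/encode step using the encoder and decoder calls visible in the hunk; it assumes the facet module's IntEncoder/IntDecoder live in org.apache.lucene.util.encoding (as in the 4.x facet module), and everything outside the hunk is illustrative.

import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;

class OrdinalRemapExample {

  /** Decodes the input bytes, maps every ordinal through ordinalMap, re-encodes into result. */
  static void remap(CategoryListParams clp, int[] ordinalMap, BytesRef encoded, BytesRef result) {
    if (encoded.length == 0) {
      result.length = 0; // this document has no ordinals
      return;
    }
    // per-call encoder/decoder for clarity; the reader caches them per category list
    IntEncoder encoder = clp.createEncoder();
    IntDecoder decoder = encoder.createMatchingDecoder();
    IntsRef ordinals = new IntsRef(32);

    decoder.decode(encoded, ordinals);                 // bytes -> ordinals of the old taxonomy
    for (int i = 0; i < ordinals.length; i++) {
      ordinals.ints[i] = ordinalMap[ordinals.ints[i]]; // old ordinal -> merged-taxonomy ordinal
    }
    encoder.encode(ordinals, result);                  // remapped ordinals -> bytes
  }
}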
@@ -11,17 +11,16 @@ import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
@@ -89,9 +88,9 @@ public class CountingFacetsCollector extends FacetsCollector {
private final int[] counts;
private final String facetsField;
private final boolean useDirectSource;
private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
private final HashMap<BinaryDocValues,FixedBitSet> matchingDocs = new HashMap<BinaryDocValues,FixedBitSet>();

private DocValues facetsValues;
private BinaryDocValues facetsValues;
private FixedBitSet bits;

public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
@@ -158,11 +157,10 @@ public class CountingFacetsCollector extends FacetsCollector {

@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
facetsValues = context.reader().docValues(facetsField);
facetsValues = context.reader().getBinaryDocValues(facetsField);
if (facetsValues != null) {
Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
bits = new FixedBitSet(context.reader().maxDoc());
matchingDocs.put(facetSource, bits);
matchingDocs.put(facetsValues, bits);
}
}
@@ -176,13 +174,13 @@ public class CountingFacetsCollector extends FacetsCollector {
}

private void countFacets() {
for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
Source facetsSource = entry.getKey();
for (Entry<BinaryDocValues,FixedBitSet> entry : matchingDocs.entrySet()) {
BinaryDocValues facetsSource = entry.getKey();
FixedBitSet bits = entry.getValue();
int doc = 0;
int length = bits.length();
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
facetsSource.getBytes(doc, buf);
facetsSource.get(doc, buf);
if (buf.length > 0) {
// this document has facets
int upto = buf.offset + buf.length;
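CountingFacetsCollector now keeps one FixedBitSet of matching documents per segment, keyed directly by that segment's BinaryDocValues, and the counting pass pulls each matching document's encoded ordinals with get(). A condensed sketch of that flow is below; collect() and the actual ordinal decoding are elided, and the class is illustrative rather than the collector itself.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;

class CountingFlowSketch {

  private final Map<BinaryDocValues,FixedBitSet> matchingDocs = new HashMap<BinaryDocValues,FixedBitSet>();
  private final BytesRef buf = new BytesRef();

  /** Per segment: resolve the binary DV once and allocate a bitset for matching docs. */
  void setNextReader(AtomicReaderContext context, String facetsField) throws IOException {
    BinaryDocValues facetsValues = context.reader().getBinaryDocValues(facetsField);
    if (facetsValues != null) {
      matchingDocs.put(facetsValues, new FixedBitSet(context.reader().maxDoc()));
    }
  }

  /** After collection: walk every matching doc and pull its encoded ordinals. */
  void countFacets() {
    for (Map.Entry<BinaryDocValues,FixedBitSet> entry : matchingDocs.entrySet()) {
      BinaryDocValues values = entry.getKey();
      FixedBitSet bits = entry.getValue();
      int length = bits.length();
      int doc = 0;
      while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
        values.get(doc, buf);
        if (buf.length > 0) {
          // this document has facets: decode buf and bump the per-ordinal counts here
        }
        doc++; // move past the current bit before the next nextSetBit call
      }
    }
  }
}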
@@ -33,7 +33,7 @@ public class SlowRAMDirectory extends RAMDirectory {

private static final int IO_SLEEP_THRESHOLD = 50;

private Random random;
Random random;
private int sleepMillis;

public void setSleepMillis(int sleepMillis) {
@@ -62,7 +62,7 @@ public class SlowRAMDirectory extends RAMDirectory {
return super.openInput(name, context);
}

void doSleep(int length) {
void doSleep(Random random, int length) {
int sTime = length<10 ? sleepMillis : (int) (sleepMillis * Math.log(length));
if (random!=null) {
sTime = random.nextInt(sTime);
@@ -74,6 +74,14 @@ public class SlowRAMDirectory extends RAMDirectory {
}
}

/** Make a private random. */
Random forkRandom() {
if (random == null) {
return null;
}
return new Random(random.nextLong());
}

/**
* Delegate class to wrap an IndexInput and delay reading bytes by some
* specified time.
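forkRandom() gives each slow input and output stream its own Random, seeded off the directory-wide one, so concurrent streams neither contend on nor perturb shared Random state; a null parent (sleeping without randomization) is passed through. A minimal sketch of the idea, outside any Lucene class:

import java.util.Random;

class ForkRandomSketch {

  private final Random parent; // may be null: delays are then not randomized

  ForkRandomSketch(Random parent) {
    this.parent = parent;
  }

  /** Make a private random for one stream, derived deterministically from the parent. */
  Random forkRandom() {
    return parent == null ? null : new Random(parent.nextLong());
  }
}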
@@ -81,16 +89,18 @@ public class SlowRAMDirectory extends RAMDirectory {
private class SlowIndexInput extends IndexInput {
private IndexInput ii;
private int numRead = 0;
private Random random;

public SlowIndexInput(IndexInput ii) {
super("SlowIndexInput(" + ii + ")");
this.random = forkRandom();
this.ii = ii;
}

@Override
public byte readByte() throws IOException {
if (numRead >= IO_SLEEP_THRESHOLD) {
doSleep(0);
doSleep(random, 0);
numRead = 0;
}
++numRead;
@@ -100,7 +110,7 @@ public class SlowRAMDirectory extends RAMDirectory {
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
if (numRead >= IO_SLEEP_THRESHOLD) {
doSleep(len);
doSleep(random, len);
numRead = 0;
}
numRead += len;
@@ -125,15 +135,17 @@ public class SlowRAMDirectory extends RAMDirectory {

private IndexOutput io;
private int numWrote;
private final Random random;

public SlowIndexOutput(IndexOutput io) {
this.io = io;
this.random = forkRandom();
}

@Override
public void writeByte(byte b) throws IOException {
if (numWrote >= IO_SLEEP_THRESHOLD) {
doSleep(0);
doSleep(random, 0);
numWrote = 0;
}
++numWrote;
@@ -143,7 +155,7 @@ public class SlowRAMDirectory extends RAMDirectory {
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
if (numWrote >= IO_SLEEP_THRESHOLD) {
doSleep(length);
doSleep(random, length);
numWrote = 0;
}
numWrote += length;
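The sleep helper now takes the calling stream's private Random, so each SlowIndexInput/SlowIndexOutput throttles itself from its own sequence. The sketch below mirrors the delay computation from the hunks above; the interrupt handling and the sTime > 0 guard are additions for the sketch, not necessarily what SlowRAMDirectory does.

import java.util.Random;

class DoSleepSketch {

  private final int sleepMillis;

  DoSleepSketch(int sleepMillis) {
    this.sleepMillis = sleepMillis;
  }

  void doSleep(Random random, int length) {
    // longer reads/writes sleep longer, scaled logarithmically
    int sTime = length < 10 ? sleepMillis : (int) (sleepMillis * Math.log(length));
    if (random != null && sTime > 0) {
      sTime = random.nextInt(sTime); // randomize the delay per call
    }
    try {
      Thread.sleep(sTime);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }
}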