From 8ba93c7f02c613db982351866b5bb7f34c7e10d5 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Mon, 21 Jan 2013 20:39:25 +0000
Subject: [PATCH] fix facet tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436600 13f79535-47bb-0310-9956-ffa450edef68
---
 .../index/FacetsPayloadMigrationReader.java   | 116 +++++++++---------
 .../index/OrdinalMappingAtomicReader.java     |  68 ++++------
 .../facet/search/CountingFacetsCollector.java |  20 ++-
 .../apache/lucene/util/SlowRAMDirectory.java  |  24 +++-
 4 files changed, 104 insertions(+), 124 deletions(-)

diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadMigrationReader.java b/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadMigrationReader.java
index 766e49aabfe..a469e879bf3 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadMigrationReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadMigrationReader.java
@@ -30,6 +30,7 @@ import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
@@ -74,39 +75,37 @@ import org.apache.lucene.util.BytesRef;
  */
 public class FacetsPayloadMigrationReader extends FilterAtomicReader {
 
-  private class PayloadMigratingDocValues extends DocValues {
+  private class PayloadMigratingBinaryDocValues extends BinaryDocValues {
 
-    private final DocsAndPositionsEnum dpe;
-
-    public PayloadMigratingDocValues(DocsAndPositionsEnum dpe) {
-      this.dpe = dpe;
-    }
+    private Fields fields;
+    private Term term;
+    private DocsAndPositionsEnum dpe;
+    private int curDocID = -1;
+    private int lastRequestedDocID;
 
-    @Override
-    protected Source loadDirectSource() throws IOException {
-      return new PayloadMigratingSource(getType(), dpe);
-    }
-
-    @Override
-    protected Source loadSource() throws IOException {
-      throw new UnsupportedOperationException("in-memory Source is not supported by this reader");
-    }
-
-    @Override
-    public Type getType() {
-      return Type.BYTES_VAR_STRAIGHT;
+    private DocsAndPositionsEnum getDPE() {
+      try {
+        DocsAndPositionsEnum dpe = null;
+        if (fields != null) {
+          Terms terms = fields.terms(term.field());
+          if (terms != null) {
+            TermsEnum te = terms.iterator(null); // no use for reusing
+            if (te.seekExact(term.bytes(), true)) {
+              // we're not expected to be called for deleted documents
+              dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
+            }
+          }
+        }
+        return dpe;
+      } catch (IOException ioe) {
+        throw new RuntimeException(ioe);
+      }
     }
-  }
-
-  private class PayloadMigratingSource extends Source {
-
-    private final DocsAndPositionsEnum dpe;
-    private int curDocID;
-
-    protected PayloadMigratingSource(Type type, DocsAndPositionsEnum dpe) {
-      super(type);
-      this.dpe = dpe;
+    protected PayloadMigratingBinaryDocValues(Fields fields, Term term) {
+      this.fields = fields;
+      this.term = term;
+      this.dpe = getDPE();
       if (dpe == null) {
         curDocID = DocIdSetIterator.NO_MORE_DOCS;
       } else {
@@ -119,31 +118,41 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
     }
 
     @Override
-    public BytesRef getBytes(int docID, BytesRef ref) {
-      if (curDocID > docID) {
-        // document does not exist
-        ref.length = 0;
-        return ref;
-      }
-
+    public void get(int docID, BytesRef result) {
       try {
+        // If caller is moving backwards (eg, during merge,
+        // the consuming DV format is free to iterate over
+        // our values as many times as it wants), we must
+        // re-init the dpe:
+        if (docID <= lastRequestedDocID) {
+          dpe = getDPE();
+          if (dpe == null) {
+            curDocID = DocIdSetIterator.NO_MORE_DOCS;
+          } else {
+            curDocID = dpe.nextDoc();
+          }
+        }
+        lastRequestedDocID = docID;
+        if (curDocID > docID) {
+          // document does not exist
+          result.length = 0;
+          return;
+        }
+
         if (curDocID < docID) {
           curDocID = dpe.advance(docID);
           if (curDocID != docID) {
             // requested document does not have a payload
-            ref.length = 0;
-            return ref;
+            result.length = 0;
+            return;
           }
         }
-
         // we're on the document
         dpe.nextPosition();
-        ref.copyBytes(dpe.getPayload());
-        return ref;
+        result.copyBytes(dpe.getPayload());
       } catch (IOException e) {
        throw new RuntimeException(e);
       }
     }
-
   }
 
   /** The {@link Term} text of the ordinals payload. */
@@ -202,26 +211,14 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
   }
 
   @Override
-  public DocValues docValues(String field) throws IOException {
+  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
     Term term = fieldTerms.get(field);
     if (term == null) {
-      return super.docValues(field);
+      return super.getBinaryDocValues(field);
     } else {
-      DocsAndPositionsEnum dpe = null;
-      Fields fields = fields();
-      if (fields != null) {
-        Terms terms = fields.terms(term.field());
-        if (terms != null) {
-          TermsEnum te = terms.iterator(null); // no use for reusing
-          if (te.seekExact(term.bytes(), true)) {
-            // we're not expected to be called for deleted documents
-            dpe = te.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
-          }
-        }
-      }
       // we shouldn't return null, even if the term does not exist or has no
       // payloads, since we already marked the field as having DocValues.
-      return new PayloadMigratingDocValues(dpe);
+      return new PayloadMigratingBinaryDocValues(fields(), term);
     }
   }
@@ -238,7 +235,7 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
         // mark this field as having a DocValues
         infos.add(new FieldInfo(info.name, true, info.number, info.hasVectors(), info.omitsNorms(), info.hasPayloads(),
-            info.getIndexOptions(), Type.BYTES_VAR_STRAIGHT,
+            info.getIndexOptions(), DocValuesType.BINARY,
             info.getNormType(), info.attributes()));
         leftoverFields.remove(info.name);
       } else {
@@ -248,9 +245,8 @@ public class FacetsPayloadMigrationReader extends FilterAtomicReader {
     }
     for (String field : leftoverFields) {
       infos.add(new FieldInfo(field, false, ++number, false, false, false,
-          null, Type.BYTES_VAR_STRAIGHT, null, null));
+          null, DocValuesType.BINARY, null, null));
     }
     return new FieldInfos(infos.toArray(new FieldInfo[infos.size()]));
   }
-
 }
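Background for the hunks above: PayloadMigratingBinaryDocValues reads each document's bytes from a forward-only DocsAndPositionsEnum, so get() re-opens the enum via getDPE() whenever a caller asks for a docID at or below the last one requested (a merge is free to iterate the values more than once). The following is a stand-alone sketch of that reset-on-backwards-access pattern, not Lucene code; the sorted int[] and the ForwardOnlyLookup/hasValue names are made-up stand-ins for the postings enum and for get():

public class ForwardOnlyLookup {
  private final int[] docsWithValue; // sorted doc ids that carry a value (illustrative stand-in)
  private int pos;                   // forward-only cursor, like a DocsAndPositionsEnum
  private int lastRequestedDoc = -1;

  public ForwardOnlyLookup(int[] docsWithValue) {
    this.docsWithValue = docsWithValue;
  }

  /** Returns true if docID has a value, re-opening the cursor if the caller moved backwards. */
  public boolean hasValue(int docID) {
    if (docID <= lastRequestedDoc) {
      pos = 0; // caller moved backwards (eg a merge re-iterating): re-init, as getDPE() does
    }
    lastRequestedDoc = docID;
    while (pos < docsWithValue.length && docsWithValue[pos] < docID) {
      pos++; // like advance(): only ever moves forward
    }
    return pos < docsWithValue.length && docsWithValue[pos] == docID;
  }

  public static void main(String[] args) {
    ForwardOnlyLookup lookup = new ForwardOnlyLookup(new int[] {1, 3, 7});
    System.out.println(lookup.hasValue(3)); // true
    System.out.println(lookup.hasValue(7)); // true
    System.out.println(lookup.hasValue(2)); // false; forced a re-init first
  }
}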
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java b/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
index d892ab0c8c5..e9ef0df0536 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
@@ -25,9 +25,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
 import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValues.Source;
-import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.FilterAtomicReader;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
@@ -92,8 +90,8 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
   }
 
   @Override
-  public DocValues docValues(String field) throws IOException {
-    DocValues inner = super.docValues(field);
+  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+    BinaryDocValues inner = super.getBinaryDocValues(field);
     if (inner == null) {
       return inner;
     }
@@ -102,46 +100,19 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
     if (clp == null) {
       return inner;
     } else {
-      return new OrdinalMappingDocValues(inner, clp);
+      return new OrdinalMappingBinaryDocValues(clp, inner);
     }
   }
 
-  private class OrdinalMappingDocValues extends DocValues {
-
-    private final CategoryListParams clp;
-    private final DocValues delegate;
-
-    public OrdinalMappingDocValues(DocValues delegate, CategoryListParams clp) {
-      this.delegate = delegate;
-      this.clp = clp;
-    }
-
-    @Override
-    protected Source loadSource() throws IOException {
-      return new OrdinalMappingSource(getType(), clp, delegate.getSource());
-    }
-
-    @Override
-    protected Source loadDirectSource() throws IOException {
-      return new OrdinalMappingSource(getType(), clp, delegate.getDirectSource());
-    }
-
-    @Override
-    public Type getType() {
-      return Type.BYTES_VAR_STRAIGHT;
-    }
-
-  }
-
-  private class OrdinalMappingSource extends Source {
+  private class OrdinalMappingBinaryDocValues extends BinaryDocValues {
 
     private final IntEncoder encoder;
     private final IntDecoder decoder;
     private final IntsRef ordinals = new IntsRef(32);
-    private final Source delegate;
+    private final BinaryDocValues delegate;
+    private final BytesRef scratch = new BytesRef();
 
-    protected OrdinalMappingSource(Type type, CategoryListParams clp, Source delegate) {
-      super(type);
+    protected OrdinalMappingBinaryDocValues(CategoryListParams clp, BinaryDocValues delegate) {
       this.delegate = delegate;
       encoder = clp.createEncoder();
       decoder = encoder.createMatchingDecoder();
@@ -149,23 +120,26 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader {
 
     @SuppressWarnings("synthetic-access")
     @Override
-    public BytesRef getBytes(int docID, BytesRef ref) {
-      ref = delegate.getBytes(docID, ref);
-      if (ref == null || ref.length == 0) {
-        return ref;
-      } else {
-        decoder.decode(ref, ordinals);
+    public void get(int docID, BytesRef result) {
+      // NOTE: this isn't quite kosher, because in general
+      // multiple threads can call BinaryDV.get which would
+      // then conflict on the single scratch instance, but
+      // because this impl is only used for merging, we know
+      // only 1 thread calls us:
+      delegate.get(docID, scratch);
+      if (scratch.length > 0) {
+        // We must use scratch (and not re-use result) here,
+        // else encoder may overwrite the DV provider's
+        // private byte[]:
+        decoder.decode(scratch, ordinals);
         // map the ordinals
         for (int i = 0; i < ordinals.length; i++) {
           ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
         }
-        encoder.encode(ordinals, ref);
-        return ref;
+        encoder.encode(ordinals, result);
       }
     }
-
   }
-
 }
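Background for the hunks above: OrdinalMappingBinaryDocValues copies the delegate's bytes into its own scratch BytesRef, decodes the ordinals from that copy, remaps them through ordinalMap, and only then encodes into the caller's result, so the encoder never writes over the doc-values provider's private byte[]. A simplified stand-alone sketch of that copy, remap, re-encode flow (illustrative only; fixed 4-byte ints stand in for the IntEncoder/IntDecoder codecs, and OrdinalRemapper is a made-up name):

import java.nio.ByteBuffer;

public class OrdinalRemapper {
  private final int[] ordinalMap; // old ordinal -> new ordinal
  // private copy of the source bytes, mirroring the scratch BytesRef
  // (assumes the encoded value fits; real code would grow it)
  private final ByteBuffer scratch = ByteBuffer.allocate(1024);

  public OrdinalRemapper(int[] ordinalMap) {
    this.ordinalMap = ordinalMap;
  }

  /** Decodes 4-byte ints from source, remaps them, and encodes into a fresh output buffer. */
  public byte[] remap(byte[] source) {
    scratch.clear();
    scratch.put(source); // copy first, like delegate.get(docID, scratch)
    scratch.flip();
    ByteBuffer dest = ByteBuffer.allocate(source.length);
    while (scratch.hasRemaining()) {
      int oldOrd = scratch.getInt();   // "decode"
      dest.putInt(ordinalMap[oldOrd]); // map, then "encode" into the caller-visible buffer
    }
    return dest.array();
  }

  public static void main(String[] args) {
    int[] map = {0, 5, 6, 7}; // ordinal 1 becomes 5, 2 becomes 6, 3 becomes 7
    ByteBuffer in = ByteBuffer.allocate(8).putInt(1).putInt(3);
    byte[] out = new OrdinalRemapper(map).remap(in.array());
    ByteBuffer decoded = ByteBuffer.wrap(out);
    System.out.println(decoded.getInt() + " " + decoded.getInt()); // 5 7
  }
}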
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
index a82d8d8a1dd..65db12cdb1f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
@@ -11,17 +11,16 @@ import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.search.params.CountFacetRequest;
-import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
 import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
+import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.BytesRef;
@@ -89,9 +88,9 @@
   private final int[] counts;
   private final String facetsField;
   private final boolean useDirectSource;
-  private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
+  private final HashMap<BinaryDocValues,FixedBitSet> matchingDocs = new HashMap<BinaryDocValues,FixedBitSet>();
 
-  private DocValues facetsValues;
+  private BinaryDocValues facetsValues;
   private FixedBitSet bits;
 
   public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
@@ -158,11 +157,10 @@
 
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
-    facetsValues = context.reader().docValues(facetsField);
+    facetsValues = context.reader().getBinaryDocValues(facetsField);
     if (facetsValues != null) {
-      Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
       bits = new FixedBitSet(context.reader().maxDoc());
-      matchingDocs.put(facetSource, bits);
+      matchingDocs.put(facetsValues, bits);
     }
   }
@@ -176,13 +174,13 @@
   }
 
   private void countFacets() {
-    for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
-      Source facetsSource = entry.getKey();
+    for (Entry<BinaryDocValues,FixedBitSet> entry : matchingDocs.entrySet()) {
+      BinaryDocValues facetsSource = entry.getKey();
       FixedBitSet bits = entry.getValue();
       int doc = 0;
       int length = bits.length();
       while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
-        facetsSource .getBytes(doc, buf);
+        facetsSource.get(doc, buf);
         if (buf.length > 0) { // this document has facets
           int upto = buf.offset + buf.length;
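Background for the hunks above: the collector now keys matchingDocs directly by the per-segment BinaryDocValues and, in countFacets(), walks only the set bits of each segment's FixedBitSet before reading that document's facet bytes. A stand-alone sketch of the same collect-then-count shape using java.util.BitSet (illustrative only; the int[][] of ordinals per document and the MatchingDocsCounter name are made-up stand-ins for the encoded facet bytes and the collector):

import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

public class MatchingDocsCounter {
  private final Map<int[][], BitSet> matchingDocs = new HashMap<int[][], BitSet>();
  private final int[] counts = new int[16]; // one slot per ordinal (sized arbitrarily here)

  /** Called once per segment, like setNextReader: remember the value source and its bitset. */
  public BitSet newSegment(int[][] ordinalsPerDoc) {
    BitSet bits = new BitSet(ordinalsPerDoc.length);
    matchingDocs.put(ordinalsPerDoc, bits);
    return bits;
  }

  /** Called after collection, like countFacets(): visit only the docs that matched. */
  public int[] count() {
    for (Map.Entry<int[][], BitSet> entry : matchingDocs.entrySet()) {
      int[][] source = entry.getKey();
      BitSet bits = entry.getValue();
      for (int doc = bits.nextSetBit(0); doc != -1; doc = bits.nextSetBit(doc + 1)) {
        for (int ord : source[doc]) {
          counts[ord]++;
        }
      }
    }
    return counts;
  }

  public static void main(String[] args) {
    MatchingDocsCounter counter = new MatchingDocsCounter();
    int[][] segment = { {1, 2}, {2}, {3} }; // ordinals indexed for docs 0..2
    BitSet bits = counter.newSegment(segment);
    bits.set(0); // pretend docs 0 and 2 matched the query
    bits.set(2);
    int[] counts = counter.count();
    System.out.println(counts[1] + " " + counts[2] + " " + counts[3]); // 1 1 1
  }
}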
diff --git a/lucene/facet/src/test/org/apache/lucene/util/SlowRAMDirectory.java b/lucene/facet/src/test/org/apache/lucene/util/SlowRAMDirectory.java
index 9db5f6c0cab..77c75ca3865 100644
--- a/lucene/facet/src/test/org/apache/lucene/util/SlowRAMDirectory.java
+++ b/lucene/facet/src/test/org/apache/lucene/util/SlowRAMDirectory.java
@@ -33,7 +33,7 @@ public class SlowRAMDirectory extends RAMDirectory {
 
   private static final int IO_SLEEP_THRESHOLD = 50;
 
-  private Random random;
+  Random random;
   private int sleepMillis;
 
   public void setSleepMillis(int sleepMillis) {
@@ -62,7 +62,7 @@ public class SlowRAMDirectory extends RAMDirectory {
     return super.openInput(name, context);
   }
-  void doSleep(int length) {
+  void doSleep(Random random, int length) {
     int sTime = length<10 ? sleepMillis : (int) (sleepMillis * Math.log(length));
     if (random!=null) {
       sTime = random.nextInt(sTime);
     }
@@ -74,6 +74,14 @@ public class SlowRAMDirectory extends RAMDirectory {
     }
   }
 
+  /** Make a private random. */
+  Random forkRandom() {
+    if (random == null) {
+      return null;
+    }
+    return new Random(random.nextLong());
+  }
+
   /**
    * Delegate class to wrap an IndexInput and delay reading bytes by some
    * specified time.
@@ -81,16 +89,18 @@
   private class SlowIndexInput extends IndexInput {
     private IndexInput ii;
     private int numRead = 0;
+    private Random random;
 
     public SlowIndexInput(IndexInput ii) {
       super("SlowIndexInput(" + ii + ")");
+      this.random = forkRandom();
       this.ii = ii;
     }
 
     @Override
     public byte readByte() throws IOException {
       if (numRead >= IO_SLEEP_THRESHOLD) {
-        doSleep(0);
+        doSleep(random, 0);
         numRead = 0;
       }
       ++numRead;
@@ -100,7 +110,7 @@
     @Override
     public void readBytes(byte[] b, int offset, int len) throws IOException {
       if (numRead >= IO_SLEEP_THRESHOLD) {
-        doSleep(len);
+        doSleep(random, len);
         numRead = 0;
       }
       numRead += len;
@@ -125,15 +135,17 @@
 
     private IndexOutput io;
     private int numWrote;
+    private final Random random;
 
     public SlowIndexOutput(IndexOutput io) {
      this.io = io;
+      this.random = forkRandom();
     }
 
     @Override
     public void writeByte(byte b) throws IOException {
       if (numWrote >= IO_SLEEP_THRESHOLD) {
-        doSleep(0);
+        doSleep(random, 0);
         numWrote = 0;
       }
       ++numWrote;
@@ -143,7 +155,7 @@
     @Override
     public void writeBytes(byte[] b, int offset, int length) throws IOException {
       if (numWrote >= IO_SLEEP_THRESHOLD) {
-        doSleep(length);
+        doSleep(random, length);
         numWrote = 0;
       }
       numWrote += length;
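Background for the hunks above: doSleep now receives the Random to use, and each SlowIndexInput/SlowIndexOutput forks its own private Random via forkRandom(), so the sleep jitter stays reproducible from the test seed without several streams sharing one Random instance. A stand-alone sketch of that forking pattern (illustrative only; RandomForking and sleepTime are made-up names, while forkRandom mirrors the method added in the patch):

import java.util.Random;

public class RandomForking {

  private final Random random; // shared, test-seeded source (may be null: no jitter)

  public RandomForking(Random random) {
    this.random = random;
  }

  /** Make a private random, as SlowRAMDirectory.forkRandom() does. */
  Random forkRandom() {
    if (random == null) {
      return null;
    }
    return new Random(random.nextLong());
  }

  static int sleepTime(Random random, int baseMillis) {
    // Same shape as doSleep(Random, int): jitter only if a random was provided
    // (the real method additionally scales the base by log(length)).
    return random == null ? baseMillis : random.nextInt(baseMillis);
  }

  public static void main(String[] args) {
    RandomForking dir = new RandomForking(new Random(42));
    Random perInput = dir.forkRandom();  // one per wrapped input stream
    Random perOutput = dir.forkRandom(); // one per wrapped output stream
    System.out.println(sleepTime(perInput, 50));
    System.out.println(sleepTime(perOutput, 50));
  }
}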