From 4597d97e68db0f258c4e0cdef33e1949052af638 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 17 Nov 2011 13:58:59 +0000 Subject: [PATCH] LUCENE-3580: fix postingsreaders to obey DISI contract git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203190 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/codecs/lucene3x/Lucene3xFields.java | 2 + .../lucene40/Lucene40PostingsReader.java | 52 +++++++++++-------- .../codecs/memory/MemoryPostingsFormat.java | 32 +++++++----- .../codecs/pulsing/PulsingPostingsReader.java | 26 ++++++---- .../index/codecs/sep/SepPostingsReader.java | 34 ++++++------ .../simpletext/SimpleTextFieldsReader.java | 3 +- .../lucene/index/TestDocsAndPositions.java | 50 +++++++++++++++++- 7 files changed, 134 insertions(+), 65 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java index 7fb92828071..37f7b22dda7 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java @@ -994,6 +994,7 @@ public class Lucene3xFields extends FieldsProducer { public PreDocsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException { docs.setLiveDocs(liveDocs); docs.seek(termEnum); + docID = -1; return this; } @@ -1050,6 +1051,7 @@ public class Lucene3xFields extends FieldsProducer { public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException { pos.setLiveDocs(liveDocs); pos.seek(termEnum); + docID = -1; return this; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java index 5283c56b404..102e7333e42 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java @@ -273,7 +273,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { int limit; // number of docs in this posting int ord; // how many docs we've read - int doc; // doc we last read + int doc = -1; // doc we last read + int accum; // accumulator for doc deltas int freq; // freq we last read Bits liveDocs; @@ -306,7 +307,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { limit = termState.docFreq; assert limit > 0; ord = 0; - doc = 0; + doc = -1; + accum = 0; // if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset); skipped = false; @@ -329,9 +331,9 @@ public class Lucene40PostingsReader extends PostingsReaderBase { final int code = freqIn.readVInt(); // if (DEBUG) System.out.println(" code=" + code); if (omitTF) { - doc += code; + accum += code; } else { - doc += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -339,13 +341,13 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } } - if (liveDocs == null || liveDocs.get(doc)) { + if (liveDocs == null || liveDocs.get(accum)) { break; } } //if (DEBUG) System.out.println(" stpr.nextDoc return doc=" + doc); - return doc; + return (doc = accum); } @Override @@ -360,9 +362,9 @@ public class Lucene40PostingsReader extends PostingsReaderBase { // manually inlined call to next() for speed final int code = freqIn.readVInt(); if (omitTF) { - doc += code; + accum += code; } else { - doc += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -370,8 +372,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } } - if (liveDocs == null || liveDocs.get(doc)) { - docs[i] = doc; + if (liveDocs == null || liveDocs.get(accum)) { + docs[i] = doc = accum; freqs[i] = freq; ++i; } @@ -422,7 +424,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { // Skipper moved ord = newOrd; - doc = skipper.getDoc(); + doc = accum = skipper.getDoc(); freqIn.seek(skipper.getFreqPointer()); } } @@ -444,7 +446,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { int limit; // number of docs in this posting int ord; // how many docs we've read - int doc; // doc we last read + int doc = -1; // doc we last read + int accum; // accumulator for doc deltas int freq; // freq we last read int position; @@ -482,7 +485,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { assert limit > 0; ord = 0; - doc = 0; + doc = -1; + accum = 0; position = 0; skipped = false; @@ -510,7 +514,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { // Decode next doc/freq pair final int code = freqIn.readVInt(); - doc += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -518,7 +522,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } posPendingCount += freq; - if (liveDocs == null || liveDocs.get(doc)) { + if (liveDocs == null || liveDocs.get(accum)) { break; } } @@ -526,7 +530,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { position = 0; // if (DEBUG) System.out.println(" return doc=" + doc); - return doc; + return (doc = accum); } @Override @@ -572,7 +576,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { if (newOrd > ord) { // Skipper moved ord = newOrd; - doc = skipper.getDoc(); + doc = accum = skipper.getDoc(); freqIn.seek(skipper.getFreqPointer()); lazyProxPointer = skipper.getProxPointer(); posPendingCount = 0; @@ -636,7 +640,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { int limit; // number of docs in this posting int ord; // how many docs we've read - int doc; // doc we last read + int doc = -1; // doc we last read + int accum; // accumulator for doc deltas int freq; // freq we last read int position; @@ -679,7 +684,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { limit = termState.docFreq; ord = 0; - doc = 0; + doc = -1; + accum = 0; position = 0; skipped = false; @@ -707,7 +713,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { // Decode next doc/freq pair final int code = freqIn.readVInt(); - doc += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -715,7 +721,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } posPendingCount += freq; - if (liveDocs == null || liveDocs.get(doc)) { + if (liveDocs == null || liveDocs.get(accum)) { break; } } @@ -723,7 +729,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { position = 0; //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc); - return doc; + return (doc = accum); } @Override @@ -769,7 +775,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { if (newOrd > ord) { // Skipper moved ord = newOrd; - doc = skipper.getDoc(); + doc = accum = skipper.getDoc(); freqIn.seek(skipper.getFreqPointer()); lazyProxPointer = skipper.getProxPointer(); posPendingCount = 0; diff --git a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryPostingsFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryPostingsFormat.java index 16a32be80e8..e244d0a4804 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryPostingsFormat.java @@ -273,7 +273,8 @@ public class MemoryPostingsFormat extends PostingsFormat { private Bits liveDocs; private int docUpto; - private int docID; + private int docID = -1; + private int accum; private int freq; private int payloadLen; private int numDocs; @@ -295,7 +296,8 @@ public class MemoryPostingsFormat extends PostingsFormat { in.reset(buffer, 0, bufferIn.length - bufferIn.offset); System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset); this.liveDocs = liveDocs; - docID = 0; + docID = -1; + accum = 0; docUpto = 0; payloadLen = 0; this.numDocs = numDocs; @@ -314,12 +316,12 @@ public class MemoryPostingsFormat extends PostingsFormat { } docUpto++; if (indexOptions == IndexOptions.DOCS_ONLY) { - docID += in.readVInt(); + accum += in.readVInt(); freq = 1; } else { final int code = in.readVInt(); - docID += code >>> 1; - if (VERBOSE) System.out.println(" docID=" + docID + " code=" + code); + accum += code >>> 1; + if (VERBOSE) System.out.println(" docID=" + accum + " code=" + code); if ((code & 1) != 0) { freq = 1; } else { @@ -343,9 +345,9 @@ public class MemoryPostingsFormat extends PostingsFormat { } } - if (liveDocs == null || liveDocs.get(docID)) { - if (VERBOSE) System.out.println(" return docID=" + docID + " freq=" + freq); - return docID; + if (liveDocs == null || liveDocs.get(accum)) { + if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq); + return (docID = accum); } } } @@ -380,7 +382,8 @@ public class MemoryPostingsFormat extends PostingsFormat { private Bits liveDocs; private int docUpto; - private int docID; + private int docID = -1; + private int accum; private int freq; private int numDocs; private int posPending; @@ -412,7 +415,8 @@ public class MemoryPostingsFormat extends PostingsFormat { in.reset(buffer, 0, bufferIn.length - bufferIn.offset); System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset); this.liveDocs = liveDocs; - docID = 0; + docID = -1; + accum = 0; docUpto = 0; payload.bytes = buffer; payloadLength = 0; @@ -436,7 +440,7 @@ public class MemoryPostingsFormat extends PostingsFormat { docUpto++; final int code = in.readVInt(); - docID += code >>> 1; + accum += code >>> 1; if ((code & 1) != 0) { freq = 1; } else { @@ -444,11 +448,11 @@ public class MemoryPostingsFormat extends PostingsFormat { assert freq > 0; } - if (liveDocs == null || liveDocs.get(docID)) { + if (liveDocs == null || liveDocs.get(accum)) { pos = 0; posPending = freq; - if (VERBOSE) System.out.println(" return docID=" + docID + " freq=" + freq); - return docID; + if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq); + return (docID = accum); } // Skip positions diff --git a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java index 233a4f990fa..044c936a6d0 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java @@ -257,7 +257,8 @@ public class PulsingPostingsReader extends PostingsReaderBase { private final IndexOptions indexOptions; private final boolean storePayloads; private Bits liveDocs; - private int docID; + private int docID = -1; + private int accum; private int freq; private int payloadLength; @@ -279,7 +280,8 @@ public class PulsingPostingsReader extends PostingsReaderBase { } System.arraycopy(termState.postings, 0, postingsBytes, 0, termState.postingsSize); postings.reset(postingsBytes, 0, termState.postingsSize); - docID = 0; + docID = -1; + accum = 0; payloadLength = 0; freq = 1; this.liveDocs = liveDocs; @@ -302,9 +304,9 @@ public class PulsingPostingsReader extends PostingsReaderBase { final int code = postings.readVInt(); //System.out.println(" read code=" + code); if (indexOptions == IndexOptions.DOCS_ONLY) { - docID += code; + accum += code; } else { - docID += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -332,8 +334,8 @@ public class PulsingPostingsReader extends PostingsReaderBase { } } - if (liveDocs == null || liveDocs.get(docID)) { - return docID; + if (liveDocs == null || liveDocs.get(accum)) { + return (docID = accum); } } } @@ -365,7 +367,8 @@ public class PulsingPostingsReader extends PostingsReaderBase { private final boolean storePayloads; private Bits liveDocs; - private int docID; + private int docID = -1; + private int accum; private int freq; private int posPending; private int position; @@ -394,7 +397,8 @@ public class PulsingPostingsReader extends PostingsReaderBase { this.liveDocs = liveDocs; payloadLength = 0; posPending = 0; - docID = 0; + docID = -1; + accum = 0; //System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this); return this; } @@ -414,7 +418,7 @@ public class PulsingPostingsReader extends PostingsReaderBase { } final int code = postings.readVInt(); - docID += code >>> 1; // shift off low bit + accum += code >>> 1; // shift off low bit if ((code & 1) != 0) { // if low bit is set freq = 1; // freq is one } else { @@ -422,10 +426,10 @@ public class PulsingPostingsReader extends PostingsReaderBase { } posPending = freq; - if (liveDocs == null || liveDocs.get(docID)) { + if (liveDocs == null || liveDocs.get(accum)) { //System.out.println(" return docID=" + docID + " freq=" + freq); position = 0; - return docID; + return (docID = accum); } } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java index 678d3c8e2dc..40acb609083 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java @@ -312,7 +312,8 @@ public class SepPostingsReader extends PostingsReaderBase { class SepDocsEnum extends DocsEnum { int docFreq; - int doc; + int doc = -1; + int accum; int count; int freq; long freqStart; @@ -376,7 +377,8 @@ public class SepPostingsReader extends PostingsReaderBase { // NOTE: unused if docFreq < skipMinimum: skipFP = termState.skipFP; count = 0; - doc = 0; + doc = -1; + accum = 0; skipped = false; return this; @@ -394,18 +396,18 @@ public class SepPostingsReader extends PostingsReaderBase { // Decode next doc //System.out.println("decode docDelta:"); - doc += docReader.next(); + accum += docReader.next(); if (!omitTF) { //System.out.println("decode freq:"); freq = freqReader.next(); } - if (liveDocs == null || liveDocs.get(doc)) { + if (liveDocs == null || liveDocs.get(accum)) { break; } } - return doc; + return (doc = accum); } @Override @@ -420,14 +422,14 @@ public class SepPostingsReader extends PostingsReaderBase { count++; // manually inlined call to next() for speed //System.out.println("decode doc"); - doc += docReader.next(); + accum += docReader.next(); if (!omitTF) { //System.out.println("decode freq"); freq = freqReader.next(); } - if (liveDocs == null || liveDocs.get(doc)) { - docs[i] = doc; + if (liveDocs == null || liveDocs.get(accum)) { + docs[i] = doc = accum; freqs[i] = freq; //System.out.println(" docs[" + i + "]=" + doc + " count=" + count + " dF=" + docFreq); i++; @@ -488,7 +490,7 @@ public class SepPostingsReader extends PostingsReaderBase { } skipper.getDocIndex().seek(docReader); count = newCount; - doc = skipper.getDoc(); + doc = accum = skipper.getDoc(); } } @@ -505,7 +507,8 @@ public class SepPostingsReader extends PostingsReaderBase { class SepDocsAndPositionsEnum extends DocsAndPositionsEnum { int docFreq; - int doc; + int doc = -1; + int accum; int count; int freq; long freqStart; @@ -572,7 +575,8 @@ public class SepPostingsReader extends PostingsReaderBase { docFreq = termState.docFreq; count = 0; - doc = 0; + doc = -1; + accum = 0; pendingPosCount = 0; pendingPayloadBytes = 0; skipped = false; @@ -595,20 +599,20 @@ public class SepPostingsReader extends PostingsReaderBase { // Decode next doc //System.out.println(" sep d&p read doc"); - doc += docReader.next(); + accum += docReader.next(); //System.out.println(" sep d&p read freq"); freq = freqReader.next(); pendingPosCount += freq; - if (liveDocs == null || liveDocs.get(doc)) { + if (liveDocs == null || liveDocs.get(accum)) { break; } } position = 0; - return doc; + return (doc = accum); } @Override @@ -668,7 +672,7 @@ public class SepPostingsReader extends PostingsReaderBase { posIndex.set(skipper.getPosIndex()); posSeekPending = true; count = newCount; - doc = skipper.getDoc(); + doc = accum = skipper.getDoc(); //System.out.println(" moved to doc=" + doc); //payloadIn.seek(skipper.getPayloadPointer()); payloadFP = skipper.getPayloadPointer(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java index fbe2779f9e9..3516d8d3edc 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java @@ -314,7 +314,7 @@ class SimpleTextFieldsReader extends FieldsProducer { private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum { private final IndexInput inStart; private final IndexInput in; - private int docID; + private int docID = -1; private int tf; private Bits liveDocs; private final BytesRef scratch = new BytesRef(10); @@ -336,6 +336,7 @@ class SimpleTextFieldsReader extends FieldsProducer { public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs) { this.liveDocs = liveDocs; nextDocStart = fp; + docID = -1; return this; } diff --git a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java index 8141da52417..80904a5adb7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java @@ -22,10 +22,13 @@ import java.util.Arrays; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -331,5 +334,50 @@ public class TestDocsAndPositions extends LuceneTestCase { reader.close(); dir.close(); } - + + public void testDocsEnumStart() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + Document doc = new Document(); + doc.add(newField("foo", "bar", StringField.TYPE_UNSTORED)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + IndexReader r = getOnlySegmentReader(reader); + DocsEnum disi = r.termDocsEnum(null, "foo", new BytesRef("bar")); + int docid = disi.docID(); + assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); + assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + + // now reuse and check again + disi = r.terms("foo").docs(null, new BytesRef("bar"), disi); + docid = disi.docID(); + assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); + assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + writer.close(); + r.close(); + dir.close(); + } + + public void testDocsAndPositionsEnumStart() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + Document doc = new Document(); + doc.add(newField("foo", "bar", TextField.TYPE_UNSTORED)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + IndexReader r = getOnlySegmentReader(reader); + DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar")); + int docid = disi.docID(); + assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); + assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + + // now reuse and check again + disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi); + docid = disi.docID(); + assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); + assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + writer.close(); + r.close(); + dir.close(); + } }