Substantially improved the performance of DateFilter by adding the

ability to reuse TermDocs objects.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149642 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2002-01-21 17:12:12 +00:00
parent 1fa4fa82d1
commit 65ead57f1c
8 changed files with 88 additions and 69 deletions

View File

@ -193,7 +193,7 @@ abstract public class IndexReader {
abstract public int docFreq(Term t) throws IOException;
/** Returns an enumeration of all the documents which contain
<code>Term</code>. For each document, the document number, the frequency of
<code>term</code>. For each document, the document number, the frequency of
the term in that document is also provided, for use in search scoring.
Thus, this method implements the mapping:
<p><ul>
@ -201,10 +201,17 @@ abstract public class IndexReader {
</ul>
<p>The enumeration is ordered by document number. Each document number
is greater than all that precede it in the enumeration. */
abstract public TermDocs termDocs(Term t) throws IOException;
public TermDocs termDocs(Term term) throws IOException {
TermDocs termDocs = termDocs();
termDocs.seek(term);
return termDocs;
}
/** Returns an unpositioned {@link TermDocs} enumerator. */
abstract public TermDocs termDocs() throws IOException;
/** Returns an enumeration of all the documents which contain
<code>Term</code>. For each document, in addition to the document number
<code>term</code>. For each document, in addition to the document number
and frequency of the term in that document, a list of all of the ordinal
positions of the term in the document is available. Thus, this method
implements the mapping:
@ -218,7 +225,14 @@ abstract public class IndexReader {
<p> This positional information faciliates phrase and proximity searching.
<p>The enumeration is ordered by document number. Each document number is
greater than all that precede it in the enumeration. */
abstract public TermPositions termPositions(Term t) throws IOException;
public TermPositions termPositions(Term term) throws IOException {
TermPositions termPositions = termPositions();
termPositions.seek(term);
return termPositions;
}
/** Returns an unpositioned {@link TermPositions} enumerator. */
abstract public TermPositions termPositions() throws IOException;
/** Deletes the document numbered <code>docNum</code>. Once a document is
deleted it will not appear in TermDocs or TermPostitions enumerations.

View File

@ -78,8 +78,8 @@ final class SegmentReader extends IndexReader {
BitVector deletedDocs = null;
private boolean deletedDocsDirty = false;
private InputStream freqStream;
private InputStream proxStream;
InputStream freqStream;
InputStream proxStream;
private static class Norm {
@ -194,28 +194,12 @@ final class SegmentReader extends IndexReader {
return (deletedDocs != null && deletedDocs.get(n));
}
public final TermDocs termDocs(Term t) throws IOException {
TermInfo ti = tis.get(t);
if (ti != null)
return new SegmentTermDocs(this, ti);
else
return null;
public final TermDocs termDocs() throws IOException {
return new SegmentTermDocs(this);
}
final InputStream getFreqStream () {
return (InputStream)freqStream.clone();
}
public final TermPositions termPositions(Term t) throws IOException {
TermInfo ti = tis.get(t);
if (ti != null)
return new SegmentTermPositions(this, ti);
else
return null;
}
final InputStream getProxStream () {
return (InputStream)proxStream.clone();
public final TermPositions termPositions() throws IOException {
return new SegmentTermPositions(this);
}
public final int docFreq(Term t) throws IOException {

View File

@ -66,22 +66,27 @@ class SegmentTermDocs implements TermDocs {
int doc = 0;
int freq;
SegmentTermDocs(SegmentReader p) throws IOException {
parent = p;
freqStream = parent.getFreqStream();
deletedDocs = parent.deletedDocs;
SegmentTermDocs(SegmentReader parent)
throws IOException {
this.parent = parent;
this.freqStream = (InputStream)parent.freqStream.clone();
this.deletedDocs = parent.deletedDocs;
}
SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
this(p);
public void seek(Term term) throws IOException {
TermInfo ti = parent.tis.get(term);
seek(ti);
}
void seek(TermInfo ti) throws IOException {
if (ti == null) {
freqCount = 0;
} else {
freqCount = ti.docFreq;
doc = 0;
freqStream.seek(ti.freqPointer);
}
}
public void close() throws IOException {
freqStream.close();

View File

@ -66,13 +66,7 @@ extends SegmentTermDocs implements TermPositions {
SegmentTermPositions(SegmentReader p) throws IOException {
super(p);
proxStream = parent.getProxStream();
}
SegmentTermPositions(SegmentReader p, TermInfo ti)
throws IOException {
this(p);
seek(ti);
this.proxStream = (InputStream)parent.proxStream.clone();
}
final void seek(TermInfo ti) throws IOException {

View File

@ -151,12 +151,12 @@ final class SegmentsReader extends IndexReader {
return total;
}
public final TermDocs termDocs(Term term) throws IOException {
return new SegmentsTermDocs(readers, starts, term);
public final TermDocs termDocs() throws IOException {
return new SegmentsTermDocs(readers, starts);
}
public final TermPositions termPositions(Term term) throws IOException {
return new SegmentsTermPositions(readers, starts, term);
public final TermPositions termPositions() throws IOException {
return new SegmentsTermPositions(readers, starts);
}
public final void close() throws IOException {
@ -240,13 +240,15 @@ class SegmentsTermDocs implements TermDocs {
protected int base = 0;
protected int pointer = 0;
SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
private SegmentTermDocs[] segTermDocs;
protected SegmentTermDocs current; // == segTermDocs[pointer]
SegmentsTermDocs(SegmentReader[] r, int[] s) {
readers = r;
starts = s;
term = t;
}
protected SegmentTermDocs current;
segTermDocs = new SegmentTermDocs[r.length];
}
public final int doc() {
return base + current.doc;
@ -255,14 +257,19 @@ class SegmentsTermDocs implements TermDocs {
return current.freq;
}
public final void seek(Term term) {
this.term = term;
this.base = 0;
this.pointer = 0;
this.current = null;
}
public final boolean next() throws IOException {
if (current != null && current.next()) {
return true;
} else if (pointer < readers.length) {
if (current != null)
current.close();
base = starts[pointer];
current = termDocs(readers[pointer++]);
current = termDocs(pointer++);
return next();
} else
return false;
@ -275,14 +282,13 @@ class SegmentsTermDocs implements TermDocs {
while (current == null) {
if (pointer < readers.length) { // try next segment
base = starts[pointer];
current = termDocs(readers[pointer++]);
current = termDocs(pointer++);
} else {
return 0;
}
}
int end = current.read(docs, freqs);
if (end == 0) { // none left in segment
current.close();
current = null;
} else { // got some
final int b = base; // adjust doc numbers
@ -302,25 +308,37 @@ class SegmentsTermDocs implements TermDocs {
return true;
}
private SegmentTermDocs termDocs(int i) throws IOException {
if (term == null)
return null;
SegmentTermDocs result = segTermDocs[i];
if (result == null)
result = segTermDocs[i] = termDocs(readers[i]);
result.seek(term);
return result;
}
protected SegmentTermDocs termDocs(SegmentReader reader)
throws IOException {
return (SegmentTermDocs)reader.termDocs(term);
return (SegmentTermDocs)reader.termDocs();
}
public final void close() throws IOException {
if (current != null)
current.close();
for (int i = 0; i < segTermDocs.length; i++) {
if (segTermDocs[i] != null)
segTermDocs[i].close();
}
}
}
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
super(r,s,t);
SegmentsTermPositions(SegmentReader[] r, int[] s) {
super(r,s);
}
protected final SegmentTermDocs termDocs(SegmentReader reader)
throws IOException {
return (SegmentTermDocs)reader.termPositions(term);
return (SegmentTermDocs)reader.termPositions();
}
public final int nextPosition() throws IOException {

View File

@ -67,6 +67,11 @@ import org.apache.lucene.document.Document;
*/
public interface TermDocs {
/** Sets this to the data for a term.
* The enumeration is reset to the start of the data for this term.
*/
void seek(Term term) throws IOException;
/** Returns the current document number. <p> This is invalid until {@link
#next()} is called for the first time.*/
int doc();
@ -91,7 +96,7 @@ public interface TermDocs {
/** Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
* an entry. <p>Behaves as if written: <pre>
* public boolean skipTo(int target) {
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;

View File

@ -65,12 +65,10 @@ public abstract class TermEnum {
/** Increments the enumeration to the next element. True if one exists.*/
abstract public boolean next() throws IOException;
/** Returns the current Term in the enumeration.
Initially invalid, valid after next() called for the first time.*/
/** Returns the current Term in the enumeration.*/
abstract public Term term();
/** Returns the docFreq of the current Term in the enumeration.
Initially invalid, valid after next() called for the first time.*/
/** Returns the docFreq of the current Term in the enumeration.*/
abstract public int docFreq();
/** Closes the enumeration to further activity, freeing resources. */

View File

@ -129,10 +129,11 @@ public final class DateFilter extends Filter {
final public BitSet bits(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc());
TermEnum enum = reader.terms(new Term(field, start));
TermDocs termDocs = reader.termDocs();
try {
Term stop = new Term(field, end);
while (enum.term().compareTo(stop) <= 0) {
TermDocs termDocs = reader.termDocs(enum.term());
termDocs.seek(enum.term());
try {
while (termDocs.next())
bits.set(termDocs.doc());