Substantially improved the performance of DateFilter by adding the ability to reuse TermDocs objects.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149642 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2002-01-21 17:12:12 +00:00
parent 1fa4fa82d1
commit 65ead57f1c
8 changed files with 88 additions and 69 deletions

View File

@ -193,7 +193,7 @@ abstract public class IndexReader {
abstract public int docFreq(Term t) throws IOException; abstract public int docFreq(Term t) throws IOException;
/** Returns an enumeration of all the documents which contain /** Returns an enumeration of all the documents which contain
<code>Term</code>. For each document, the document number, the frequency of <code>term</code>. For each document, the document number, the frequency of
the term in that document is also provided, for use in search scoring. the term in that document is also provided, for use in search scoring.
Thus, this method implements the mapping: Thus, this method implements the mapping:
<p><ul> <p><ul>
@ -201,10 +201,17 @@ abstract public class IndexReader {
</ul> </ul>
<p>The enumeration is ordered by document number. Each document number <p>The enumeration is ordered by document number. Each document number
is greater than all that precede it in the enumeration. */ is greater than all that precede it in the enumeration. */
abstract public TermDocs termDocs(Term t) throws IOException; public TermDocs termDocs(Term term) throws IOException {
TermDocs termDocs = termDocs();
termDocs.seek(term);
return termDocs;
}
/** Returns an unpositioned {@link TermDocs} enumerator. */
abstract public TermDocs termDocs() throws IOException;
/** Returns an enumeration of all the documents which contain /** Returns an enumeration of all the documents which contain
<code>Term</code>. For each document, in addition to the document number <code>term</code>. For each document, in addition to the document number
and frequency of the term in that document, a list of all of the ordinal and frequency of the term in that document, a list of all of the ordinal
positions of the term in the document is available. Thus, this method positions of the term in the document is available. Thus, this method
implements the mapping: implements the mapping:
@ -218,7 +225,14 @@ abstract public class IndexReader {
<p> This positional information facilitates phrase and proximity searching. <p> This positional information facilitates phrase and proximity searching.
<p>The enumeration is ordered by document number. Each document number is <p>The enumeration is ordered by document number. Each document number is
greater than all that precede it in the enumeration. */ greater than all that precede it in the enumeration. */
abstract public TermPositions termPositions(Term t) throws IOException; public TermPositions termPositions(Term term) throws IOException {
TermPositions termPositions = termPositions();
termPositions.seek(term);
return termPositions;
}
/** Returns an unpositioned {@link TermPositions} enumerator. */
abstract public TermPositions termPositions() throws IOException;
/** Deletes the document numbered <code>docNum</code>. Once a document is /** Deletes the document numbered <code>docNum</code>. Once a document is
deleted it will not appear in TermDocs or TermPositions enumerations. deleted it will not appear in TermDocs or TermPositions enumerations.

View File

@ -78,8 +78,8 @@ final class SegmentReader extends IndexReader {
BitVector deletedDocs = null; BitVector deletedDocs = null;
private boolean deletedDocsDirty = false; private boolean deletedDocsDirty = false;
private InputStream freqStream; InputStream freqStream;
private InputStream proxStream; InputStream proxStream;
private static class Norm { private static class Norm {
@ -194,28 +194,12 @@ final class SegmentReader extends IndexReader {
return (deletedDocs != null && deletedDocs.get(n)); return (deletedDocs != null && deletedDocs.get(n));
} }
public final TermDocs termDocs(Term t) throws IOException { public final TermDocs termDocs() throws IOException {
TermInfo ti = tis.get(t); return new SegmentTermDocs(this);
if (ti != null)
return new SegmentTermDocs(this, ti);
else
return null;
} }
final InputStream getFreqStream () { public final TermPositions termPositions() throws IOException {
return (InputStream)freqStream.clone(); return new SegmentTermPositions(this);
}
public final TermPositions termPositions(Term t) throws IOException {
TermInfo ti = tis.get(t);
if (ti != null)
return new SegmentTermPositions(this, ti);
else
return null;
}
final InputStream getProxStream () {
return (InputStream)proxStream.clone();
} }
public final int docFreq(Term t) throws IOException { public final int docFreq(Term t) throws IOException {

View File

@ -66,22 +66,27 @@ class SegmentTermDocs implements TermDocs {
int doc = 0; int doc = 0;
int freq; int freq;
SegmentTermDocs(SegmentReader p) throws IOException { SegmentTermDocs(SegmentReader parent)
parent = p; throws IOException {
freqStream = parent.getFreqStream(); this.parent = parent;
deletedDocs = parent.deletedDocs; this.freqStream = (InputStream)parent.freqStream.clone();
this.deletedDocs = parent.deletedDocs;
} }
SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException { public void seek(Term term) throws IOException {
this(p); TermInfo ti = parent.tis.get(term);
seek(ti); seek(ti);
} }
void seek(TermInfo ti) throws IOException { void seek(TermInfo ti) throws IOException {
if (ti == null) {
freqCount = 0;
} else {
freqCount = ti.docFreq; freqCount = ti.docFreq;
doc = 0; doc = 0;
freqStream.seek(ti.freqPointer); freqStream.seek(ti.freqPointer);
} }
}
public void close() throws IOException { public void close() throws IOException {
freqStream.close(); freqStream.close();

View File

@ -66,13 +66,7 @@ extends SegmentTermDocs implements TermPositions {
SegmentTermPositions(SegmentReader p) throws IOException { SegmentTermPositions(SegmentReader p) throws IOException {
super(p); super(p);
proxStream = parent.getProxStream(); this.proxStream = (InputStream)parent.proxStream.clone();
}
SegmentTermPositions(SegmentReader p, TermInfo ti)
throws IOException {
this(p);
seek(ti);
} }
final void seek(TermInfo ti) throws IOException { final void seek(TermInfo ti) throws IOException {

View File

@ -151,12 +151,12 @@ final class SegmentsReader extends IndexReader {
return total; return total;
} }
public final TermDocs termDocs(Term term) throws IOException { public final TermDocs termDocs() throws IOException {
return new SegmentsTermDocs(readers, starts, term); return new SegmentsTermDocs(readers, starts);
} }
public final TermPositions termPositions(Term term) throws IOException { public final TermPositions termPositions() throws IOException {
return new SegmentsTermPositions(readers, starts, term); return new SegmentsTermPositions(readers, starts);
} }
public final void close() throws IOException { public final void close() throws IOException {
@ -240,13 +240,15 @@ class SegmentsTermDocs implements TermDocs {
protected int base = 0; protected int base = 0;
protected int pointer = 0; protected int pointer = 0;
SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) { private SegmentTermDocs[] segTermDocs;
protected SegmentTermDocs current; // == segTermDocs[pointer]
SegmentsTermDocs(SegmentReader[] r, int[] s) {
readers = r; readers = r;
starts = s; starts = s;
term = t;
}
protected SegmentTermDocs current; segTermDocs = new SegmentTermDocs[r.length];
}
public final int doc() { public final int doc() {
return base + current.doc; return base + current.doc;
@ -255,14 +257,19 @@ class SegmentsTermDocs implements TermDocs {
return current.freq; return current.freq;
} }
public final void seek(Term term) {
this.term = term;
this.base = 0;
this.pointer = 0;
this.current = null;
}
public final boolean next() throws IOException { public final boolean next() throws IOException {
if (current != null && current.next()) { if (current != null && current.next()) {
return true; return true;
} else if (pointer < readers.length) { } else if (pointer < readers.length) {
if (current != null)
current.close();
base = starts[pointer]; base = starts[pointer];
current = termDocs(readers[pointer++]); current = termDocs(pointer++);
return next(); return next();
} else } else
return false; return false;
@ -275,14 +282,13 @@ class SegmentsTermDocs implements TermDocs {
while (current == null) { while (current == null) {
if (pointer < readers.length) { // try next segment if (pointer < readers.length) { // try next segment
base = starts[pointer]; base = starts[pointer];
current = termDocs(readers[pointer++]); current = termDocs(pointer++);
} else { } else {
return 0; return 0;
} }
} }
int end = current.read(docs, freqs); int end = current.read(docs, freqs);
if (end == 0) { // none left in segment if (end == 0) { // none left in segment
current.close();
current = null; current = null;
} else { // got some } else { // got some
final int b = base; // adjust doc numbers final int b = base; // adjust doc numbers
@ -302,25 +308,37 @@ class SegmentsTermDocs implements TermDocs {
return true; return true;
} }
private SegmentTermDocs termDocs(int i) throws IOException {
if (term == null)
return null;
SegmentTermDocs result = segTermDocs[i];
if (result == null)
result = segTermDocs[i] = termDocs(readers[i]);
result.seek(term);
return result;
}
protected SegmentTermDocs termDocs(SegmentReader reader) protected SegmentTermDocs termDocs(SegmentReader reader)
throws IOException { throws IOException {
return (SegmentTermDocs)reader.termDocs(term); return (SegmentTermDocs)reader.termDocs();
} }
public final void close() throws IOException { public final void close() throws IOException {
if (current != null) for (int i = 0; i < segTermDocs.length; i++) {
current.close(); if (segTermDocs[i] != null)
segTermDocs[i].close();
}
} }
} }
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions { class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) { SegmentsTermPositions(SegmentReader[] r, int[] s) {
super(r,s,t); super(r,s);
} }
protected final SegmentTermDocs termDocs(SegmentReader reader) protected final SegmentTermDocs termDocs(SegmentReader reader)
throws IOException { throws IOException {
return (SegmentTermDocs)reader.termPositions(term); return (SegmentTermDocs)reader.termPositions();
} }
public final int nextPosition() throws IOException { public final int nextPosition() throws IOException {

View File

@ -67,6 +67,11 @@ import org.apache.lucene.document.Document;
*/ */
public interface TermDocs { public interface TermDocs {
/** Sets this to the data for a term.
* The enumeration is reset to the start of the data for this term.
*/
void seek(Term term) throws IOException;
/** Returns the current document number. <p> This is invalid until {@link /** Returns the current document number. <p> This is invalid until {@link
#next()} is called for the first time.*/ #next()} is called for the first time.*/
int doc(); int doc();
@ -91,7 +96,7 @@ public interface TermDocs {
/** Skips entries to the first beyond the current whose document number is /** Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such * greater than or equal to <i>target</i>. <p>Returns true iff there is such
* an entry. <p>Behaves as if written: <pre> * an entry. <p>Behaves as if written: <pre>
* public boolean skipTo(int target) { * boolean skipTo(int target) {
* do { * do {
* if (!next()) * if (!next())
* return false; * return false;

View File

@ -65,12 +65,10 @@ public abstract class TermEnum {
/** Increments the enumeration to the next element. True if one exists.*/ /** Increments the enumeration to the next element. True if one exists.*/
abstract public boolean next() throws IOException; abstract public boolean next() throws IOException;
/** Returns the current Term in the enumeration. /** Returns the current Term in the enumeration.*/
Initially invalid, valid after next() called for the first time.*/
abstract public Term term(); abstract public Term term();
/** Returns the docFreq of the current Term in the enumeration. /** Returns the docFreq of the current Term in the enumeration.*/
Initially invalid, valid after next() called for the first time.*/
abstract public int docFreq(); abstract public int docFreq();
/** Closes the enumeration to further activity, freeing resources. */ /** Closes the enumeration to further activity, freeing resources. */

View File

@ -129,10 +129,11 @@ public final class DateFilter extends Filter {
final public BitSet bits(IndexReader reader) throws IOException { final public BitSet bits(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc()); BitSet bits = new BitSet(reader.maxDoc());
TermEnum enum = reader.terms(new Term(field, start)); TermEnum enum = reader.terms(new Term(field, start));
TermDocs termDocs = reader.termDocs();
try { try {
Term stop = new Term(field, end); Term stop = new Term(field, end);
while (enum.term().compareTo(stop) <= 0) { while (enum.term().compareTo(stop) <= 0) {
TermDocs termDocs = reader.termDocs(enum.term()); termDocs.seek(enum.term());
try { try {
while (termDocs.next()) while (termDocs.next())
bits.set(termDocs.doc()); bits.set(termDocs.doc());