mirror of https://github.com/apache/lucene.git
Substantially improved the performance of DateFilter by adding the
ability to reuse TermDocs objects. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149642 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1fa4fa82d1
commit
65ead57f1c
|
@ -193,7 +193,7 @@ abstract public class IndexReader {
|
|||
abstract public int docFreq(Term t) throws IOException;
|
||||
|
||||
/** Returns an enumeration of all the documents which contain
|
||||
<code>Term</code>. For each document, the document number, the frequency of
|
||||
<code>term</code>. For each document, the document number, the frequency of
|
||||
the term in that document is also provided, for use in search scoring.
|
||||
Thus, this method implements the mapping:
|
||||
<p><ul>
|
||||
|
@ -201,10 +201,17 @@ abstract public class IndexReader {
|
|||
</ul>
|
||||
<p>The enumeration is ordered by document number. Each document number
|
||||
is greater than all that precede it in the enumeration. */
|
||||
abstract public TermDocs termDocs(Term t) throws IOException;
|
||||
public TermDocs termDocs(Term term) throws IOException {
|
||||
TermDocs termDocs = termDocs();
|
||||
termDocs.seek(term);
|
||||
return termDocs;
|
||||
}
|
||||
|
||||
/** Returns an unpositioned {@link TermDocs} enumerator. */
|
||||
abstract public TermDocs termDocs() throws IOException;
|
||||
|
||||
/** Returns an enumeration of all the documents which contain
|
||||
<code>Term</code>. For each document, in addition to the document number
|
||||
<code>term</code>. For each document, in addition to the document number
|
||||
and frequency of the term in that document, a list of all of the ordinal
|
||||
positions of the term in the document is available. Thus, this method
|
||||
implements the mapping:
|
||||
|
@ -218,7 +225,14 @@ abstract public class IndexReader {
|
|||
<p> This positional information faciliates phrase and proximity searching.
|
||||
<p>The enumeration is ordered by document number. Each document number is
|
||||
greater than all that precede it in the enumeration. */
|
||||
abstract public TermPositions termPositions(Term t) throws IOException;
|
||||
public TermPositions termPositions(Term term) throws IOException {
|
||||
TermPositions termPositions = termPositions();
|
||||
termPositions.seek(term);
|
||||
return termPositions;
|
||||
}
|
||||
|
||||
/** Returns an unpositioned {@link TermPositions} enumerator. */
|
||||
abstract public TermPositions termPositions() throws IOException;
|
||||
|
||||
/** Deletes the document numbered <code>docNum</code>. Once a document is
|
||||
deleted it will not appear in TermDocs or TermPostitions enumerations.
|
||||
|
|
|
@ -78,8 +78,8 @@ final class SegmentReader extends IndexReader {
|
|||
BitVector deletedDocs = null;
|
||||
private boolean deletedDocsDirty = false;
|
||||
|
||||
private InputStream freqStream;
|
||||
private InputStream proxStream;
|
||||
InputStream freqStream;
|
||||
InputStream proxStream;
|
||||
|
||||
|
||||
private static class Norm {
|
||||
|
@ -194,28 +194,12 @@ final class SegmentReader extends IndexReader {
|
|||
return (deletedDocs != null && deletedDocs.get(n));
|
||||
}
|
||||
|
||||
public final TermDocs termDocs(Term t) throws IOException {
|
||||
TermInfo ti = tis.get(t);
|
||||
if (ti != null)
|
||||
return new SegmentTermDocs(this, ti);
|
||||
else
|
||||
return null;
|
||||
public final TermDocs termDocs() throws IOException {
|
||||
return new SegmentTermDocs(this);
|
||||
}
|
||||
|
||||
final InputStream getFreqStream () {
|
||||
return (InputStream)freqStream.clone();
|
||||
}
|
||||
|
||||
public final TermPositions termPositions(Term t) throws IOException {
|
||||
TermInfo ti = tis.get(t);
|
||||
if (ti != null)
|
||||
return new SegmentTermPositions(this, ti);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
final InputStream getProxStream () {
|
||||
return (InputStream)proxStream.clone();
|
||||
public final TermPositions termPositions() throws IOException {
|
||||
return new SegmentTermPositions(this);
|
||||
}
|
||||
|
||||
public final int docFreq(Term t) throws IOException {
|
||||
|
|
|
@ -66,21 +66,26 @@ class SegmentTermDocs implements TermDocs {
|
|||
int doc = 0;
|
||||
int freq;
|
||||
|
||||
SegmentTermDocs(SegmentReader p) throws IOException {
|
||||
parent = p;
|
||||
freqStream = parent.getFreqStream();
|
||||
deletedDocs = parent.deletedDocs;
|
||||
SegmentTermDocs(SegmentReader parent)
|
||||
throws IOException {
|
||||
this.parent = parent;
|
||||
this.freqStream = (InputStream)parent.freqStream.clone();
|
||||
this.deletedDocs = parent.deletedDocs;
|
||||
}
|
||||
|
||||
SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
|
||||
this(p);
|
||||
|
||||
public void seek(Term term) throws IOException {
|
||||
TermInfo ti = parent.tis.get(term);
|
||||
seek(ti);
|
||||
}
|
||||
|
||||
void seek(TermInfo ti) throws IOException {
|
||||
freqCount = ti.docFreq;
|
||||
doc = 0;
|
||||
freqStream.seek(ti.freqPointer);
|
||||
if (ti == null) {
|
||||
freqCount = 0;
|
||||
} else {
|
||||
freqCount = ti.docFreq;
|
||||
doc = 0;
|
||||
freqStream.seek(ti.freqPointer);
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
|
|
|
@ -66,13 +66,7 @@ extends SegmentTermDocs implements TermPositions {
|
|||
|
||||
SegmentTermPositions(SegmentReader p) throws IOException {
|
||||
super(p);
|
||||
proxStream = parent.getProxStream();
|
||||
}
|
||||
|
||||
SegmentTermPositions(SegmentReader p, TermInfo ti)
|
||||
throws IOException {
|
||||
this(p);
|
||||
seek(ti);
|
||||
this.proxStream = (InputStream)parent.proxStream.clone();
|
||||
}
|
||||
|
||||
final void seek(TermInfo ti) throws IOException {
|
||||
|
|
|
@ -151,12 +151,12 @@ final class SegmentsReader extends IndexReader {
|
|||
return total;
|
||||
}
|
||||
|
||||
public final TermDocs termDocs(Term term) throws IOException {
|
||||
return new SegmentsTermDocs(readers, starts, term);
|
||||
public final TermDocs termDocs() throws IOException {
|
||||
return new SegmentsTermDocs(readers, starts);
|
||||
}
|
||||
|
||||
public final TermPositions termPositions(Term term) throws IOException {
|
||||
return new SegmentsTermPositions(readers, starts, term);
|
||||
public final TermPositions termPositions() throws IOException {
|
||||
return new SegmentsTermPositions(readers, starts);
|
||||
}
|
||||
|
||||
public final void close() throws IOException {
|
||||
|
@ -240,14 +240,16 @@ class SegmentsTermDocs implements TermDocs {
|
|||
protected int base = 0;
|
||||
protected int pointer = 0;
|
||||
|
||||
SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
|
||||
private SegmentTermDocs[] segTermDocs;
|
||||
protected SegmentTermDocs current; // == segTermDocs[pointer]
|
||||
|
||||
SegmentsTermDocs(SegmentReader[] r, int[] s) {
|
||||
readers = r;
|
||||
starts = s;
|
||||
term = t;
|
||||
|
||||
segTermDocs = new SegmentTermDocs[r.length];
|
||||
}
|
||||
|
||||
protected SegmentTermDocs current;
|
||||
|
||||
public final int doc() {
|
||||
return base + current.doc;
|
||||
}
|
||||
|
@ -255,14 +257,19 @@ class SegmentsTermDocs implements TermDocs {
|
|||
return current.freq;
|
||||
}
|
||||
|
||||
public final void seek(Term term) {
|
||||
this.term = term;
|
||||
this.base = 0;
|
||||
this.pointer = 0;
|
||||
this.current = null;
|
||||
}
|
||||
|
||||
public final boolean next() throws IOException {
|
||||
if (current != null && current.next()) {
|
||||
return true;
|
||||
} else if (pointer < readers.length) {
|
||||
if (current != null)
|
||||
current.close();
|
||||
base = starts[pointer];
|
||||
current = termDocs(readers[pointer++]);
|
||||
current = termDocs(pointer++);
|
||||
return next();
|
||||
} else
|
||||
return false;
|
||||
|
@ -275,14 +282,13 @@ class SegmentsTermDocs implements TermDocs {
|
|||
while (current == null) {
|
||||
if (pointer < readers.length) { // try next segment
|
||||
base = starts[pointer];
|
||||
current = termDocs(readers[pointer++]);
|
||||
current = termDocs(pointer++);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int end = current.read(docs, freqs);
|
||||
if (end == 0) { // none left in segment
|
||||
current.close();
|
||||
current = null;
|
||||
} else { // got some
|
||||
final int b = base; // adjust doc numbers
|
||||
|
@ -302,25 +308,37 @@ class SegmentsTermDocs implements TermDocs {
|
|||
return true;
|
||||
}
|
||||
|
||||
private SegmentTermDocs termDocs(int i) throws IOException {
|
||||
if (term == null)
|
||||
return null;
|
||||
SegmentTermDocs result = segTermDocs[i];
|
||||
if (result == null)
|
||||
result = segTermDocs[i] = termDocs(readers[i]);
|
||||
result.seek(term);
|
||||
return result;
|
||||
}
|
||||
|
||||
protected SegmentTermDocs termDocs(SegmentReader reader)
|
||||
throws IOException {
|
||||
return (SegmentTermDocs)reader.termDocs(term);
|
||||
throws IOException {
|
||||
return (SegmentTermDocs)reader.termDocs();
|
||||
}
|
||||
|
||||
public final void close() throws IOException {
|
||||
if (current != null)
|
||||
current.close();
|
||||
for (int i = 0; i < segTermDocs.length; i++) {
|
||||
if (segTermDocs[i] != null)
|
||||
segTermDocs[i].close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
|
||||
SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
|
||||
super(r,s,t);
|
||||
SegmentsTermPositions(SegmentReader[] r, int[] s) {
|
||||
super(r,s);
|
||||
}
|
||||
|
||||
protected final SegmentTermDocs termDocs(SegmentReader reader)
|
||||
throws IOException {
|
||||
return (SegmentTermDocs)reader.termPositions(term);
|
||||
return (SegmentTermDocs)reader.termPositions();
|
||||
}
|
||||
|
||||
public final int nextPosition() throws IOException {
|
||||
|
|
|
@ -67,6 +67,11 @@ import org.apache.lucene.document.Document;
|
|||
*/
|
||||
|
||||
public interface TermDocs {
|
||||
/** Sets this to the data for a term.
|
||||
* The enumeration is reset to the start of the data for this term.
|
||||
*/
|
||||
void seek(Term term) throws IOException;
|
||||
|
||||
/** Returns the current document number. <p> This is invalid until {@link
|
||||
#next()} is called for the first time.*/
|
||||
int doc();
|
||||
|
@ -91,7 +96,7 @@ public interface TermDocs {
|
|||
/** Skips entries to the first beyond the current whose document number is
|
||||
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||
* an entry. <p>Behaves as if written: <pre>
|
||||
* public boolean skipTo(int target) {
|
||||
* boolean skipTo(int target) {
|
||||
* do {
|
||||
* if (!next())
|
||||
* return false;
|
||||
|
|
|
@ -65,12 +65,10 @@ public abstract class TermEnum {
|
|||
/** Increments the enumeration to the next element. True if one exists.*/
|
||||
abstract public boolean next() throws IOException;
|
||||
|
||||
/** Returns the current Term in the enumeration.
|
||||
Initially invalid, valid after next() called for the first time.*/
|
||||
/** Returns the current Term in the enumeration.*/
|
||||
abstract public Term term();
|
||||
|
||||
/** Returns the docFreq of the current Term in the enumeration.
|
||||
Initially invalid, valid after next() called for the first time.*/
|
||||
/** Returns the docFreq of the current Term in the enumeration.*/
|
||||
abstract public int docFreq();
|
||||
|
||||
/** Closes the enumeration to further activity, freeing resources. */
|
||||
|
|
|
@ -129,10 +129,11 @@ public final class DateFilter extends Filter {
|
|||
final public BitSet bits(IndexReader reader) throws IOException {
|
||||
BitSet bits = new BitSet(reader.maxDoc());
|
||||
TermEnum enum = reader.terms(new Term(field, start));
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
Term stop = new Term(field, end);
|
||||
while (enum.term().compareTo(stop) <= 0) {
|
||||
TermDocs termDocs = reader.termDocs(enum.term());
|
||||
termDocs.seek(enum.term());
|
||||
try {
|
||||
while (termDocs.next())
|
||||
bits.set(termDocs.doc());
|
||||
|
|
Loading…
Reference in New Issue