mirror of https://github.com/apache/lucene.git
Substantially improved the performance of DateFilter by adding the
ability to reuse TermDocs objects. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149642 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1fa4fa82d1
commit
65ead57f1c
|
@ -193,7 +193,7 @@ abstract public class IndexReader {
|
||||||
abstract public int docFreq(Term t) throws IOException;
|
abstract public int docFreq(Term t) throws IOException;
|
||||||
|
|
||||||
/** Returns an enumeration of all the documents which contain
|
/** Returns an enumeration of all the documents which contain
|
||||||
<code>Term</code>. For each document, the document number, the frequency of
|
<code>term</code>. For each document, the document number, the frequency of
|
||||||
the term in that document is also provided, for use in search scoring.
|
the term in that document is also provided, for use in search scoring.
|
||||||
Thus, this method implements the mapping:
|
Thus, this method implements the mapping:
|
||||||
<p><ul>
|
<p><ul>
|
||||||
|
@ -201,10 +201,17 @@ abstract public class IndexReader {
|
||||||
</ul>
|
</ul>
|
||||||
<p>The enumeration is ordered by document number. Each document number
|
<p>The enumeration is ordered by document number. Each document number
|
||||||
is greater than all that precede it in the enumeration. */
|
is greater than all that precede it in the enumeration. */
|
||||||
abstract public TermDocs termDocs(Term t) throws IOException;
|
public TermDocs termDocs(Term term) throws IOException {
|
||||||
|
TermDocs termDocs = termDocs();
|
||||||
|
termDocs.seek(term);
|
||||||
|
return termDocs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns an unpositioned {@link TermDocs} enumerator. */
|
||||||
|
abstract public TermDocs termDocs() throws IOException;
|
||||||
|
|
||||||
/** Returns an enumeration of all the documents which contain
|
/** Returns an enumeration of all the documents which contain
|
||||||
<code>Term</code>. For each document, in addition to the document number
|
<code>term</code>. For each document, in addition to the document number
|
||||||
and frequency of the term in that document, a list of all of the ordinal
|
and frequency of the term in that document, a list of all of the ordinal
|
||||||
positions of the term in the document is available. Thus, this method
|
positions of the term in the document is available. Thus, this method
|
||||||
implements the mapping:
|
implements the mapping:
|
||||||
|
@ -218,7 +225,14 @@ abstract public class IndexReader {
|
||||||
<p> This positional information facilitates phrase and proximity searching.
|
<p> This positional information facilitates phrase and proximity searching.
|
||||||
<p>The enumeration is ordered by document number. Each document number is
|
<p>The enumeration is ordered by document number. Each document number is
|
||||||
greater than all that precede it in the enumeration. */
|
greater than all that precede it in the enumeration. */
|
||||||
abstract public TermPositions termPositions(Term t) throws IOException;
|
public TermPositions termPositions(Term term) throws IOException {
|
||||||
|
TermPositions termPositions = termPositions();
|
||||||
|
termPositions.seek(term);
|
||||||
|
return termPositions;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns an unpositioned {@link TermPositions} enumerator. */
|
||||||
|
abstract public TermPositions termPositions() throws IOException;
|
||||||
|
|
||||||
/** Deletes the document numbered <code>docNum</code>. Once a document is
|
/** Deletes the document numbered <code>docNum</code>. Once a document is
|
||||||
deleted it will not appear in TermDocs or TermPositions enumerations.
|
deleted it will not appear in TermDocs or TermPositions enumerations.
|
||||||
|
|
|
@ -78,8 +78,8 @@ final class SegmentReader extends IndexReader {
|
||||||
BitVector deletedDocs = null;
|
BitVector deletedDocs = null;
|
||||||
private boolean deletedDocsDirty = false;
|
private boolean deletedDocsDirty = false;
|
||||||
|
|
||||||
private InputStream freqStream;
|
InputStream freqStream;
|
||||||
private InputStream proxStream;
|
InputStream proxStream;
|
||||||
|
|
||||||
|
|
||||||
private static class Norm {
|
private static class Norm {
|
||||||
|
@ -194,28 +194,12 @@ final class SegmentReader extends IndexReader {
|
||||||
return (deletedDocs != null && deletedDocs.get(n));
|
return (deletedDocs != null && deletedDocs.get(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
public final TermDocs termDocs(Term t) throws IOException {
|
public final TermDocs termDocs() throws IOException {
|
||||||
TermInfo ti = tis.get(t);
|
return new SegmentTermDocs(this);
|
||||||
if (ti != null)
|
|
||||||
return new SegmentTermDocs(this, ti);
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final InputStream getFreqStream () {
|
public final TermPositions termPositions() throws IOException {
|
||||||
return (InputStream)freqStream.clone();
|
return new SegmentTermPositions(this);
|
||||||
}
|
|
||||||
|
|
||||||
public final TermPositions termPositions(Term t) throws IOException {
|
|
||||||
TermInfo ti = tis.get(t);
|
|
||||||
if (ti != null)
|
|
||||||
return new SegmentTermPositions(this, ti);
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final InputStream getProxStream () {
|
|
||||||
return (InputStream)proxStream.clone();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int docFreq(Term t) throws IOException {
|
public final int docFreq(Term t) throws IOException {
|
||||||
|
|
|
@ -66,21 +66,26 @@ class SegmentTermDocs implements TermDocs {
|
||||||
int doc = 0;
|
int doc = 0;
|
||||||
int freq;
|
int freq;
|
||||||
|
|
||||||
SegmentTermDocs(SegmentReader p) throws IOException {
|
SegmentTermDocs(SegmentReader parent)
|
||||||
parent = p;
|
throws IOException {
|
||||||
freqStream = parent.getFreqStream();
|
this.parent = parent;
|
||||||
deletedDocs = parent.deletedDocs;
|
this.freqStream = (InputStream)parent.freqStream.clone();
|
||||||
|
this.deletedDocs = parent.deletedDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
|
public void seek(Term term) throws IOException {
|
||||||
this(p);
|
TermInfo ti = parent.tis.get(term);
|
||||||
seek(ti);
|
seek(ti);
|
||||||
}
|
}
|
||||||
|
|
||||||
void seek(TermInfo ti) throws IOException {
|
void seek(TermInfo ti) throws IOException {
|
||||||
freqCount = ti.docFreq;
|
if (ti == null) {
|
||||||
doc = 0;
|
freqCount = 0;
|
||||||
freqStream.seek(ti.freqPointer);
|
} else {
|
||||||
|
freqCount = ti.docFreq;
|
||||||
|
doc = 0;
|
||||||
|
freqStream.seek(ti.freqPointer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
|
|
@ -66,13 +66,7 @@ extends SegmentTermDocs implements TermPositions {
|
||||||
|
|
||||||
SegmentTermPositions(SegmentReader p) throws IOException {
|
SegmentTermPositions(SegmentReader p) throws IOException {
|
||||||
super(p);
|
super(p);
|
||||||
proxStream = parent.getProxStream();
|
this.proxStream = (InputStream)parent.proxStream.clone();
|
||||||
}
|
|
||||||
|
|
||||||
SegmentTermPositions(SegmentReader p, TermInfo ti)
|
|
||||||
throws IOException {
|
|
||||||
this(p);
|
|
||||||
seek(ti);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final void seek(TermInfo ti) throws IOException {
|
final void seek(TermInfo ti) throws IOException {
|
||||||
|
|
|
@ -151,12 +151,12 @@ final class SegmentsReader extends IndexReader {
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final TermDocs termDocs(Term term) throws IOException {
|
public final TermDocs termDocs() throws IOException {
|
||||||
return new SegmentsTermDocs(readers, starts, term);
|
return new SegmentsTermDocs(readers, starts);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final TermPositions termPositions(Term term) throws IOException {
|
public final TermPositions termPositions() throws IOException {
|
||||||
return new SegmentsTermPositions(readers, starts, term);
|
return new SegmentsTermPositions(readers, starts);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void close() throws IOException {
|
public final void close() throws IOException {
|
||||||
|
@ -240,14 +240,16 @@ class SegmentsTermDocs implements TermDocs {
|
||||||
protected int base = 0;
|
protected int base = 0;
|
||||||
protected int pointer = 0;
|
protected int pointer = 0;
|
||||||
|
|
||||||
SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
|
private SegmentTermDocs[] segTermDocs;
|
||||||
|
protected SegmentTermDocs current; // == segTermDocs[pointer]
|
||||||
|
|
||||||
|
SegmentsTermDocs(SegmentReader[] r, int[] s) {
|
||||||
readers = r;
|
readers = r;
|
||||||
starts = s;
|
starts = s;
|
||||||
term = t;
|
|
||||||
|
segTermDocs = new SegmentTermDocs[r.length];
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SegmentTermDocs current;
|
|
||||||
|
|
||||||
public final int doc() {
|
public final int doc() {
|
||||||
return base + current.doc;
|
return base + current.doc;
|
||||||
}
|
}
|
||||||
|
@ -255,14 +257,19 @@ class SegmentsTermDocs implements TermDocs {
|
||||||
return current.freq;
|
return current.freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final void seek(Term term) {
|
||||||
|
this.term = term;
|
||||||
|
this.base = 0;
|
||||||
|
this.pointer = 0;
|
||||||
|
this.current = null;
|
||||||
|
}
|
||||||
|
|
||||||
public final boolean next() throws IOException {
|
public final boolean next() throws IOException {
|
||||||
if (current != null && current.next()) {
|
if (current != null && current.next()) {
|
||||||
return true;
|
return true;
|
||||||
} else if (pointer < readers.length) {
|
} else if (pointer < readers.length) {
|
||||||
if (current != null)
|
|
||||||
current.close();
|
|
||||||
base = starts[pointer];
|
base = starts[pointer];
|
||||||
current = termDocs(readers[pointer++]);
|
current = termDocs(pointer++);
|
||||||
return next();
|
return next();
|
||||||
} else
|
} else
|
||||||
return false;
|
return false;
|
||||||
|
@ -275,14 +282,13 @@ class SegmentsTermDocs implements TermDocs {
|
||||||
while (current == null) {
|
while (current == null) {
|
||||||
if (pointer < readers.length) { // try next segment
|
if (pointer < readers.length) { // try next segment
|
||||||
base = starts[pointer];
|
base = starts[pointer];
|
||||||
current = termDocs(readers[pointer++]);
|
current = termDocs(pointer++);
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int end = current.read(docs, freqs);
|
int end = current.read(docs, freqs);
|
||||||
if (end == 0) { // none left in segment
|
if (end == 0) { // none left in segment
|
||||||
current.close();
|
|
||||||
current = null;
|
current = null;
|
||||||
} else { // got some
|
} else { // got some
|
||||||
final int b = base; // adjust doc numbers
|
final int b = base; // adjust doc numbers
|
||||||
|
@ -302,25 +308,37 @@ class SegmentsTermDocs implements TermDocs {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SegmentTermDocs termDocs(int i) throws IOException {
|
||||||
|
if (term == null)
|
||||||
|
return null;
|
||||||
|
SegmentTermDocs result = segTermDocs[i];
|
||||||
|
if (result == null)
|
||||||
|
result = segTermDocs[i] = termDocs(readers[i]);
|
||||||
|
result.seek(term);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
protected SegmentTermDocs termDocs(SegmentReader reader)
|
protected SegmentTermDocs termDocs(SegmentReader reader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return (SegmentTermDocs)reader.termDocs(term);
|
return (SegmentTermDocs)reader.termDocs();
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void close() throws IOException {
|
public final void close() throws IOException {
|
||||||
if (current != null)
|
for (int i = 0; i < segTermDocs.length; i++) {
|
||||||
current.close();
|
if (segTermDocs[i] != null)
|
||||||
|
segTermDocs[i].close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
|
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
|
||||||
SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
|
SegmentsTermPositions(SegmentReader[] r, int[] s) {
|
||||||
super(r,s,t);
|
super(r,s);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final SegmentTermDocs termDocs(SegmentReader reader)
|
protected final SegmentTermDocs termDocs(SegmentReader reader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return (SegmentTermDocs)reader.termPositions(term);
|
return (SegmentTermDocs)reader.termPositions();
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int nextPosition() throws IOException {
|
public final int nextPosition() throws IOException {
|
||||||
|
|
|
@ -67,6 +67,11 @@ import org.apache.lucene.document.Document;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public interface TermDocs {
|
public interface TermDocs {
|
||||||
|
/** Sets this to the data for a term.
|
||||||
|
* The enumeration is reset to the start of the data for this term.
|
||||||
|
*/
|
||||||
|
void seek(Term term) throws IOException;
|
||||||
|
|
||||||
/** Returns the current document number. <p> This is invalid until {@link
|
/** Returns the current document number. <p> This is invalid until {@link
|
||||||
#next()} is called for the first time.*/
|
#next()} is called for the first time.*/
|
||||||
int doc();
|
int doc();
|
||||||
|
@ -91,7 +96,7 @@ public interface TermDocs {
|
||||||
/** Skips entries to the first beyond the current whose document number is
|
/** Skips entries to the first beyond the current whose document number is
|
||||||
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||||
* an entry. <p>Behaves as if written: <pre>
|
* an entry. <p>Behaves as if written: <pre>
|
||||||
* public boolean skipTo(int target) {
|
* boolean skipTo(int target) {
|
||||||
* do {
|
* do {
|
||||||
* if (!next())
|
* if (!next())
|
||||||
* return false;
|
* return false;
|
||||||
|
|
|
@ -65,12 +65,10 @@ public abstract class TermEnum {
|
||||||
/** Increments the enumeration to the next element. True if one exists.*/
|
/** Increments the enumeration to the next element. True if one exists.*/
|
||||||
abstract public boolean next() throws IOException;
|
abstract public boolean next() throws IOException;
|
||||||
|
|
||||||
/** Returns the current Term in the enumeration.
|
/** Returns the current Term in the enumeration.*/
|
||||||
Initially invalid, valid after next() called for the first time.*/
|
|
||||||
abstract public Term term();
|
abstract public Term term();
|
||||||
|
|
||||||
/** Returns the docFreq of the current Term in the enumeration.
|
/** Returns the docFreq of the current Term in the enumeration.*/
|
||||||
Initially invalid, valid after next() called for the first time.*/
|
|
||||||
abstract public int docFreq();
|
abstract public int docFreq();
|
||||||
|
|
||||||
/** Closes the enumeration to further activity, freeing resources. */
|
/** Closes the enumeration to further activity, freeing resources. */
|
||||||
|
|
|
@ -129,10 +129,11 @@ public final class DateFilter extends Filter {
|
||||||
final public BitSet bits(IndexReader reader) throws IOException {
|
final public BitSet bits(IndexReader reader) throws IOException {
|
||||||
BitSet bits = new BitSet(reader.maxDoc());
|
BitSet bits = new BitSet(reader.maxDoc());
|
||||||
TermEnum enum = reader.terms(new Term(field, start));
|
TermEnum enum = reader.terms(new Term(field, start));
|
||||||
|
TermDocs termDocs = reader.termDocs();
|
||||||
try {
|
try {
|
||||||
Term stop = new Term(field, end);
|
Term stop = new Term(field, end);
|
||||||
while (enum.term().compareTo(stop) <= 0) {
|
while (enum.term().compareTo(stop) <= 0) {
|
||||||
TermDocs termDocs = reader.termDocs(enum.term());
|
termDocs.seek(enum.term());
|
||||||
try {
|
try {
|
||||||
while (termDocs.next())
|
while (termDocs.next())
|
||||||
bits.set(termDocs.doc());
|
bits.set(termDocs.doc());
|
||||||
|
|
Loading…
Reference in New Issue