Optimized TermDocs.skipTo() and changed scorers to take advantage of it.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2004-01-15 22:42:35 +00:00
parent 07829a37a7
commit 6f8347c6fe
24 changed files with 1272 additions and 325 deletions

View File

@ -2,6 +2,36 @@ Lucene Change Log
$Id$ $Id$
1.4 RC1
1. Changed the format of the .tis file, so that:
- it has a format version number, which makes it easier to
back-compatibly change file formats in the future.
- the term count is now stored as a long. This was the one aspect
of Lucene's file formats which limited index size.
- a few internal index parameters are now stored in the index, so
that they can (in theory) now be changed from index to index,
although there is not yet an API to do so.
These changes are back compatible. The new code can read old
indexes. But old code will not be able to read new indexes. (cutting)
2. Added an optimized implementation of TermDocs.skipTo(). A skip
table is now stored for each term in the .frq file. This only
adds a percent or two to overall index size, but can substantially
speed up many searches. (cutting)
3. Restructured the Scorer API and all Scorer implementations to take
advantage of an optimized TermDocs.skipTo() implementation. In
particular, PhraseQuerys and conjunctive BooleanQuerys are
faster when one clause has substantially fewer matches than the
others. (A conjunctive BooleanQuery is a BooleanQuery where all
clauses are required.) (cutting)
1.3 final 1.3 final
1. Added catch of BooleanQuery$TooManyClauses in QueryParser to 1. Added catch of BooleanQuery$TooManyClauses in QueryParser to

View File

@ -291,7 +291,7 @@ final class DocumentWriter {
Posting posting = postings[i]; Posting posting = postings[i];
// add an entry to the dictionary with pointers to prox and freq files // add an entry to the dictionary with pointers to prox and freq files
ti.set(1, freq.getFilePointer(), prox.getFilePointer()); ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
tis.add(posting.term, ti); tis.add(posting.term, ti);
// add an entry to the freq file // add an entry to the freq file

View File

@ -62,6 +62,7 @@ import java.io.IOException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream; import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream; import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BitVector;
final class SegmentMerger { final class SegmentMerger {
@ -246,17 +247,21 @@ final class SegmentMerger {
int df = appendPostings(smis, n); // append posting data int df = appendPostings(smis, n); // append posting data
long skipPointer = writeSkip();
if (df > 0) { if (df > 0) {
// add an entry to the dictionary with pointers to prox and freq files // add an entry to the dictionary with pointers to prox and freq files
termInfo.set(df, freqPointer, proxPointer); termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
termInfosWriter.add(smis[0].term, termInfo); termInfosWriter.add(smis[0].term, termInfo);
} }
} }
private final int appendPostings(SegmentMergeInfo[] smis, int n) private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException { throws IOException {
final int skipInterval = termInfosWriter.skipInterval;
int lastDoc = 0; int lastDoc = 0;
int df = 0; // number of docs w/ term int df = 0; // number of docs w/ term
resetSkip();
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
SegmentMergeInfo smi = smis[i]; SegmentMergeInfo smi = smis[i];
TermPositions postings = smi.postings; TermPositions postings = smi.postings;
@ -272,6 +277,12 @@ final class SegmentMerger {
if (doc < lastDoc) if (doc < lastDoc)
throw new IllegalStateException("docs out of order"); throw new IllegalStateException("docs out of order");
df++;
if ((df % skipInterval) == 0) {
bufferSkip(lastDoc);
}
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
lastDoc = doc; lastDoc = doc;
@ -289,13 +300,43 @@ final class SegmentMerger {
proxOutput.writeVInt(position - lastPosition); proxOutput.writeVInt(position - lastPosition);
lastPosition = position; lastPosition = position;
} }
df++;
} }
} }
return df; return df;
} }
private final void mergeNorms() throws IOException {
private RAMOutputStream skipBuffer = new RAMOutputStream();
private int lastSkipDoc;
private long lastSkipFreqPointer;
private long lastSkipProxPointer;
/** Begins a new skip table: clears any buffered skip entries and re-bases
 * the delta encoding on document 0 and on the current ends of the freq and
 * prox outputs. */
private void resetSkip() throws IOException {
  skipBuffer.reset();

  // subsequent entries are encoded relative to these starting values
  lastSkipProxPointer = proxOutput.getFilePointer();
  lastSkipFreqPointer = freqOutput.getFilePointer();
  lastSkipDoc = 0;
}
/** Buffers one skip entry in memory.  An entry is three VInts, each a delta
 * from the previously buffered entry: the document number, the position in
 * the freq output and the position in the prox output.
 *
 * @param doc the document number to record for this skip point
 */
private void bufferSkip(int doc) throws IOException {
  final long currentFreq = freqOutput.getFilePointer();
  final long currentProx = proxOutput.getFilePointer();

  // compute all three deltas against the previous skip point
  final int docDelta = doc - lastSkipDoc;
  final int freqDelta = (int)(currentFreq - lastSkipFreqPointer);
  final int proxDelta = (int)(currentProx - lastSkipProxPointer);

  skipBuffer.writeVInt(docDelta);
  skipBuffer.writeVInt(freqDelta);
  skipBuffer.writeVInt(proxDelta);

  // this entry becomes the base for the next one
  lastSkipDoc = doc;
  lastSkipFreqPointer = currentFreq;
  lastSkipProxPointer = currentProx;
}
/** Flushes the buffered skip entries to the freq output.
 *
 * @return the freq output position at which the skip data begins
 */
private long writeSkip() throws IOException {
  final long skipStart = freqOutput.getFilePointer();  // where skip data lands
  skipBuffer.writeTo(freqOutput);
  return skipStart;
}
private void mergeNorms() throws IOException {
for (int i = 0; i < fieldInfos.size(); i++) { for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i); FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed) { if (fi.isIndexed) {

View File

@ -61,16 +61,27 @@ import org.apache.lucene.store.InputStream;
class SegmentTermDocs implements TermDocs { class SegmentTermDocs implements TermDocs {
protected SegmentReader parent; protected SegmentReader parent;
private InputStream freqStream; private InputStream freqStream;
private int freqCount; private int count;
private int df;
private BitVector deletedDocs; private BitVector deletedDocs;
int doc = 0; int doc = 0;
int freq; int freq;
private int skipInterval;
private int skipCount;
private InputStream skipStream;
private int skipDoc;
private long freqPointer;
private long proxPointer;
private long skipPointer;
private boolean haveSkipped;
SegmentTermDocs(SegmentReader parent) SegmentTermDocs(SegmentReader parent)
throws IOException { throws IOException {
this.parent = parent; this.parent = parent;
this.freqStream = (InputStream)parent.freqStream.clone(); this.freqStream = (InputStream)parent.freqStream.clone();
this.deletedDocs = parent.deletedDocs; this.deletedDocs = parent.deletedDocs;
this.skipInterval = parent.tis.getSkipInterval();
} }
public void seek(Term term) throws IOException { public void seek(Term term) throws IOException {
@ -88,12 +99,19 @@ class SegmentTermDocs implements TermDocs {
} }
void seek(TermInfo ti) throws IOException { void seek(TermInfo ti) throws IOException {
count = 0;
if (ti == null) { if (ti == null) {
freqCount = 0; df = 0;
} else { } else {
freqCount = ti.docFreq; df = ti.docFreq;
doc = 0; doc = 0;
freqStream.seek(ti.freqPointer); skipDoc = 0;
skipCount = 0;
freqPointer = ti.freqPointer;
proxPointer = ti.proxPointer;
skipPointer = freqPointer + ti.skipOffset;
freqStream.seek(freqPointer);
haveSkipped = false;
} }
} }
@ -109,7 +127,7 @@ class SegmentTermDocs implements TermDocs {
public boolean next() throws IOException { public boolean next() throws IOException {
while (true) { while (true) {
if (freqCount == 0) if (count == df)
return false; return false;
int docCode = freqStream.readVInt(); int docCode = freqStream.readVInt();
@ -119,7 +137,7 @@ class SegmentTermDocs implements TermDocs {
else else
freq = freqStream.readVInt(); // else read freq freq = freqStream.readVInt(); // else read freq
freqCount--; count++;
if (deletedDocs == null || !deletedDocs.get(doc)) if (deletedDocs == null || !deletedDocs.get(doc))
break; break;
@ -131,9 +149,9 @@ class SegmentTermDocs implements TermDocs {
/** Optimized implementation. */ /** Optimized implementation. */
public int read(final int[] docs, final int[] freqs) public int read(final int[] docs, final int[] freqs)
throws IOException { throws IOException {
final int end = docs.length; final int length = docs.length;
int i = 0; int i = 0;
while (i < end && freqCount > 0) { while (i < length && count < df) {
// manually inlined call to next() for speed // manually inlined call to next() for speed
final int docCode = freqStream.readVInt(); final int docCode = freqStream.readVInt();
@ -142,7 +160,7 @@ class SegmentTermDocs implements TermDocs {
freq = 1; // freq is one freq = 1; // freq is one
else else
freq = freqStream.readVInt(); // else read freq freq = freqStream.readVInt(); // else read freq
freqCount--; count++;
if (deletedDocs == null || !deletedDocs.get(doc)) { if (deletedDocs == null || !deletedDocs.get(doc)) {
docs[i] = doc; docs[i] = doc;
@ -153,12 +171,61 @@ class SegmentTermDocs implements TermDocs {
return i; return i;
} }
/** As yet unoptimized implementation. */ /** Overridden by SegmentTermPositions to skip in prox stream. */
protected void skipProx(long proxPointer) throws IOException {}
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException { public boolean skipTo(int target) throws IOException {
if (df > skipInterval) { // optimized case
if (skipStream == null)
skipStream = (InputStream)freqStream.clone(); // lazily clone
if (!haveSkipped) { // lazily seek skip stream
skipStream.seek(skipPointer);
haveSkipped = true;
}
// scan skip data
int lastSkipDoc = skipDoc;
long lastFreqPointer = freqStream.getFilePointer();
long lastProxPointer = -1;
int numSkipped = -1 -(count % skipInterval);
while (target > skipDoc) {
lastSkipDoc = skipDoc;
lastFreqPointer = freqPointer;
lastProxPointer = proxPointer;
if (skipDoc >= doc)
numSkipped += skipInterval;
if ((count + numSkipped + skipInterval) > df)
break; // no more skips
skipDoc += skipStream.readVInt();
freqPointer += skipStream.readVInt();
proxPointer += skipStream.readVInt();
skipCount++;
}
// if we found something to skip, then skip it
if (lastFreqPointer > freqStream.getFilePointer()) {
freqStream.seek(lastFreqPointer);
skipProx(lastProxPointer);
doc = lastSkipDoc;
count += numSkipped;
}
}
// done skipping, now just scan
do { do {
if (!next()) if (!next())
return false; return false;
} while (target > doc); } while (target > doc);
return true; return true;
} }
} }

View File

@ -60,14 +60,17 @@ import org.apache.lucene.store.InputStream;
final class SegmentTermEnum extends TermEnum implements Cloneable { final class SegmentTermEnum extends TermEnum implements Cloneable {
private InputStream input; private InputStream input;
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
int size; long size;
int position = -1; long position = -1;
private Term term = new Term("", ""); private Term term = new Term("", "");
private TermInfo termInfo = new TermInfo(); private TermInfo termInfo = new TermInfo();
boolean isIndex = false; private int format;
private boolean isIndex = false;
long indexPointer = 0; long indexPointer = 0;
int indexInterval;
int skipInterval;
Term prev; Term prev;
private char[] buffer = {}; private char[] buffer = {};
@ -76,8 +79,34 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
throws IOException { throws IOException {
input = i; input = i;
fieldInfos = fis; fieldInfos = fis;
size = input.readInt();
isIndex = isi; isIndex = isi;
int firstInt = input.readInt();
if (firstInt >= 0) {
// original-format file, without explicit format version number
format = 0;
size = firstInt;
// back-compatible settings
indexInterval = 128;
skipInterval = Integer.MAX_VALUE;
} else {
// we have a format version number
format = firstInt;
// check that it is a format we can understand
if (format < TermInfosWriter.FORMAT)
throw new IOException("Unknown format version:" + format);
size = input.readLong(); // read the size
if (!isIndex) {
indexInterval = input.readInt();
skipInterval = input.readInt();
}
}
} }
protected Object clone() { protected Object clone() {
@ -117,6 +146,12 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
termInfo.freqPointer += input.readVLong(); // read freq pointer termInfo.freqPointer += input.readVLong(); // read freq pointer
termInfo.proxPointer += input.readVLong(); // read prox pointer termInfo.proxPointer += input.readVLong(); // read prox pointer
if (!isIndex) {
if (termInfo.docFreq > skipInterval) {
termInfo.skipOffset = input.readVInt();
}
}
if (isIndex) if (isIndex)
indexPointer += input.readVLong(); // read index pointer indexPointer += input.readVLong(); // read index pointer

View File

@ -109,4 +109,11 @@ extends SegmentTermDocs implements TermPositions {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
/** Called by super.skipTo() when it has used skip data to reposition the
 * freq stream; moves the prox stream to the corresponding pointer.
 *
 * @param proxPointer absolute position to seek to in the prox stream
 */
protected void skipProx(long proxPointer) throws IOException {
proxStream.seek(proxPointer);
// NOTE(review): presumably proxCount is the number of positions still
// unread for the current document, so zero means "none pending" -- confirm
proxCount = 0;
}
} }

View File

@ -62,6 +62,7 @@ final class TermInfo {
long freqPointer = 0; long freqPointer = 0;
long proxPointer = 0; long proxPointer = 0;
int skipOffset;
TermInfo() {} TermInfo() {}
@ -75,17 +76,21 @@ final class TermInfo {
docFreq = ti.docFreq; docFreq = ti.docFreq;
freqPointer = ti.freqPointer; freqPointer = ti.freqPointer;
proxPointer = ti.proxPointer; proxPointer = ti.proxPointer;
skipOffset = ti.skipOffset;
} }
final void set(int df, long fp, long pp) { final void set(int docFreq,
docFreq = df; long freqPointer, long proxPointer, int skipOffset) {
freqPointer = fp; this.docFreq = docFreq;
proxPointer = pp; this.freqPointer = freqPointer;
this.proxPointer = proxPointer;
this.skipOffset = skipOffset;
} }
final void set(TermInfo ti) { final void set(TermInfo ti) {
docFreq = ti.docFreq; docFreq = ti.docFreq;
freqPointer = ti.freqPointer; freqPointer = ti.freqPointer;
proxPointer = ti.proxPointer; proxPointer = ti.proxPointer;
skipOffset = ti.skipOffset;
} }
} }

View File

@ -68,7 +68,7 @@ final class TermInfosReader {
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
private SegmentTermEnum enumerator; private SegmentTermEnum enumerator;
private int size; private long size;
TermInfosReader(Directory dir, String seg, FieldInfos fis) TermInfosReader(Directory dir, String seg, FieldInfos fis)
throws IOException { throws IOException {
@ -82,13 +82,17 @@ final class TermInfosReader {
readIndex(); readIndex();
} }
/** Returns the skip interval held by this reader's term enumerator. */
public int getSkipInterval() {
return enumerator.skipInterval;
}
final void close() throws IOException { final void close() throws IOException {
if (enumerator != null) if (enumerator != null)
enumerator.close(); enumerator.close();
} }
/** Returns the number of term/value pairs in the set. */ /** Returns the number of term/value pairs in the set. */
final int size() { final long size() {
return size; return size;
} }
@ -101,7 +105,7 @@ final class TermInfosReader {
new SegmentTermEnum(directory.openFile(segment + ".tii"), new SegmentTermEnum(directory.openFile(segment + ".tii"),
fieldInfos, true); fieldInfos, true);
try { try {
int indexSize = indexEnum.size; int indexSize = (int)indexEnum.size;
indexTerms = new Term[indexSize]; indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize]; indexInfos = new TermInfo[indexSize];
@ -137,7 +141,7 @@ final class TermInfosReader {
private final void seekEnum(int indexOffset) throws IOException { private final void seekEnum(int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset], enumerator.seek(indexPointers[indexOffset],
(indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1, (indexOffset * enumerator.indexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]); indexTerms[indexOffset], indexInfos[indexOffset]);
} }
@ -146,10 +150,10 @@ final class TermInfosReader {
if (size == 0) return null; if (size == 0) return null;
// optimize sequential access: first try scanning cached enumerator w/o seeking // optimize sequential access: first try scanning cached enumerator w/o seeking
if (enumerator.term() != null // term is at or past current if (enumerator.term() != null // term is at or past current
&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0) && ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
|| term.compareTo(enumerator.term()) >= 0)) { || term.compareTo(enumerator.term()) >= 0)) {
int enumOffset = (enumerator.position/TermInfosWriter.INDEX_INTERVAL)+1; int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
if (indexTerms.length == enumOffset // but before end of block if (indexTerms.length == enumOffset // but before end of block
|| term.compareTo(indexTerms[enumOffset]) < 0) || term.compareTo(indexTerms[enumOffset]) < 0)
return scanEnum(term); // no need to seek return scanEnum(term); // no need to seek
@ -174,10 +178,10 @@ final class TermInfosReader {
if (size == 0) return null; if (size == 0) return null;
if (enumerator != null && enumerator.term() != null && position >= enumerator.position && if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
position < (enumerator.position + TermInfosWriter.INDEX_INTERVAL)) position < (enumerator.position + enumerator.indexInterval))
return scanEnum(position); // can avoid seek return scanEnum(position); // can avoid seek
seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek seekEnum(position / enumerator.indexInterval); // must seek
return scanEnum(position); return scanEnum(position);
} }
@ -190,7 +194,7 @@ final class TermInfosReader {
} }
/** Returns the position of a Term in the set or -1. */ /** Returns the position of a Term in the set or -1. */
final synchronized int getPosition(Term term) throws IOException { final synchronized long getPosition(Term term) throws IOException {
if (size == 0) return -1; if (size == 0) return -1;
int indexOffset = getIndexOffset(term); int indexOffset = getIndexOffset(term);

View File

@ -62,13 +62,36 @@ import org.apache.lucene.store.Directory;
Directory. A TermInfos can be written once, in order. */ Directory. A TermInfos can be written once, in order. */
final class TermInfosWriter { final class TermInfosWriter {
/** The file format version, a negative number. */
public static final int FORMAT = -1;
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
private OutputStream output; private OutputStream output;
private Term lastTerm = new Term("", ""); private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo(); private TermInfo lastTi = new TermInfo();
private int size = 0; private int size = 0;
static final int INDEX_INTERVAL = 128; // TODO: the default values for these two parameters should be settable from
// IndexWriter. However, once that's done, folks will start setting them to
// ridiculous values and complaining that things don't work well, as with
// mergeFactor. So, let's wait until a number of folks find that alternate
// values work better. Note that both of these values are stored in the
// segment, so that it's safe to change these w/o rebuilding all indexes.
/** Expert: The fraction of terms in the "dictionary" which should be stored
* in RAM. Smaller values use more memory, but make searching slightly
* faster, while larger values use less memory and make searching slightly
* slower. Searching is typically not dominated by dictionary lookup, so
* tweaking this is rarely useful.*/
int indexInterval = 128;
/** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
* used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
int skipInterval = 16;
private long lastIndexPointer = 0; private long lastIndexPointer = 0;
private boolean isIndex = false; private boolean isIndex = false;
@ -91,7 +114,12 @@ final class TermInfosWriter {
fieldInfos = fis; fieldInfos = fis;
isIndex = isi; isIndex = isi;
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis")); output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
output.writeInt(0); // leave space for size output.writeInt(FORMAT); // write format
output.writeLong(0); // leave space for size
if (!isIndex) {
output.writeInt(indexInterval); // write indexInterval
output.writeInt(skipInterval); // write skipInterval
}
} }
/** Adds a new <Term, TermInfo> pair to the set. /** Adds a new <Term, TermInfo> pair to the set.
@ -106,7 +134,7 @@ final class TermInfosWriter {
if (ti.proxPointer < lastTi.proxPointer) if (ti.proxPointer < lastTi.proxPointer)
throw new IOException("proxPointer out of order"); throw new IOException("proxPointer out of order");
if (!isIndex && size % INDEX_INTERVAL == 0) if (!isIndex && size % indexInterval == 0)
other.add(lastTerm, lastTi); // add an index term other.add(lastTerm, lastTi); // add an index term
writeTerm(term); // write term writeTerm(term); // write term
@ -114,6 +142,12 @@ final class TermInfosWriter {
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
output.writeVLong(ti.proxPointer - lastTi.proxPointer); output.writeVLong(ti.proxPointer - lastTi.proxPointer);
if (!isIndex) {
if (ti.docFreq > skipInterval) {
output.writeVInt(ti.skipOffset);
}
}
if (isIndex) { if (isIndex) {
output.writeVLong(other.output.getFilePointer() - lastIndexPointer); output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
lastIndexPointer = other.output.getFilePointer(); // write pointer lastIndexPointer = other.output.getFilePointer(); // write pointer
@ -149,8 +183,8 @@ final class TermInfosWriter {
/** Called to complete TermInfos creation. */ /** Called to complete TermInfos creation. */
final void close() throws IOException { final void close() throws IOException {
output.seek(0); // write size at start output.seek(4); // write size after format
output.writeInt(size); output.writeLong(size);
output.close(); output.close();
if (!isIndex) if (!isIndex)

View File

@ -158,6 +158,37 @@ public class BooleanQuery extends Query {
} }
public Scorer scorer(IndexReader reader) throws IOException { public Scorer scorer(IndexReader reader) throws IOException {
// First see if the (faster) ConjunctionScorer will work. This can be
// used when all clauses are required. Also, at this point a
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
// from a BooleanScorer are not always sorted by document number (sigh)
// and hence BooleanScorer cannot implement skipTo() correctly, which is
// required by ConjunctionScorer.
boolean allRequired = true;
boolean noneBoolean = true;
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.elementAt(i);
if (!c.required)
allRequired = false;
if (c.query instanceof BooleanQuery)
noneBoolean = false;
}
if (allRequired && noneBoolean) { // ConjunctionScorer is okay
ConjunctionScorer result =
new ConjunctionScorer(searcher.getSimilarity());
for (int i = 0 ; i < weights.size(); i++) {
BooleanClause c = (BooleanClause)clauses.elementAt(i);
Weight w = (Weight)weights.elementAt(i);
Scorer subScorer = w.scorer(reader);
if (subScorer == null)
return null;
result.add(subScorer);
}
return result;
}
// Use good-old BooleanScorer instead.
BooleanScorer result = new BooleanScorer(searcher.getSimilarity()); BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
for (int i = 0 ; i < weights.size(); i++) { for (int i = 0 ; i < weights.size(); i++) {

View File

@ -76,14 +76,17 @@ final class BooleanScorer extends Scorer {
static final class SubScorer { static final class SubScorer {
public Scorer scorer; public Scorer scorer;
public boolean done;
public boolean required = false; public boolean required = false;
public boolean prohibited = false; public boolean prohibited = false;
public HitCollector collector; public HitCollector collector;
public SubScorer next; public SubScorer next;
public SubScorer(Scorer scorer, boolean required, boolean prohibited, public SubScorer(Scorer scorer, boolean required, boolean prohibited,
HitCollector collector, SubScorer next) { HitCollector collector, SubScorer next)
throws IOException {
this.scorer = scorer; this.scorer = scorer;
this.done = !scorer.next();
this.required = required; this.required = required;
this.prohibited = prohibited; this.prohibited = prohibited;
this.collector = collector; this.collector = collector;
@ -91,7 +94,8 @@ final class BooleanScorer extends Scorer {
} }
} }
final void add(Scorer scorer, boolean required, boolean prohibited) { final void add(Scorer scorer, boolean required, boolean prohibited)
throws IOException {
int mask = 0; int mask = 0;
if (required || prohibited) { if (required || prohibited) {
if (nextMask == 0) if (nextMask == 0)
@ -120,17 +124,45 @@ final class BooleanScorer extends Scorer {
coordFactors[i] = getSimilarity().coord(i, maxCoord-1); coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
} }
public final void score(HitCollector results, int maxDoc) private int end;
throws IOException { private Bucket current;
public int doc() { return current.doc; }
/** Advances to the next document that satisfies the required and prohibited
 * masks.  Queued buckets are drained first; when the queue is empty the
 * window end is advanced by BucketTable.SIZE and every sub-scorer that is
 * not yet done collects its documents below the new end.
 *
 * @return true if a matching document was found (see {@link #doc()}),
 *         false once the queue is empty and every sub-scorer is done
 */
public boolean next() throws IOException {
  boolean more = false;
  do {
    while (bucketTable.first != null) {             // more queued
      current = bucketTable.first;
      bucketTable.first = current.next;             // pop the queue

      // check prohibited & required
      if ((current.bits & prohibitedMask) == 0 &&
          (current.bits & requiredMask) == requiredMask) {
        return true;
      }
    }

    // refill the queue
    // Reset on every pass: a value left over from an earlier pass would keep
    // the loop spinning forever once all sub-scorers are done and the queue
    // holds no match.
    more = false;
    end += BucketTable.SIZE;
    for (SubScorer sub = scorers; sub != null; sub = sub.next) {
      Scorer scorer = sub.scorer;
      while (!sub.done && scorer.doc() < end) {
        sub.collector.collect(scorer.doc(), scorer.score());
        sub.done = !scorer.next();
      }
      if (!sub.done) {
        more = true;                                // docs remain past 'end'
      }
    }
  } while (bucketTable.first != null || more);

  return false;
}
public float score() throws IOException {
if (coordFactors == null) if (coordFactors == null)
computeCoordFactors(); computeCoordFactors();
return current.score * coordFactors[current.coord];
while (currentDoc < maxDoc) {
currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
for (SubScorer t = scorers; t != null; t = t.next)
t.scorer.score(t.collector, currentDoc);
bucketTable.collectHits(results);
}
} }
static final class Bucket { static final class Bucket {
@ -207,6 +239,10 @@ final class BooleanScorer extends Scorer {
} }
} }
/** Not supported by this scorer.
 *
 * @throws UnsupportedOperationException always
 */
public boolean skipTo(int target) throws IOException {
throw new UnsupportedOperationException();
}
public Explanation explain(int doc) throws IOException { public Explanation explain(int doc) throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }

View File

@ -0,0 +1,155 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
import java.util.*;
import org.apache.lucene.index.*;
/** Scorer for conjunctions, sets of queries, all of which are required. */
final class ConjunctionScorer extends Scorer {
private LinkedList scorers = new LinkedList();
private boolean firstTime = true;
private boolean more = true;
private float coord;
public ConjunctionScorer(Similarity similarity) {
super(similarity);
}
final void add(Scorer scorer) throws IOException {
scorers.addLast(scorer);
}
private Scorer first() { return (Scorer)scorers.getFirst(); }
private Scorer last() { return (Scorer)scorers.getLast(); }
public int doc() { return first().doc(); }
public boolean next() throws IOException {
if (firstTime) {
init();
} else if (more) {
more = last().next(); // trigger further scanning
}
while (more && first().doc() < last().doc()) { // find doc w/ all clauses
more = first().skipTo(last().doc()); // skip first upto last
scorers.addLast(scorers.removeFirst()); // move first to last
}
return more; // found a doc with all clauses
}
public boolean skipTo(int target) throws IOException {
Iterator i = scorers.iterator();
while (more && i.hasNext()) {
more = ((Scorer)i.next()).skipTo(target);
}
if (more)
sortScorers(); // re-sort scorers
return more;
}
public float score() throws IOException {
float score = 0.0f; // sum scores
Iterator i = scorers.iterator();
while (i.hasNext())
score += ((Scorer)i.next()).score();
score *= coord;
return score;
}
private void init() throws IOException {
more = scorers.size() > 0;
// compute coord factor
coord = getSimilarity().coord(scorers.size(), scorers.size());
// move each scorer to its first entry
Iterator i = scorers.iterator();
while (more && i.hasNext()) {
more = ((Scorer)i.next()).next();
}
if (more)
sortScorers(); // initial sort of list
firstTime = false;
}
private void sortScorers() throws IOException {
// move scorers to an array
Scorer[] array = (Scorer[])scorers.toArray(new Scorer[scorers.size()]);
scorers.clear(); // empty the list
Arrays.sort(array, new Comparator() { // sort the array
public int compare(Object o1, Object o2) {
return ((Scorer)o1).doc() - ((Scorer)o2).doc();
}
public boolean equals(Object o1, Object o2) {
return ((Scorer)o1).doc() == ((Scorer)o2).doc();
}
});
for (int i = 0; i < array.length; i++) {
scorers.addLast(array[i]); // re-build list, now sorted
}
}
/**
 * Not supported by this scorer.
 *
 * @throws UnsupportedOperationException always
 */
public Explanation explain(int doc) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -140,7 +140,7 @@ public class IndexSearcher extends Searcher {
hq.insert(new ScoreDoc(doc, score)); hq.insert(new ScoreDoc(doc, score));
} }
} }
}, reader.maxDoc()); });
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array for (int i = hq.size()-1; i >= 0; i--) // put docs in array
@ -180,7 +180,7 @@ public class IndexSearcher extends Searcher {
Scorer scorer = query.weight(this).scorer(reader); Scorer scorer = query.weight(this).scorer(reader);
if (scorer == null) if (scorer == null)
return; return;
scorer.score(collector, reader.maxDoc()); scorer.score(collector);
} }
public Query rewrite(Query original) throws IOException { public Query rewrite(Query original) throws IOException {

View File

@ -68,19 +68,31 @@ final class PhrasePositions {
PhrasePositions(TermPositions t, int o) throws IOException { PhrasePositions(TermPositions t, int o) throws IOException {
tp = t; tp = t;
offset = o; offset = o;
next();
} }
final void next() throws IOException { // increments to next doc final boolean next() throws IOException { // increments to next doc
if (!tp.next()) { if (!tp.next()) {
tp.close(); // close stream tp.close(); // close stream
doc = Integer.MAX_VALUE; // sentinel value doc = Integer.MAX_VALUE; // sentinel value
return; return false;
} }
doc = tp.doc(); doc = tp.doc();
position = 0; position = 0;
return true;
} }
final boolean skipTo(int target) throws IOException {
if (!tp.skipTo(target)) {
tp.close(); // close stream
doc = Integer.MAX_VALUE; // sentinel value
return false;
}
doc = tp.doc();
position = 0;
return true;
}
final void firstPosition() throws IOException { final void firstPosition() throws IOException {
count = tp.freq(); // read first pos count = tp.freq(); // read first pos
nextPosition(); nextPosition();

View File

@ -60,89 +60,127 @@ import org.apache.lucene.util.*;
import org.apache.lucene.index.*; import org.apache.lucene.index.*;
abstract class PhraseScorer extends Scorer { abstract class PhraseScorer extends Scorer {
private Weight weight; private Weight weight;
protected byte[] norms; protected byte[] norms;
protected float value; protected float value;
protected PhraseQueue pq; private boolean firstTime = true;
protected PhrasePositions first, last; private boolean more = true;
protected PhraseQueue pq;
protected PhrasePositions first, last;
private float freq; private float freq;
PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity, PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
byte[] norms) throws IOException { byte[] norms) throws IOException {
super(similarity); super(similarity);
this.norms = norms; this.norms = norms;
this.weight = weight; this.weight = weight;
this.value = weight.getValue(); this.value = weight.getValue();
// use PQ to build a sorted list of PhrasePositions // convert tps to a list
pq = new PhraseQueue(tps.length); for (int i = 0; i < tps.length; i++) {
for (int i = 0; i < tps.length; i++) { PhrasePositions pp = new PhrasePositions(tps[i], i);
pq.put(new PhrasePositions(tps[i], i)); if (last != null) { // add next to end of list
} last.next = pp;
pqToList(); } else
first = pp;
last = pp;
} }
public final void score(HitCollector results, int end) throws IOException { pq = new PhraseQueue(tps.length); // construct empty pq
Similarity similarity = getSimilarity();
while (last.doc < end) { // find doc w/ all the terms
while (first.doc < last.doc) { // scan forward in first
do {
first.next();
} while (first.doc < last.doc);
firstToLast();
if (last.doc >= end)
return;
}
// found doc with all terms }
freq = phraseFreq(); // check for phrase
if (freq > 0.0) { public int doc() { return first.doc; }
float score = similarity.tf(freq) * value; // compute score
score *= Similarity.decodeNorm(norms[first.doc]); // normalize public boolean next() throws IOException {
results.collect(first.doc, score); // add to results if (firstTime) {
} sort();
last.next(); // resume scanning firstTime = false;
} } else if (more) {
more = last.next(); // trigger further scanning
} }
protected abstract float phraseFreq() throws IOException; while (more) {
while (more && first.doc < last.doc) { // find doc w/ all the terms
more = first.skipTo(last.doc); // skip first upto last
firstToLast(); // and move it to the end
}
protected final void pqToList() { if (more) {
last = first = null; // found a doc with all of the terms
while (pq.top() != null) { freq = phraseFreq(); // check for phrase
PhrasePositions pp = (PhrasePositions) pq.pop(); if (freq == 0.0f) // no match
if (last != null) { // add next to end of list more = last.next(); // trigger further scanning
last.next = pp; else
} else return true; // found a match
first = pp; }
last = pp;
pp.next = null;
}
} }
return false; // no more matches
}
protected final void firstToLast() { public float score() throws IOException {
last.next = first; // move first to end of list //System.out.println("scoring " + first.doc);
last = first; float raw = getSimilarity().tf(freq) * value; // raw score
first = first.next; return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
last.next = null; }
public boolean skipTo(int target) throws IOException {
for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
more = pp.skipTo(target);
} }
if (more)
sort(); // re-sort
return more;
}
public Explanation explain(final int doc) throws IOException {
Explanation tfExplanation = new Explanation();
score(new HitCollector() { protected abstract float phraseFreq() throws IOException;
public final void collect(int d, float score) {
}
}, doc + 1);
float phraseFreq = (first.doc == doc) ? freq : 0.0f; private void sort() throws IOException {
tfExplanation.setValue(getSimilarity().tf(phraseFreq)); pq.clear();
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")"); for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
more = pp.next();
return tfExplanation; if (more) {
pq.put(pp);
} else {
return;
}
} }
pqToList();
}
protected final void pqToList() {
last = first = null;
while (pq.top() != null) {
PhrasePositions pp = (PhrasePositions) pq.pop();
if (last != null) { // add next to end of list
last.next = pp;
} else
first = pp;
last = pp;
pp.next = null;
}
}
protected final void firstToLast() {
last.next = first; // move first to end of list
last = first;
first = first.next;
last.next = null;
}
public Explanation explain(final int doc) throws IOException {
Explanation tfExplanation = new Explanation();
while (next() && doc() < doc) {}
float phraseFreq = (doc() == doc) ? freq : 0.0f;
tfExplanation.setValue(getSimilarity().tf(phraseFreq));
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
return tfExplanation;
}
} }

View File

@ -70,11 +70,39 @@ public abstract class Scorer {
return this.similarity; return this.similarity;
} }
/** Scores hits and passes them to a collector. Stops at the last document /** Scores all documents and passes them to a collector. */
* before <code>maxDoc</code>. If called repeatedly, will restart at point public void score(HitCollector hc) throws IOException {
* where it last left off. while (next()) {
hc.collect(doc(), score());
}
}
/** Advance to the next document matching the query. Returns true iff there
* is another match. */
public abstract boolean next() throws IOException;
/** Returns the current document number. Initially invalid, until {@link
* #next()} is called the first time. */
public abstract int doc();
/** Returns the score of the current document. Initially invalid, until
* {@link #next()} is called the first time. */
public abstract float score() throws IOException;
/** Skips to the first match beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
* a match. <p>Behaves as if written: <pre>
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target > doc());
* return true;
* }
* </pre>
* Most implementations are considerably more efficient than that.
*/ */
public abstract void score(HitCollector hc, int maxDoc) throws IOException; public abstract boolean skipTo(int target) throws IOException;
/** Returns an explanation of the score for <code>doc</code>. */ /** Returns an explanation of the score for <code>doc</code>. */
public abstract Explanation explain(int doc) throws IOException; public abstract Explanation explain(int doc) throws IOException;

View File

@ -83,44 +83,56 @@ final class TermScorer extends Scorer {
for (int i = 0; i < SCORE_CACHE_SIZE; i++) for (int i = 0; i < SCORE_CACHE_SIZE; i++)
scoreCache[i] = getSimilarity().tf(i) * weightValue; scoreCache[i] = getSimilarity().tf(i) * weightValue;
pointerMax = termDocs.read(docs, freqs); // fill buffers
if (pointerMax != 0)
doc = docs[0];
else {
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
}
} }
public final void score(HitCollector c, final int end) throws IOException { public int doc() { return doc; }
int d = doc; // cache doc in local
Similarity similarity = getSimilarity(); // cache sim in local
while (d < end) { // for docs in window
final int f = freqs[pointer];
float score = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: similarity.tf(f)*weightValue; // cache miss
score *= Similarity.decodeNorm(norms[d]); // normalize for field public boolean next() throws IOException {
pointer++;
c.collect(d, score); // collect score if (pointer >= pointerMax) {
pointerMax = termDocs.read(docs, freqs); // refill buffer
if (++pointer == pointerMax) { if (pointerMax != 0) {
pointerMax = termDocs.read(docs, freqs); // refill buffers pointer = 0;
if (pointerMax != 0) { } else {
pointer = 0; termDocs.close(); // close stream
} else { doc = Integer.MAX_VALUE; // set to sentinel value
termDocs.close(); // close stream return false;
doc = Integer.MAX_VALUE; // set to sentinel value
return;
}
} }
d = docs[pointer];
} }
doc = d; // flush cache doc = docs[pointer];
return true;
}
public float score() throws IOException {
int f = freqs[pointer];
float raw = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss
return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
}
public boolean skipTo(int target) throws IOException {
// first scan in cache
for (pointer++; pointer < pointerMax; pointer++) {
if (!(target > docs[pointer])) {
doc = docs[pointer];
return true;
}
}
// not found in cache, seek underlying stream
boolean result = termDocs.skipTo(target);
if (result) {
pointerMax = 1;
pointer = 0;
docs[pointer] = doc = termDocs.doc();
freqs[pointer] = termDocs.freq();
} else {
doc = Integer.MAX_VALUE;
}
return result;
} }
public Explanation explain(int doc) throws IOException { public Explanation explain(int doc) throws IOException {

View File

@ -226,98 +226,3 @@ public final class RAMDirectory extends Directory {
public final void close() { public final void close() {
} }
} }
/**
 * Input stream that reads from the in-memory buffers of a {@link RAMFile}.
 */
final class RAMInputStream extends InputStream implements Cloneable {
  RAMFile file;
  int pointer = 0;          // position in the file of the next byte to read

  /** Opens a stream over <code>f</code>, recording its length at this moment. */
  public RAMInputStream(RAMFile f) {
    file = f;
    length = f.length;
  }

  /** InputStream methods */

  /**
   * Copies <code>len</code> bytes, starting at the current position, into
   * <code>dest</code> at <code>destOffset</code>, crossing buffer boundaries
   * as needed, then advances the position by <code>len</code>.
   */
  public final void readInternal(byte[] dest, int destOffset, int len) {
    int position = pointer;
    int writeAt = destOffset;
    for (int left = len; left != 0; ) {
      final int index = position / InputStream.BUFFER_SIZE;
      final int offset = position % InputStream.BUFFER_SIZE;
      // never read past the end of the current buffer
      final int chunk = Math.min(InputStream.BUFFER_SIZE - offset, left);
      final byte[] block = (byte[]) file.buffers.elementAt(index);
      System.arraycopy(block, offset, dest, writeAt, chunk);
      writeAt += chunk;
      position += chunk;
      left -= chunk;
    }
    pointer += len;
  }

  /** Does nothing. */
  public final void close() {
  }

  /** Random-access methods */

  /** Moves the read position to <code>pos</code>, truncated to an int. */
  public final void seekInternal(long pos) {
    pointer = (int) pos;
  }
}
/**
 * Output stream that writes into the in-memory buffers of a {@link RAMFile}.
 */
final class RAMOutputStream extends OutputStream {
  RAMFile file;
  int pointer = 0;          // position in the file of the next byte to write

  /** Creates a stream that writes into <code>f</code>. */
  public RAMOutputStream(RAMFile f) {
    file = f;
  }

  /** output methods: */

  /**
   * Copies the first <code>len</code> bytes of <code>src</code> into the file
   * at the current position, allocating further buffers as required, then
   * updates the file's length and modification time.
   *
   * <p>The copy is done one buffer at a time in a loop, so <code>len</code>
   * may span any number of buffers. The loop body runs at least once, so a
   * zero-length flush still allocates the buffer at the current position,
   * as before.
   */
  public final void flushBuffer(byte[] src, int len) {
    int srcOffset = 0;
    do {
      int bufferNumber = pointer / OutputStream.BUFFER_SIZE;
      int bufferOffset = pointer % OutputStream.BUFFER_SIZE;
      int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
      int bytesToCopy = Math.min(bytesInBuffer, len - srcOffset);

      if (bufferNumber == file.buffers.size())
        file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);

      byte[] buffer = (byte[]) file.buffers.elementAt(bufferNumber);
      System.arraycopy(src, srcOffset, buffer, bufferOffset, bytesToCopy);

      srcOffset += bytesToCopy;
      pointer += bytesToCopy;
    } while (srcOffset < len);

    if (pointer > file.length)
      file.length = pointer;

    file.lastModified = System.currentTimeMillis();
  }

  /** Delegates to the superclass. */
  public final void close() throws IOException {
    super.close();
  }

  /** Random-access methods */

  /** Delegates to the superclass, then moves the write position to
   * <code>pos</code>, truncated to an int. */
  public final void seek(long pos) throws IOException {
    super.seek(pos);
    pointer = (int) pos;
  }

  /** Returns the current length of the underlying file. */
  public final long length() throws IOException {
    return file.length;
  }
}
/** In-memory file: a growable list of fixed-size byte buffers plus metadata. */
final class RAMFile {
// byte[] blocks of OutputStream.BUFFER_SIZE bytes, appended by RAMOutputStream.flushBuffer
Vector buffers = new Vector();
// highest position written so far; raised by RAMOutputStream.flushBuffer
long length;
// set at creation and refreshed on every RAMOutputStream.flushBuffer
long lastModified = System.currentTimeMillis();
}

View File

@ -0,0 +1,63 @@
package org.apache.lucene.store;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.util.Vector;
/** In-memory file: a growable list of fixed-size byte buffers plus metadata. */
class RAMFile {
// byte[] blocks of BUFFER_SIZE bytes, appended by RAMOutputStream.flushBuffer
Vector buffers = new Vector();
// highest position written so far; raised by RAMOutputStream.flushBuffer, zeroed by reset()
long length;
// set at creation and refreshed on every RAMOutputStream.flushBuffer
long lastModified = System.currentTimeMillis();
}

View File

@ -0,0 +1,95 @@
package org.apache.lucene.store;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/**
 * A memory-resident {@link InputStream} implementation.
 *
 * <p>Reads from the byte buffers of a {@link RAMFile}.
 *
 * @version $Id$
 */
class RAMInputStream extends InputStream implements Cloneable {
  private RAMFile file;
  private int pointer = 0;  // position in the file of the next byte to read

  /** Opens a stream over <code>f</code>, recording its length at this moment. */
  public RAMInputStream(RAMFile f) {
    file = f;
    length = f.length;
  }

  /**
   * Copies <code>len</code> bytes, starting at the current position, into
   * <code>dest</code> at <code>destOffset</code>, crossing buffer boundaries
   * as needed, then advances the position by <code>len</code>.
   */
  public void readInternal(byte[] dest, int destOffset, int len) {
    int position = pointer;
    int writeAt = destOffset;
    for (int left = len; left != 0; ) {
      final int index = position / BUFFER_SIZE;
      final int offset = position % BUFFER_SIZE;
      // never read past the end of the current buffer
      final int chunk = Math.min(BUFFER_SIZE - offset, left);
      final byte[] block = (byte[]) file.buffers.elementAt(index);
      System.arraycopy(block, offset, dest, writeAt, chunk);
      writeAt += chunk;
      position += chunk;
      left -= chunk;
    }
    pointer += len;
  }

  /** Does nothing. */
  public void close() {
  }

  /** Moves the read position to <code>pos</code>, truncated to an int. */
  public void seekInternal(long pos) {
    pointer = (int) pos;
  }
}

View File

@ -0,0 +1,145 @@
package org.apache.lucene.store;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
/**
 * A memory-resident {@link OutputStream} implementation.
 *
 * <p>Bytes are stored in the fixed-size buffers of a {@link RAMFile}.
 *
 * @version $Id$
 */
public class RAMOutputStream extends OutputStream {
  private RAMFile file;
  private int pointer = 0;  // position in the file of the next byte to write

  /** Construct an empty output buffer. */
  public RAMOutputStream() {
    this(new RAMFile());
  }

  /** Creates a stream that writes into <code>f</code>. */
  RAMOutputStream(RAMFile f) {
    file = f;
  }

  /** Copy the current contents of this buffer to the named output. */
  public void writeTo(OutputStream out) throws IOException {
    flush();
    final long end = file.length;
    long pos = 0;
    int buffer = 0;
    while (pos < end) {
      int length = BUFFER_SIZE;
      long nextPos = pos + length;
      if (nextPos > end) {                        // at the last buffer
        length = (int)(end - pos);
      }
      out.writeBytes((byte[])file.buffers.elementAt(buffer++), length);
      pos = nextPos;
    }
  }

  /** Resets this to an empty buffer. */
  public void reset() {
    try {
      seek(0);
    } catch (IOException e) {                     // should never happen
      throw new RuntimeException(e.toString());
    }

    file.length = 0;
  }

  /**
   * Copies the first <code>len</code> bytes of <code>src</code> into the file
   * at the current position, allocating further buffers as required, then
   * updates the file's length and modification time.
   *
   * <p>The copy is done one buffer at a time in a loop, so <code>len</code>
   * may span any number of buffers. The loop body runs at least once, so a
   * zero-length flush still allocates the buffer at the current position,
   * as before.
   */
  public void flushBuffer(byte[] src, int len) {
    int srcOffset = 0;
    do {
      int bufferNumber = pointer / BUFFER_SIZE;
      int bufferOffset = pointer % BUFFER_SIZE;
      int bytesInBuffer = BUFFER_SIZE - bufferOffset;
      int bytesToCopy = Math.min(bytesInBuffer, len - srcOffset);

      if (bufferNumber == file.buffers.size())
        file.buffers.addElement(new byte[BUFFER_SIZE]);

      byte[] buffer = (byte[]) file.buffers.elementAt(bufferNumber);
      System.arraycopy(src, srcOffset, buffer, bufferOffset, bytesToCopy);

      srcOffset += bytesToCopy;
      pointer += bytesToCopy;
    } while (srcOffset < len);

    if (pointer > file.length)
      file.length = pointer;

    file.lastModified = System.currentTimeMillis();
  }

  /** Delegates to the superclass. */
  public void close() throws IOException {
    super.close();
  }

  /** Delegates to the superclass, then moves the write position to
   * <code>pos</code>, truncated to an int. */
  public void seek(long pos) throws IOException {
    super.seek(pos);
    pointer = (int) pos;
  }

  /** Returns the current length of the underlying file. */
  public long length() {
    return file.length;
  }
}

View File

@ -54,6 +54,7 @@ package org.apache.lucene;
* <http://www.apache.org/>. * <http://www.apache.org/>.
*/ */
import org.apache.lucene.util.*;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.apache.lucene.document.*; import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.*;
@ -93,7 +94,7 @@ class ThreadSafetyTest {
Document d = new Document(); Document d = new Document();
int n = RANDOM.nextInt(); int n = RANDOM.nextInt();
d.add(Field.Keyword("id", Integer.toString(n))); d.add(Field.Keyword("id", Integer.toString(n)));
d.add(Field.UnStored("contents", intToEnglish(n))); d.add(Field.UnStored("contents", English.intToEnglish(n)));
System.out.println("Adding " + n); System.out.println("Adding " + n);
// Switch between single and multiple file segments // Switch between single and multiple file segments
@ -151,7 +152,7 @@ class ThreadSafetyTest {
throws Exception { throws Exception {
System.out.println("Searching for " + n); System.out.println("Searching for " + n);
Hits hits = Hits hits =
searcher.search(QueryParser.parse(intToEnglish(n), "contents", searcher.search(QueryParser.parse(English.intToEnglish(n), "contents",
ANALYZER)); ANALYZER));
System.out.println("Search for " + n + ": total=" + hits.length()); System.out.println("Search for " + n + ": total=" + hits.length());
for (int j = 0; j < Math.min(3, hits.length()); j++) { for (int j = 0; j < Math.min(3, hits.length()); j++) {
@ -197,76 +198,4 @@ class ThreadSafetyTest {
SearcherThread searcherThread3 = new SearcherThread(true); SearcherThread searcherThread3 = new SearcherThread(true);
searcherThread3.start(); searcherThread3.start();
} }
/**
 * Spells out <code>i</code> in English words.
 *
 * @param i the number to spell out
 * @return the words produced by the two-argument overload for <code>i</code>
 */
private static String intToEnglish(int i) {
  final StringBuffer words = new StringBuffer();
  intToEnglish(i, words);
  return words.toString();
}
/**
 * Appends the English spelling of <code>i</code> to <code>result</code>.
 *
 * <p>Groups are written as "N billion, ", "N million, ", "N thousand, " and
 * "N hundred ", followed by the tens and units (for example "seventy-seven ").
 * Zero appends nothing.
 *
 * @param i      the number to spell out; any int, including
 *               <code>Integer.MIN_VALUE</code>
 * @param result the buffer the words are appended to
 */
private static void intToEnglish(int i, StringBuffer result) {
  if (i < 0) {
    result.append("minus ");
    if (i == Integer.MIN_VALUE) {
      // -i overflows back to MIN_VALUE, so peel off the billions group
      // while the value is still negative; the remainder fits in an int
      intToEnglish(-(i / 1000000000), result);
      result.append("billion, ");
      i = -(i % 1000000000);
    } else {
      i = -i;
    }
  }
  if (i >= 1000000000) {                        // billions
    intToEnglish(i/1000000000, result);
    result.append("billion, ");
    i = i%1000000000;
  }
  if (i >= 1000000) {                           // millions
    intToEnglish(i/1000000, result);
    result.append("million, ");
    i = i%1000000;
  }
  if (i >= 1000) {                              // thousands
    intToEnglish(i/1000, result);
    result.append("thousand, ");
    i = i%1000;
  }
  if (i >= 100) {                               // hundreds
    intToEnglish(i/100, result);
    result.append("hundred ");
    i = i%100;
  }
  if (i >= 20) {                                // tens
    switch (i/10) {
    case 9 : result.append("ninety"); break;
    case 8 : result.append("eighty"); break;
    case 7 : result.append("seventy"); break;
    case 6 : result.append("sixty"); break;
    case 5 : result.append("fifty"); break;
    case 4 : result.append("forty"); break;
    case 3 : result.append("thirty"); break;
    case 2 : result.append("twenty"); break;
    }
    i = i%10;
    // a hyphen joins the tens to a following unit; otherwise end the word
    if (i == 0)
      result.append(" ");
    else
      result.append("-");
  }
  switch (i) {                                  // units and teens
  case 19 : result.append("nineteen "); break;
  case 18 : result.append("eighteen "); break;
  case 17 : result.append("seventeen "); break;
  case 16 : result.append("sixteen "); break;
  case 15 : result.append("fifteen "); break;
  case 14 : result.append("fourteen "); break;
  case 13 : result.append("thirteen "); break;
  case 12 : result.append("twelve "); break;
  case 11 : result.append("eleven "); break;
  case 10 : result.append("ten "); break;
  case 9 : result.append("nine "); break;
  case 8 : result.append("eight "); break;
  case 7 : result.append("seven "); break;
  case 6 : result.append("six "); break;
  case 5 : result.append("five "); break;
  case 4 : result.append("four "); break;
  case 3 : result.append("three "); break;
  case 2 : result.append("two "); break;
  case 1 : result.append("one "); break;
  case 0 : result.append(""); break;
  }
}
} }

View File

@ -0,0 +1,135 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.util.English;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
/**
 * Tests basic search capabilities.
 *
 * <p>Every test searches an in-memory index of 1000 documents, where document
 * <i>i</i> holds the output of <code>English.intToEnglish(i)</code> in the
 * field "field".
 *
 * @author Doug Cutting
 */
public class TestBasics extends TestCase {
  private IndexSearcher searcher;

  /** Builds the 1000-document index and opens a searcher on it. */
  public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer
      = new IndexWriter(directory, new SimpleAnalyzer(), true);
    try {
      //writer.infoStream = System.out;
      for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.add(Field.Text("field", English.intToEnglish(i)));
        writer.addDocument(doc);
      }
    } finally {
      writer.close();                             // close even if indexing fails
    }

    searcher = new IndexSearcher(directory);
  }

  /** Closes the searcher opened by {@link #setUp()}. */
  public void tearDown() throws Exception {
    if (searcher != null) {
      searcher.close();
      searcher = null;
    }
  }

  /** A term that occurs in the index: expects 100 hits for "seventy". */
  public void testTerm() throws Exception {
    Query query = new TermQuery(new Term("field", "seventy"));
    Hits hits = searcher.search(query);
    assertEquals(100, hits.length());
  }

  /** A term that does not occur in the index matches nothing. */
  public void testTerm2() throws Exception {
    Query query = new TermQuery(new Term("field", "seventish"));
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }

  /** A phrase that occurs in the index: expects 10 hits for "seventy seven". */
  public void testPhrase() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventy"));
    query.add(new Term("field", "seven"));
    Hits hits = searcher.search(query);
    assertEquals(10, hits.length());
  }

  /** A phrase of terms that do not occur in the index matches nothing. */
  public void testPhrase2() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventish"));
    query.add(new Term("field", "sevenon"));
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }

  /** Two required terms that both occur: expects 19 hits. */
  public void testBoolean() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "seventy")), true, false);
    query.add(new TermQuery(new Term("field", "seven")), true, false);
    Hits hits = searcher.search(query);
    assertEquals(19, hits.length());
  }

  /** Two required terms that do not occur in the index match nothing. */
  public void testBoolean2() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "sevento")), true, false);
    query.add(new TermQuery(new Term("field", "sevenly")), true, false);
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }
}

View File

@ -0,0 +1,140 @@
package org.apache.lucene.util;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/**
 * Spells out integers in English, e.g. 77 becomes "seventy-seven ".
 *
 * <p>The produced text keeps the separators used while it is assembled:
 * every group ends in a trailing space (or ", " after "billion",
 * "million" and "thousand"), so <code>intToEnglish(1000)</code> is
 * "one thousand, ".  The only value without a trailing separator is
 * zero, which is rendered as "zero".
 */
public class English {

  /** Words for the multiples of ten, indexed by the tens digit (2..9). */
  private static final String[] TENS = {
    "", "", "twenty", "thirty", "forty", "fifty",
    "sixty", "seventy", "eighty", "ninety"
  };

  /**
   * Words for 0..19, each with its trailing space.  Index 0 is empty
   * because a zero remainder adds nothing after a larger group.
   */
  private static final String[] BELOW_TWENTY = {
    "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ",
    "eight ", "nine ", "ten ", "eleven ", "twelve ", "thirteen ",
    "fourteen ", "fifteen ", "sixteen ", "seventeen ", "eighteen ",
    "nineteen "
  };

  /**
   * Returns the English spelling of <code>i</code>.
   *
   * @param i the number to spell out; any <code>int</code> is accepted
   * @return the spelled-out number, including its trailing separator
   */
  public static String intToEnglish(int i) {
    StringBuffer result = new StringBuffer();
    intToEnglish(i, result);
    return result.toString();
  }

  /**
   * Appends the English spelling of <code>i</code> to <code>result</code>.
   *
   * @param i the number to spell out; any <code>int</code> is accepted
   * @param result the buffer that receives the text
   */
  public static void intToEnglish(int i, StringBuffer result) {
    // Work in long: negating Integer.MIN_VALUE as an int overflows back to
    // itself, which used to produce just "minus ".
    appendEnglish(i, result);
  }

  /** Appends the spelling of <code>n</code>, peeling off one magnitude group at a time. */
  private static void appendEnglish(long n, StringBuffer result) {
    if (n == 0) {
      result.append("zero");
      return;
    }
    if (n < 0) {
      result.append("minus ");
      n = -n;
    }
    if (n >= 1000000000L) {                       // billions
      appendEnglish(n / 1000000000L, result);
      result.append("billion, ");
      n = n % 1000000000L;
    }
    if (n >= 1000000L) {                          // millions
      appendEnglish(n / 1000000L, result);
      result.append("million, ");
      n = n % 1000000L;
    }
    if (n >= 1000L) {                             // thousands
      appendEnglish(n / 1000L, result);
      result.append("thousand, ");
      n = n % 1000L;
    }
    if (n >= 100L) {                              // hundreds
      appendEnglish(n / 100L, result);
      result.append("hundred ");
      n = n % 100L;
    }
    if (n >= 20L) {                               // tens
      result.append(TENS[(int) (n / 10L)]);
      n = n % 10L;
      // "seventy " when nothing follows, "seventy-" before a units word
      result.append(n == 0 ? " " : "-");
    }
    // n is now within 0..19
    result.append(BELOW_TWENTY[(int) n]);
  }

  /**
   * Prints the English spelling of the integer given as the single
   * command-line argument.
   */
  public static void main(String[] args) {
    if (args.length != 1) {
      System.err.println("Usage: English <integer>");
      System.exit(1);
    }
    System.out.println(intToEnglish(Integer.parseInt(args[0])));
  }
}