mirror of https://github.com/apache/lucene.git
Optimized TermDocs.skipTo() and changed scorers to take advantage of it.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
07829a37a7
commit
6f8347c6fe
30
CHANGES.txt
30
CHANGES.txt
|
@ -2,6 +2,36 @@ Lucene Change Log
|
||||||
|
|
||||||
$Id$
|
$Id$
|
||||||
|
|
||||||
|
1.4 RC1
|
||||||
|
|
||||||
|
1. Changed the format of the .tis file, so that:
|
||||||
|
|
||||||
|
- it has a format version number, which makes it easier to
|
||||||
|
back-compatibly change file formats in the future.
|
||||||
|
|
||||||
|
- the term count is now stored as a long. This was the one aspect
|
||||||
|
of Lucene's file formats which limited index size.
|
||||||
|
|
||||||
|
- a few internal index parameters are now stored in the index, so
|
||||||
|
that they can (in theory) now be changed from index to index,
|
||||||
|
although there is not yet an API to do so.
|
||||||
|
|
||||||
|
These changes are back compatible. The new code can read old
|
||||||
|
indexes. But old code will not be able to read new indexes. (cutting)
|
||||||
|
|
||||||
|
2. Added an optimized implementation of TermDocs.skipTo(). A skip
|
||||||
|
table is now stored for each term in the .frq file. This only
|
||||||
|
adds a percent or two to overall index size, but can substantially
|
||||||
|
speed up many searches. (cutting)
|
||||||
|
|
||||||
|
3. Restructured the Scorer API and all Scorer implementations to take
|
||||||
|
advantage of an optimized TermDocs.skipTo() implementation. In
|
||||||
|
particular, PhraseQuerys and conjunctive BooleanQuerys are
|
||||||
|
faster when one clause has substantially fewer matches than the
|
||||||
|
others. (A conjunctive BooleanQuery is a BooleanQuery where all
|
||||||
|
clauses are required.) (cutting)
|
||||||
|
|
||||||
|
|
||||||
1.3 final
|
1.3 final
|
||||||
|
|
||||||
1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
|
1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
|
||||||
|
|
|
@ -291,7 +291,7 @@ final class DocumentWriter {
|
||||||
Posting posting = postings[i];
|
Posting posting = postings[i];
|
||||||
|
|
||||||
// add an entry to the dictionary with pointers to prox and freq files
|
// add an entry to the dictionary with pointers to prox and freq files
|
||||||
ti.set(1, freq.getFilePointer(), prox.getFilePointer());
|
ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
|
||||||
tis.add(posting.term, ti);
|
tis.add(posting.term, ti);
|
||||||
|
|
||||||
// add an entry to the freq file
|
// add an entry to the freq file
|
||||||
|
|
|
@ -62,6 +62,7 @@ import java.io.IOException;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.OutputStream;
|
import org.apache.lucene.store.OutputStream;
|
||||||
import org.apache.lucene.store.InputStream;
|
import org.apache.lucene.store.InputStream;
|
||||||
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.BitVector;
|
import org.apache.lucene.util.BitVector;
|
||||||
|
|
||||||
final class SegmentMerger {
|
final class SegmentMerger {
|
||||||
|
@ -246,17 +247,21 @@ final class SegmentMerger {
|
||||||
|
|
||||||
int df = appendPostings(smis, n); // append posting data
|
int df = appendPostings(smis, n); // append posting data
|
||||||
|
|
||||||
|
long skipPointer = writeSkip();
|
||||||
|
|
||||||
if (df > 0) {
|
if (df > 0) {
|
||||||
// add an entry to the dictionary with pointers to prox and freq files
|
// add an entry to the dictionary with pointers to prox and freq files
|
||||||
termInfo.set(df, freqPointer, proxPointer);
|
termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
|
||||||
termInfosWriter.add(smis[0].term, termInfo);
|
termInfosWriter.add(smis[0].term, termInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
private final int appendPostings(SegmentMergeInfo[] smis, int n)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
final int skipInterval = termInfosWriter.skipInterval;
|
||||||
int lastDoc = 0;
|
int lastDoc = 0;
|
||||||
int df = 0; // number of docs w/ term
|
int df = 0; // number of docs w/ term
|
||||||
|
resetSkip();
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
SegmentMergeInfo smi = smis[i];
|
SegmentMergeInfo smi = smis[i];
|
||||||
TermPositions postings = smi.postings;
|
TermPositions postings = smi.postings;
|
||||||
|
@ -272,6 +277,12 @@ final class SegmentMerger {
|
||||||
if (doc < lastDoc)
|
if (doc < lastDoc)
|
||||||
throw new IllegalStateException("docs out of order");
|
throw new IllegalStateException("docs out of order");
|
||||||
|
|
||||||
|
df++;
|
||||||
|
|
||||||
|
if ((df % skipInterval) == 0) {
|
||||||
|
bufferSkip(lastDoc);
|
||||||
|
}
|
||||||
|
|
||||||
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
|
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
|
||||||
lastDoc = doc;
|
lastDoc = doc;
|
||||||
|
|
||||||
|
@ -289,13 +300,43 @@ final class SegmentMerger {
|
||||||
proxOutput.writeVInt(position - lastPosition);
|
proxOutput.writeVInt(position - lastPosition);
|
||||||
lastPosition = position;
|
lastPosition = position;
|
||||||
}
|
}
|
||||||
|
|
||||||
df++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return df;
|
return df;
|
||||||
}
|
}
|
||||||
private final void mergeNorms() throws IOException {
|
|
||||||
|
private RAMOutputStream skipBuffer = new RAMOutputStream();
|
||||||
|
private int lastSkipDoc;
|
||||||
|
private long lastSkipFreqPointer;
|
||||||
|
private long lastSkipProxPointer;
|
||||||
|
|
||||||
|
private void resetSkip() throws IOException {
|
||||||
|
skipBuffer.reset();
|
||||||
|
lastSkipDoc = 0;
|
||||||
|
lastSkipFreqPointer = freqOutput.getFilePointer();
|
||||||
|
lastSkipProxPointer = proxOutput.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void bufferSkip(int doc) throws IOException {
|
||||||
|
long freqPointer = freqOutput.getFilePointer();
|
||||||
|
long proxPointer = proxOutput.getFilePointer();
|
||||||
|
|
||||||
|
skipBuffer.writeVInt(doc - lastSkipDoc);
|
||||||
|
skipBuffer.writeVInt((int)(freqPointer - lastSkipFreqPointer));
|
||||||
|
skipBuffer.writeVInt((int)(proxPointer - lastSkipProxPointer));
|
||||||
|
|
||||||
|
lastSkipDoc = doc;
|
||||||
|
lastSkipFreqPointer = freqPointer;
|
||||||
|
lastSkipProxPointer = proxPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long writeSkip() throws IOException {
|
||||||
|
long skipPointer = freqOutput.getFilePointer();
|
||||||
|
skipBuffer.writeTo(freqOutput);
|
||||||
|
return skipPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mergeNorms() throws IOException {
|
||||||
for (int i = 0; i < fieldInfos.size(); i++) {
|
for (int i = 0; i < fieldInfos.size(); i++) {
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||||
if (fi.isIndexed) {
|
if (fi.isIndexed) {
|
||||||
|
|
|
@ -61,16 +61,27 @@ import org.apache.lucene.store.InputStream;
|
||||||
class SegmentTermDocs implements TermDocs {
|
class SegmentTermDocs implements TermDocs {
|
||||||
protected SegmentReader parent;
|
protected SegmentReader parent;
|
||||||
private InputStream freqStream;
|
private InputStream freqStream;
|
||||||
private int freqCount;
|
private int count;
|
||||||
|
private int df;
|
||||||
private BitVector deletedDocs;
|
private BitVector deletedDocs;
|
||||||
int doc = 0;
|
int doc = 0;
|
||||||
int freq;
|
int freq;
|
||||||
|
|
||||||
|
private int skipInterval;
|
||||||
|
private int skipCount;
|
||||||
|
private InputStream skipStream;
|
||||||
|
private int skipDoc;
|
||||||
|
private long freqPointer;
|
||||||
|
private long proxPointer;
|
||||||
|
private long skipPointer;
|
||||||
|
private boolean haveSkipped;
|
||||||
|
|
||||||
SegmentTermDocs(SegmentReader parent)
|
SegmentTermDocs(SegmentReader parent)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this.parent = parent;
|
this.parent = parent;
|
||||||
this.freqStream = (InputStream)parent.freqStream.clone();
|
this.freqStream = (InputStream)parent.freqStream.clone();
|
||||||
this.deletedDocs = parent.deletedDocs;
|
this.deletedDocs = parent.deletedDocs;
|
||||||
|
this.skipInterval = parent.tis.getSkipInterval();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void seek(Term term) throws IOException {
|
public void seek(Term term) throws IOException {
|
||||||
|
@ -88,12 +99,19 @@ class SegmentTermDocs implements TermDocs {
|
||||||
}
|
}
|
||||||
|
|
||||||
void seek(TermInfo ti) throws IOException {
|
void seek(TermInfo ti) throws IOException {
|
||||||
|
count = 0;
|
||||||
if (ti == null) {
|
if (ti == null) {
|
||||||
freqCount = 0;
|
df = 0;
|
||||||
} else {
|
} else {
|
||||||
freqCount = ti.docFreq;
|
df = ti.docFreq;
|
||||||
doc = 0;
|
doc = 0;
|
||||||
freqStream.seek(ti.freqPointer);
|
skipDoc = 0;
|
||||||
|
skipCount = 0;
|
||||||
|
freqPointer = ti.freqPointer;
|
||||||
|
proxPointer = ti.proxPointer;
|
||||||
|
skipPointer = freqPointer + ti.skipOffset;
|
||||||
|
freqStream.seek(freqPointer);
|
||||||
|
haveSkipped = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +127,7 @@ class SegmentTermDocs implements TermDocs {
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
while (true) {
|
while (true) {
|
||||||
if (freqCount == 0)
|
if (count == df)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int docCode = freqStream.readVInt();
|
int docCode = freqStream.readVInt();
|
||||||
|
@ -119,7 +137,7 @@ class SegmentTermDocs implements TermDocs {
|
||||||
else
|
else
|
||||||
freq = freqStream.readVInt(); // else read freq
|
freq = freqStream.readVInt(); // else read freq
|
||||||
|
|
||||||
freqCount--;
|
count++;
|
||||||
|
|
||||||
if (deletedDocs == null || !deletedDocs.get(doc))
|
if (deletedDocs == null || !deletedDocs.get(doc))
|
||||||
break;
|
break;
|
||||||
|
@ -131,9 +149,9 @@ class SegmentTermDocs implements TermDocs {
|
||||||
/** Optimized implementation. */
|
/** Optimized implementation. */
|
||||||
public int read(final int[] docs, final int[] freqs)
|
public int read(final int[] docs, final int[] freqs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int end = docs.length;
|
final int length = docs.length;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (i < end && freqCount > 0) {
|
while (i < length && count < df) {
|
||||||
|
|
||||||
// manually inlined call to next() for speed
|
// manually inlined call to next() for speed
|
||||||
final int docCode = freqStream.readVInt();
|
final int docCode = freqStream.readVInt();
|
||||||
|
@ -142,7 +160,7 @@ class SegmentTermDocs implements TermDocs {
|
||||||
freq = 1; // freq is one
|
freq = 1; // freq is one
|
||||||
else
|
else
|
||||||
freq = freqStream.readVInt(); // else read freq
|
freq = freqStream.readVInt(); // else read freq
|
||||||
freqCount--;
|
count++;
|
||||||
|
|
||||||
if (deletedDocs == null || !deletedDocs.get(doc)) {
|
if (deletedDocs == null || !deletedDocs.get(doc)) {
|
||||||
docs[i] = doc;
|
docs[i] = doc;
|
||||||
|
@ -153,12 +171,61 @@ class SegmentTermDocs implements TermDocs {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** As yet unoptimized implementation. */
|
/** Overridden by SegmentTermPositions to skip in prox stream. */
|
||||||
|
protected void skipProx(long proxPointer) throws IOException {}
|
||||||
|
|
||||||
|
/** Optimized implementation. */
|
||||||
public boolean skipTo(int target) throws IOException {
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (df > skipInterval) { // optimized case
|
||||||
|
|
||||||
|
if (skipStream == null)
|
||||||
|
skipStream = (InputStream)freqStream.clone(); // lazily clone
|
||||||
|
|
||||||
|
if (!haveSkipped) { // lazily seek skip stream
|
||||||
|
skipStream.seek(skipPointer);
|
||||||
|
haveSkipped = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// scan skip data
|
||||||
|
int lastSkipDoc = skipDoc;
|
||||||
|
long lastFreqPointer = freqStream.getFilePointer();
|
||||||
|
long lastProxPointer = -1;
|
||||||
|
int numSkipped = -1 -(count % skipInterval);
|
||||||
|
|
||||||
|
while (target > skipDoc) {
|
||||||
|
lastSkipDoc = skipDoc;
|
||||||
|
lastFreqPointer = freqPointer;
|
||||||
|
lastProxPointer = proxPointer;
|
||||||
|
if (skipDoc >= doc)
|
||||||
|
numSkipped += skipInterval;
|
||||||
|
|
||||||
|
if ((count + numSkipped + skipInterval) > df)
|
||||||
|
break; // no more skips
|
||||||
|
|
||||||
|
skipDoc += skipStream.readVInt();
|
||||||
|
freqPointer += skipStream.readVInt();
|
||||||
|
proxPointer += skipStream.readVInt();
|
||||||
|
|
||||||
|
skipCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we found something to skip, then skip it
|
||||||
|
if (lastFreqPointer > freqStream.getFilePointer()) {
|
||||||
|
freqStream.seek(lastFreqPointer);
|
||||||
|
skipProx(lastProxPointer);
|
||||||
|
|
||||||
|
doc = lastSkipDoc;
|
||||||
|
count += numSkipped;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// done skipping, now just scan
|
||||||
do {
|
do {
|
||||||
if (!next())
|
if (!next())
|
||||||
return false;
|
return false;
|
||||||
} while (target > doc);
|
} while (target > doc);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,14 +60,17 @@ import org.apache.lucene.store.InputStream;
|
||||||
final class SegmentTermEnum extends TermEnum implements Cloneable {
|
final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
private InputStream input;
|
private InputStream input;
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
int size;
|
long size;
|
||||||
int position = -1;
|
long position = -1;
|
||||||
|
|
||||||
private Term term = new Term("", "");
|
private Term term = new Term("", "");
|
||||||
private TermInfo termInfo = new TermInfo();
|
private TermInfo termInfo = new TermInfo();
|
||||||
|
|
||||||
boolean isIndex = false;
|
private int format;
|
||||||
|
private boolean isIndex = false;
|
||||||
long indexPointer = 0;
|
long indexPointer = 0;
|
||||||
|
int indexInterval;
|
||||||
|
int skipInterval;
|
||||||
Term prev;
|
Term prev;
|
||||||
|
|
||||||
private char[] buffer = {};
|
private char[] buffer = {};
|
||||||
|
@ -76,8 +79,34 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
input = i;
|
input = i;
|
||||||
fieldInfos = fis;
|
fieldInfos = fis;
|
||||||
size = input.readInt();
|
|
||||||
isIndex = isi;
|
isIndex = isi;
|
||||||
|
|
||||||
|
int firstInt = input.readInt();
|
||||||
|
if (firstInt >= 0) {
|
||||||
|
// original-format file, without explicit format version number
|
||||||
|
format = 0;
|
||||||
|
size = firstInt;
|
||||||
|
|
||||||
|
// back-compatible settings
|
||||||
|
indexInterval = 128;
|
||||||
|
skipInterval = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// we have a format version number
|
||||||
|
format = firstInt;
|
||||||
|
|
||||||
|
// check that it is a format we can understand
|
||||||
|
if (format < TermInfosWriter.FORMAT)
|
||||||
|
throw new IOException("Unknown format version:" + format);
|
||||||
|
|
||||||
|
size = input.readLong(); // read the size
|
||||||
|
|
||||||
|
if (!isIndex) {
|
||||||
|
indexInterval = input.readInt();
|
||||||
|
skipInterval = input.readInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Object clone() {
|
protected Object clone() {
|
||||||
|
@ -117,6 +146,12 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
termInfo.freqPointer += input.readVLong(); // read freq pointer
|
||||||
termInfo.proxPointer += input.readVLong(); // read prox pointer
|
termInfo.proxPointer += input.readVLong(); // read prox pointer
|
||||||
|
|
||||||
|
if (!isIndex) {
|
||||||
|
if (termInfo.docFreq > skipInterval) {
|
||||||
|
termInfo.skipOffset = input.readVInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (isIndex)
|
if (isIndex)
|
||||||
indexPointer += input.readVLong(); // read index pointer
|
indexPointer += input.readVLong(); // read index pointer
|
||||||
|
|
||||||
|
|
|
@ -109,4 +109,11 @@ extends SegmentTermDocs implements TermPositions {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Called by super.skipTo(). */
|
||||||
|
protected void skipProx(long proxPointer) throws IOException {
|
||||||
|
proxStream.seek(proxPointer);
|
||||||
|
proxCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,6 +62,7 @@ final class TermInfo {
|
||||||
|
|
||||||
long freqPointer = 0;
|
long freqPointer = 0;
|
||||||
long proxPointer = 0;
|
long proxPointer = 0;
|
||||||
|
int skipOffset;
|
||||||
|
|
||||||
TermInfo() {}
|
TermInfo() {}
|
||||||
|
|
||||||
|
@ -75,17 +76,21 @@ final class TermInfo {
|
||||||
docFreq = ti.docFreq;
|
docFreq = ti.docFreq;
|
||||||
freqPointer = ti.freqPointer;
|
freqPointer = ti.freqPointer;
|
||||||
proxPointer = ti.proxPointer;
|
proxPointer = ti.proxPointer;
|
||||||
|
skipOffset = ti.skipOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
final void set(int df, long fp, long pp) {
|
final void set(int docFreq,
|
||||||
docFreq = df;
|
long freqPointer, long proxPointer, int skipOffset) {
|
||||||
freqPointer = fp;
|
this.docFreq = docFreq;
|
||||||
proxPointer = pp;
|
this.freqPointer = freqPointer;
|
||||||
|
this.proxPointer = proxPointer;
|
||||||
|
this.skipOffset = skipOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
final void set(TermInfo ti) {
|
final void set(TermInfo ti) {
|
||||||
docFreq = ti.docFreq;
|
docFreq = ti.docFreq;
|
||||||
freqPointer = ti.freqPointer;
|
freqPointer = ti.freqPointer;
|
||||||
proxPointer = ti.proxPointer;
|
proxPointer = ti.proxPointer;
|
||||||
|
skipOffset = ti.skipOffset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,7 +68,7 @@ final class TermInfosReader {
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
||||||
private SegmentTermEnum enumerator;
|
private SegmentTermEnum enumerator;
|
||||||
private int size;
|
private long size;
|
||||||
|
|
||||||
TermInfosReader(Directory dir, String seg, FieldInfos fis)
|
TermInfosReader(Directory dir, String seg, FieldInfos fis)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -82,13 +82,17 @@ final class TermInfosReader {
|
||||||
readIndex();
|
readIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getSkipInterval() {
|
||||||
|
return enumerator.skipInterval;
|
||||||
|
}
|
||||||
|
|
||||||
final void close() throws IOException {
|
final void close() throws IOException {
|
||||||
if (enumerator != null)
|
if (enumerator != null)
|
||||||
enumerator.close();
|
enumerator.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the number of term/value pairs in the set. */
|
/** Returns the number of term/value pairs in the set. */
|
||||||
final int size() {
|
final long size() {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,7 +105,7 @@ final class TermInfosReader {
|
||||||
new SegmentTermEnum(directory.openFile(segment + ".tii"),
|
new SegmentTermEnum(directory.openFile(segment + ".tii"),
|
||||||
fieldInfos, true);
|
fieldInfos, true);
|
||||||
try {
|
try {
|
||||||
int indexSize = indexEnum.size;
|
int indexSize = (int)indexEnum.size;
|
||||||
|
|
||||||
indexTerms = new Term[indexSize];
|
indexTerms = new Term[indexSize];
|
||||||
indexInfos = new TermInfo[indexSize];
|
indexInfos = new TermInfo[indexSize];
|
||||||
|
@ -137,7 +141,7 @@ final class TermInfosReader {
|
||||||
|
|
||||||
private final void seekEnum(int indexOffset) throws IOException {
|
private final void seekEnum(int indexOffset) throws IOException {
|
||||||
enumerator.seek(indexPointers[indexOffset],
|
enumerator.seek(indexPointers[indexOffset],
|
||||||
(indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
|
(indexOffset * enumerator.indexInterval) - 1,
|
||||||
indexTerms[indexOffset], indexInfos[indexOffset]);
|
indexTerms[indexOffset], indexInfos[indexOffset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,10 +150,10 @@ final class TermInfosReader {
|
||||||
if (size == 0) return null;
|
if (size == 0) return null;
|
||||||
|
|
||||||
// optimize sequential access: first try scanning cached enumerator w/o seeking
|
// optimize sequential access: first try scanning cached enumerator w/o seeking
|
||||||
if (enumerator.term() != null // term is at or past current
|
if (enumerator.term() != null // term is at or past current
|
||||||
&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
|
&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
|
||||||
|| term.compareTo(enumerator.term()) >= 0)) {
|
|| term.compareTo(enumerator.term()) >= 0)) {
|
||||||
int enumOffset = (enumerator.position/TermInfosWriter.INDEX_INTERVAL)+1;
|
int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
|
||||||
if (indexTerms.length == enumOffset // but before end of block
|
if (indexTerms.length == enumOffset // but before end of block
|
||||||
|| term.compareTo(indexTerms[enumOffset]) < 0)
|
|| term.compareTo(indexTerms[enumOffset]) < 0)
|
||||||
return scanEnum(term); // no need to seek
|
return scanEnum(term); // no need to seek
|
||||||
|
@ -174,10 +178,10 @@ final class TermInfosReader {
|
||||||
if (size == 0) return null;
|
if (size == 0) return null;
|
||||||
|
|
||||||
if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
|
if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
|
||||||
position < (enumerator.position + TermInfosWriter.INDEX_INTERVAL))
|
position < (enumerator.position + enumerator.indexInterval))
|
||||||
return scanEnum(position); // can avoid seek
|
return scanEnum(position); // can avoid seek
|
||||||
|
|
||||||
seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
|
seekEnum(position / enumerator.indexInterval); // must seek
|
||||||
return scanEnum(position);
|
return scanEnum(position);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -190,7 +194,7 @@ final class TermInfosReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the position of a Term in the set or -1. */
|
/** Returns the position of a Term in the set or -1. */
|
||||||
final synchronized int getPosition(Term term) throws IOException {
|
final synchronized long getPosition(Term term) throws IOException {
|
||||||
if (size == 0) return -1;
|
if (size == 0) return -1;
|
||||||
|
|
||||||
int indexOffset = getIndexOffset(term);
|
int indexOffset = getIndexOffset(term);
|
||||||
|
|
|
@ -62,13 +62,36 @@ import org.apache.lucene.store.Directory;
|
||||||
Directory. A TermInfos can be written once, in order. */
|
Directory. A TermInfos can be written once, in order. */
|
||||||
|
|
||||||
final class TermInfosWriter {
|
final class TermInfosWriter {
|
||||||
|
/** The file format version, a negative number. */
|
||||||
|
public static final int FORMAT = -1;
|
||||||
|
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
private OutputStream output;
|
private OutputStream output;
|
||||||
private Term lastTerm = new Term("", "");
|
private Term lastTerm = new Term("", "");
|
||||||
private TermInfo lastTi = new TermInfo();
|
private TermInfo lastTi = new TermInfo();
|
||||||
private int size = 0;
|
private int size = 0;
|
||||||
|
|
||||||
static final int INDEX_INTERVAL = 128;
|
// TODO: the default values for these two parameters should be settable from
|
||||||
|
// IndexWriter. However, once that's done, folks will start setting them to
|
||||||
|
// ridiculous values and complaining that things don't work well, as with
|
||||||
|
// mergeFactor. So, let's wait until a number of folks find that alternate
|
||||||
|
// values work better. Note that both of these values are stored in the
|
||||||
|
// segment, so that it's safe to change these w/o rebuilding all indexes.
|
||||||
|
|
||||||
|
/** Expert: The fraction of terms in the "dictionary" which should be stored
|
||||||
|
* in RAM. Smaller values use more memory, but make searching slightly
|
||||||
|
* faster, while larger values use less memory and make searching slightly
|
||||||
|
* slower. Searching is typically not dominated by dictionary lookup, so
|
||||||
|
* tweaking this is rarely useful.*/
|
||||||
|
int indexInterval = 128;
|
||||||
|
|
||||||
|
/** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
|
||||||
|
* used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
|
||||||
|
* smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||||
|
* smaller values result in bigger indexes, less acceleration and more
|
||||||
|
* accelerable cases. More detailed experiments would be useful here. */
|
||||||
|
int skipInterval = 16;
|
||||||
|
|
||||||
private long lastIndexPointer = 0;
|
private long lastIndexPointer = 0;
|
||||||
private boolean isIndex = false;
|
private boolean isIndex = false;
|
||||||
|
|
||||||
|
@ -91,7 +114,12 @@ final class TermInfosWriter {
|
||||||
fieldInfos = fis;
|
fieldInfos = fis;
|
||||||
isIndex = isi;
|
isIndex = isi;
|
||||||
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
|
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
|
||||||
output.writeInt(0); // leave space for size
|
output.writeInt(FORMAT); // write format
|
||||||
|
output.writeLong(0); // leave space for size
|
||||||
|
if (!isIndex) {
|
||||||
|
output.writeInt(indexInterval); // write indexInterval
|
||||||
|
output.writeInt(skipInterval); // write skipInterval
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds a new <Term, TermInfo> pair to the set.
|
/** Adds a new <Term, TermInfo> pair to the set.
|
||||||
|
@ -106,7 +134,7 @@ final class TermInfosWriter {
|
||||||
if (ti.proxPointer < lastTi.proxPointer)
|
if (ti.proxPointer < lastTi.proxPointer)
|
||||||
throw new IOException("proxPointer out of order");
|
throw new IOException("proxPointer out of order");
|
||||||
|
|
||||||
if (!isIndex && size % INDEX_INTERVAL == 0)
|
if (!isIndex && size % indexInterval == 0)
|
||||||
other.add(lastTerm, lastTi); // add an index term
|
other.add(lastTerm, lastTi); // add an index term
|
||||||
|
|
||||||
writeTerm(term); // write term
|
writeTerm(term); // write term
|
||||||
|
@ -114,6 +142,12 @@ final class TermInfosWriter {
|
||||||
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
|
||||||
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
|
||||||
|
|
||||||
|
if (!isIndex) {
|
||||||
|
if (ti.docFreq > skipInterval) {
|
||||||
|
output.writeVInt(ti.skipOffset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (isIndex) {
|
if (isIndex) {
|
||||||
output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
|
output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
|
||||||
lastIndexPointer = other.output.getFilePointer(); // write pointer
|
lastIndexPointer = other.output.getFilePointer(); // write pointer
|
||||||
|
@ -149,8 +183,8 @@ final class TermInfosWriter {
|
||||||
|
|
||||||
/** Called to complete TermInfos creation. */
|
/** Called to complete TermInfos creation. */
|
||||||
final void close() throws IOException {
|
final void close() throws IOException {
|
||||||
output.seek(0); // write size at start
|
output.seek(4); // write size after format
|
||||||
output.writeInt(size);
|
output.writeLong(size);
|
||||||
output.close();
|
output.close();
|
||||||
|
|
||||||
if (!isIndex)
|
if (!isIndex)
|
||||||
|
|
|
@ -158,6 +158,37 @@ public class BooleanQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Scorer scorer(IndexReader reader) throws IOException {
|
public Scorer scorer(IndexReader reader) throws IOException {
|
||||||
|
// First see if the (faster) ConjunctionScorer will work. This can be
|
||||||
|
// used when all clauses are required. Also, at this point a
|
||||||
|
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
|
||||||
|
// from a BooleanScorer are not always sorted by document number (sigh)
|
||||||
|
// and hence BooleanScorer cannot implement skipTo() correctly, which is
|
||||||
|
// required by ConjunctionScorer.
|
||||||
|
boolean allRequired = true;
|
||||||
|
boolean noneBoolean = true;
|
||||||
|
for (int i = 0 ; i < weights.size(); i++) {
|
||||||
|
BooleanClause c = (BooleanClause)clauses.elementAt(i);
|
||||||
|
if (!c.required)
|
||||||
|
allRequired = false;
|
||||||
|
if (c.query instanceof BooleanQuery)
|
||||||
|
noneBoolean = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allRequired && noneBoolean) { // ConjunctionScorer is okay
|
||||||
|
ConjunctionScorer result =
|
||||||
|
new ConjunctionScorer(searcher.getSimilarity());
|
||||||
|
for (int i = 0 ; i < weights.size(); i++) {
|
||||||
|
BooleanClause c = (BooleanClause)clauses.elementAt(i);
|
||||||
|
Weight w = (Weight)weights.elementAt(i);
|
||||||
|
Scorer subScorer = w.scorer(reader);
|
||||||
|
if (subScorer == null)
|
||||||
|
return null;
|
||||||
|
result.add(subScorer);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use good-old BooleanScorer instead.
|
||||||
BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
|
BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
|
||||||
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
for (int i = 0 ; i < weights.size(); i++) {
|
||||||
|
|
|
@ -76,14 +76,17 @@ final class BooleanScorer extends Scorer {
|
||||||
|
|
||||||
static final class SubScorer {
|
static final class SubScorer {
|
||||||
public Scorer scorer;
|
public Scorer scorer;
|
||||||
|
public boolean done;
|
||||||
public boolean required = false;
|
public boolean required = false;
|
||||||
public boolean prohibited = false;
|
public boolean prohibited = false;
|
||||||
public HitCollector collector;
|
public HitCollector collector;
|
||||||
public SubScorer next;
|
public SubScorer next;
|
||||||
|
|
||||||
public SubScorer(Scorer scorer, boolean required, boolean prohibited,
|
public SubScorer(Scorer scorer, boolean required, boolean prohibited,
|
||||||
HitCollector collector, SubScorer next) {
|
HitCollector collector, SubScorer next)
|
||||||
|
throws IOException {
|
||||||
this.scorer = scorer;
|
this.scorer = scorer;
|
||||||
|
this.done = !scorer.next();
|
||||||
this.required = required;
|
this.required = required;
|
||||||
this.prohibited = prohibited;
|
this.prohibited = prohibited;
|
||||||
this.collector = collector;
|
this.collector = collector;
|
||||||
|
@ -91,7 +94,8 @@ final class BooleanScorer extends Scorer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final void add(Scorer scorer, boolean required, boolean prohibited) {
|
final void add(Scorer scorer, boolean required, boolean prohibited)
|
||||||
|
throws IOException {
|
||||||
int mask = 0;
|
int mask = 0;
|
||||||
if (required || prohibited) {
|
if (required || prohibited) {
|
||||||
if (nextMask == 0)
|
if (nextMask == 0)
|
||||||
|
@ -120,17 +124,45 @@ final class BooleanScorer extends Scorer {
|
||||||
coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
|
coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void score(HitCollector results, int maxDoc)
|
private int end;
|
||||||
throws IOException {
|
private Bucket current;
|
||||||
|
|
||||||
|
public int doc() { return current.doc; }
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
boolean more = false;
|
||||||
|
do {
|
||||||
|
while (bucketTable.first != null) { // more queued
|
||||||
|
current = bucketTable.first;
|
||||||
|
bucketTable.first = current.next; // pop the queue
|
||||||
|
|
||||||
|
// check prohibited & required
|
||||||
|
if ((current.bits & prohibitedMask) == 0 &&
|
||||||
|
(current.bits & requiredMask) == requiredMask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// refill the queue
|
||||||
|
end += BucketTable.SIZE;
|
||||||
|
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
|
||||||
|
Scorer scorer = sub.scorer;
|
||||||
|
while (!sub.done && scorer.doc() < end) {
|
||||||
|
sub.collector.collect(scorer.doc(), scorer.score());
|
||||||
|
sub.done = !scorer.next();
|
||||||
|
}
|
||||||
|
if (!sub.done) {
|
||||||
|
more = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (bucketTable.first != null | more);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float score() throws IOException {
|
||||||
if (coordFactors == null)
|
if (coordFactors == null)
|
||||||
computeCoordFactors();
|
computeCoordFactors();
|
||||||
|
return current.score * coordFactors[current.coord];
|
||||||
while (currentDoc < maxDoc) {
|
|
||||||
currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
|
|
||||||
for (SubScorer t = scorers; t != null; t = t.next)
|
|
||||||
t.scorer.score(t.collector, currentDoc);
|
|
||||||
bucketTable.collectHits(results);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class Bucket {
|
static final class Bucket {
|
||||||
|
@ -207,6 +239,10 @@ final class BooleanScorer extends Scorer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Not supported by this scorer: every call throws
 * UnsupportedOperationException, regardless of <code>target</code>. */
public boolean skipTo(int target) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
public Explanation explain(int doc) throws IOException {
|
public Explanation explain(int doc) throws IOException {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,155 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
|
||||||
|
/** Scorer for conjunctions, sets of queries, all of which are required. */
|
||||||
|
final class ConjunctionScorer extends Scorer {
|
||||||
|
private LinkedList scorers = new LinkedList();
|
||||||
|
private boolean firstTime = true;
|
||||||
|
private boolean more = true;
|
||||||
|
private float coord;
|
||||||
|
|
||||||
|
public ConjunctionScorer(Similarity similarity) {
|
||||||
|
super(similarity);
|
||||||
|
}
|
||||||
|
|
||||||
|
final void add(Scorer scorer) throws IOException {
|
||||||
|
scorers.addLast(scorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Scorer first() { return (Scorer)scorers.getFirst(); }
|
||||||
|
private Scorer last() { return (Scorer)scorers.getLast(); }
|
||||||
|
|
||||||
|
public int doc() { return first().doc(); }
|
||||||
|
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
init();
|
||||||
|
} else if (more) {
|
||||||
|
more = last().next(); // trigger further scanning
|
||||||
|
}
|
||||||
|
|
||||||
|
while (more && first().doc() < last().doc()) { // find doc w/ all clauses
|
||||||
|
more = first().skipTo(last().doc()); // skip first upto last
|
||||||
|
scorers.addLast(scorers.removeFirst()); // move first to last
|
||||||
|
}
|
||||||
|
|
||||||
|
return more; // found a doc with all clauses
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
Iterator i = scorers.iterator();
|
||||||
|
while (more && i.hasNext()) {
|
||||||
|
more = ((Scorer)i.next()).skipTo(target);
|
||||||
|
}
|
||||||
|
if (more)
|
||||||
|
sortScorers(); // re-sort scorers
|
||||||
|
return more;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float score() throws IOException {
|
||||||
|
float score = 0.0f; // sum scores
|
||||||
|
Iterator i = scorers.iterator();
|
||||||
|
while (i.hasNext())
|
||||||
|
score += ((Scorer)i.next()).score();
|
||||||
|
score *= coord;
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void init() throws IOException {
|
||||||
|
more = scorers.size() > 0;
|
||||||
|
|
||||||
|
// compute coord factor
|
||||||
|
coord = getSimilarity().coord(scorers.size(), scorers.size());
|
||||||
|
|
||||||
|
// move each scorer to its first entry
|
||||||
|
Iterator i = scorers.iterator();
|
||||||
|
while (more && i.hasNext()) {
|
||||||
|
more = ((Scorer)i.next()).next();
|
||||||
|
}
|
||||||
|
if (more)
|
||||||
|
sortScorers(); // initial sort of list
|
||||||
|
|
||||||
|
firstTime = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sortScorers() throws IOException {
|
||||||
|
// move scorers to an array
|
||||||
|
Scorer[] array = (Scorer[])scorers.toArray(new Scorer[scorers.size()]);
|
||||||
|
scorers.clear(); // empty the list
|
||||||
|
|
||||||
|
Arrays.sort(array, new Comparator() { // sort the array
|
||||||
|
public int compare(Object o1, Object o2) {
|
||||||
|
return ((Scorer)o1).doc() - ((Scorer)o2).doc();
|
||||||
|
}
|
||||||
|
public boolean equals(Object o1, Object o2) {
|
||||||
|
return ((Scorer)o1).doc() == ((Scorer)o2).doc();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for (int i = 0; i < array.length; i++) {
|
||||||
|
scorers.addLast(array[i]); // re-build list, now sorted
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Explanation explain(int doc) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -140,7 +140,7 @@ public class IndexSearcher extends Searcher {
|
||||||
hq.insert(new ScoreDoc(doc, score));
|
hq.insert(new ScoreDoc(doc, score));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, reader.maxDoc());
|
});
|
||||||
|
|
||||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||||
|
@ -180,7 +180,7 @@ public class IndexSearcher extends Searcher {
|
||||||
Scorer scorer = query.weight(this).scorer(reader);
|
Scorer scorer = query.weight(this).scorer(reader);
|
||||||
if (scorer == null)
|
if (scorer == null)
|
||||||
return;
|
return;
|
||||||
scorer.score(collector, reader.maxDoc());
|
scorer.score(collector);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query rewrite(Query original) throws IOException {
|
public Query rewrite(Query original) throws IOException {
|
||||||
|
|
|
@ -68,19 +68,31 @@ final class PhrasePositions {
|
||||||
PhrasePositions(TermPositions t, int o) throws IOException {
|
PhrasePositions(TermPositions t, int o) throws IOException {
|
||||||
tp = t;
|
tp = t;
|
||||||
offset = o;
|
offset = o;
|
||||||
next();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final void next() throws IOException { // increments to next doc
|
final boolean next() throws IOException { // increments to next doc
|
||||||
if (!tp.next()) {
|
if (!tp.next()) {
|
||||||
tp.close(); // close stream
|
tp.close(); // close stream
|
||||||
doc = Integer.MAX_VALUE; // sentinel value
|
doc = Integer.MAX_VALUE; // sentinel value
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
doc = tp.doc();
|
doc = tp.doc();
|
||||||
position = 0;
|
position = 0;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final boolean skipTo(int target) throws IOException {
|
||||||
|
if (!tp.skipTo(target)) {
|
||||||
|
tp.close(); // close stream
|
||||||
|
doc = Integer.MAX_VALUE; // sentinel value
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
doc = tp.doc();
|
||||||
|
position = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
final void firstPosition() throws IOException {
|
final void firstPosition() throws IOException {
|
||||||
count = tp.freq(); // read first pos
|
count = tp.freq(); // read first pos
|
||||||
nextPosition();
|
nextPosition();
|
||||||
|
|
|
@ -60,89 +60,127 @@ import org.apache.lucene.util.*;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
|
|
||||||
abstract class PhraseScorer extends Scorer {
|
abstract class PhraseScorer extends Scorer {
|
||||||
private Weight weight;
|
private Weight weight;
|
||||||
protected byte[] norms;
|
protected byte[] norms;
|
||||||
protected float value;
|
protected float value;
|
||||||
|
|
||||||
protected PhraseQueue pq;
|
private boolean firstTime = true;
|
||||||
protected PhrasePositions first, last;
|
private boolean more = true;
|
||||||
|
protected PhraseQueue pq;
|
||||||
|
protected PhrasePositions first, last;
|
||||||
|
|
||||||
private float freq;
|
private float freq;
|
||||||
|
|
||||||
PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
|
PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
|
||||||
byte[] norms) throws IOException {
|
byte[] norms) throws IOException {
|
||||||
super(similarity);
|
super(similarity);
|
||||||
this.norms = norms;
|
this.norms = norms;
|
||||||
this.weight = weight;
|
this.weight = weight;
|
||||||
this.value = weight.getValue();
|
this.value = weight.getValue();
|
||||||
|
|
||||||
// use PQ to build a sorted list of PhrasePositions
|
// convert tps to a list
|
||||||
pq = new PhraseQueue(tps.length);
|
for (int i = 0; i < tps.length; i++) {
|
||||||
for (int i = 0; i < tps.length; i++) {
|
PhrasePositions pp = new PhrasePositions(tps[i], i);
|
||||||
pq.put(new PhrasePositions(tps[i], i));
|
if (last != null) { // add next to end of list
|
||||||
}
|
last.next = pp;
|
||||||
pqToList();
|
} else
|
||||||
|
first = pp;
|
||||||
|
last = pp;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void score(HitCollector results, int end) throws IOException {
|
pq = new PhraseQueue(tps.length); // construct empty pq
|
||||||
Similarity similarity = getSimilarity();
|
|
||||||
while (last.doc < end) { // find doc w/ all the terms
|
|
||||||
while (first.doc < last.doc) { // scan forward in first
|
|
||||||
do {
|
|
||||||
first.next();
|
|
||||||
} while (first.doc < last.doc);
|
|
||||||
firstToLast();
|
|
||||||
if (last.doc >= end)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// found doc with all terms
|
}
|
||||||
freq = phraseFreq(); // check for phrase
|
|
||||||
|
|
||||||
if (freq > 0.0) {
|
public int doc() { return first.doc; }
|
||||||
float score = similarity.tf(freq) * value; // compute score
|
|
||||||
score *= Similarity.decodeNorm(norms[first.doc]); // normalize
|
public boolean next() throws IOException {
|
||||||
results.collect(first.doc, score); // add to results
|
if (firstTime) {
|
||||||
}
|
sort();
|
||||||
last.next(); // resume scanning
|
firstTime = false;
|
||||||
}
|
} else if (more) {
|
||||||
|
more = last.next(); // trigger further scanning
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract float phraseFreq() throws IOException;
|
while (more) {
|
||||||
|
while (more && first.doc < last.doc) { // find doc w/ all the terms
|
||||||
|
more = first.skipTo(last.doc); // skip first upto last
|
||||||
|
firstToLast(); // and move it to the end
|
||||||
|
}
|
||||||
|
|
||||||
protected final void pqToList() {
|
if (more) {
|
||||||
last = first = null;
|
// found a doc with all of the terms
|
||||||
while (pq.top() != null) {
|
freq = phraseFreq(); // check for phrase
|
||||||
PhrasePositions pp = (PhrasePositions) pq.pop();
|
if (freq == 0.0f) // no match
|
||||||
if (last != null) { // add next to end of list
|
more = last.next(); // trigger further scanning
|
||||||
last.next = pp;
|
else
|
||||||
} else
|
return true; // found a match
|
||||||
first = pp;
|
}
|
||||||
last = pp;
|
|
||||||
pp.next = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return false; // no more matches
|
||||||
|
}
|
||||||
|
|
||||||
protected final void firstToLast() {
|
public float score() throws IOException {
|
||||||
last.next = first; // move first to end of list
|
//System.out.println("scoring " + first.doc);
|
||||||
last = first;
|
float raw = getSimilarity().tf(freq) * value; // raw score
|
||||||
first = first.next;
|
return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
|
||||||
last.next = null;
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
|
||||||
|
more = pp.skipTo(target);
|
||||||
}
|
}
|
||||||
|
if (more)
|
||||||
|
sort(); // re-sort
|
||||||
|
return more;
|
||||||
|
}
|
||||||
|
|
||||||
public Explanation explain(final int doc) throws IOException {
|
|
||||||
Explanation tfExplanation = new Explanation();
|
|
||||||
|
|
||||||
score(new HitCollector() {
|
protected abstract float phraseFreq() throws IOException;
|
||||||
public final void collect(int d, float score) {
|
|
||||||
}
|
|
||||||
}, doc + 1);
|
|
||||||
|
|
||||||
float phraseFreq = (first.doc == doc) ? freq : 0.0f;
|
private void sort() throws IOException {
|
||||||
tfExplanation.setValue(getSimilarity().tf(phraseFreq));
|
pq.clear();
|
||||||
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
|
for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
|
||||||
|
more = pp.next();
|
||||||
return tfExplanation;
|
if (more) {
|
||||||
|
pq.put(pp);
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
pqToList();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void pqToList() {
|
||||||
|
last = first = null;
|
||||||
|
while (pq.top() != null) {
|
||||||
|
PhrasePositions pp = (PhrasePositions) pq.pop();
|
||||||
|
if (last != null) { // add next to end of list
|
||||||
|
last.next = pp;
|
||||||
|
} else
|
||||||
|
first = pp;
|
||||||
|
last = pp;
|
||||||
|
pp.next = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final void firstToLast() {
|
||||||
|
last.next = first; // move first to end of list
|
||||||
|
last = first;
|
||||||
|
first = first.next;
|
||||||
|
last.next = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Explanation explain(final int doc) throws IOException {
|
||||||
|
Explanation tfExplanation = new Explanation();
|
||||||
|
|
||||||
|
while (next() && doc() < doc) {}
|
||||||
|
|
||||||
|
float phraseFreq = (doc() == doc) ? freq : 0.0f;
|
||||||
|
tfExplanation.setValue(getSimilarity().tf(phraseFreq));
|
||||||
|
tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
|
||||||
|
|
||||||
|
return tfExplanation;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,11 +70,39 @@ public abstract class Scorer {
|
||||||
return this.similarity;
|
return this.similarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Scores hits and passes them to a collector. Stops at the last document
|
/** Scores all documents and passes them to a collector. */
|
||||||
* before <code>maxDoc</code>. If called repeatedly, will restart at point
|
public void score(HitCollector hc) throws IOException {
|
||||||
* where it last left off.
|
while (next()) {
|
||||||
|
hc.collect(doc(), score());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Advance to the next document matching the query. Returns true iff there
|
||||||
|
* is another match. */
|
||||||
|
public abstract boolean next() throws IOException;
|
||||||
|
|
||||||
|
/** Returns the current document number. Initially invalid, until {@link
|
||||||
|
* #next()} is called the first time. */
|
||||||
|
public abstract int doc();
|
||||||
|
|
||||||
|
/** Returns the score of the current document. Initially invalid, until
|
||||||
|
* {@link #next()} is called the first time. */
|
||||||
|
public abstract float score() throws IOException;
|
||||||
|
|
||||||
|
/** Skips to the first match beyond the current whose document number is
|
||||||
|
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
|
||||||
|
* a match. <p>Behaves as if written: <pre>
|
||||||
|
* boolean skipTo(int target) {
|
||||||
|
* do {
|
||||||
|
* if (!next())
|
||||||
|
* return false;
|
||||||
|
* } while (target > doc());
|
||||||
|
* return true;
|
||||||
|
* }
|
||||||
|
* </pre>
|
||||||
|
* Most implementations are considerably more efficient than that.
|
||||||
*/
|
*/
|
||||||
public abstract void score(HitCollector hc, int maxDoc) throws IOException;
|
public abstract boolean skipTo(int target) throws IOException;
|
||||||
|
|
||||||
/** Returns an explanation of the score for <code>doc</code>. */
|
/** Returns an explanation of the score for <code>doc</code>. */
|
||||||
public abstract Explanation explain(int doc) throws IOException;
|
public abstract Explanation explain(int doc) throws IOException;
|
||||||
|
|
|
@ -83,44 +83,56 @@ final class TermScorer extends Scorer {
|
||||||
|
|
||||||
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
|
for (int i = 0; i < SCORE_CACHE_SIZE; i++)
|
||||||
scoreCache[i] = getSimilarity().tf(i) * weightValue;
|
scoreCache[i] = getSimilarity().tf(i) * weightValue;
|
||||||
|
|
||||||
pointerMax = termDocs.read(docs, freqs); // fill buffers
|
|
||||||
|
|
||||||
if (pointerMax != 0)
|
|
||||||
doc = docs[0];
|
|
||||||
else {
|
|
||||||
termDocs.close(); // close stream
|
|
||||||
doc = Integer.MAX_VALUE; // set to sentinel value
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void score(HitCollector c, final int end) throws IOException {
|
public int doc() { return doc; }
|
||||||
int d = doc; // cache doc in local
|
|
||||||
Similarity similarity = getSimilarity(); // cache sim in local
|
|
||||||
while (d < end) { // for docs in window
|
|
||||||
final int f = freqs[pointer];
|
|
||||||
float score = // compute tf(f)*weight
|
|
||||||
f < SCORE_CACHE_SIZE // check cache
|
|
||||||
? scoreCache[f] // cache hit
|
|
||||||
: similarity.tf(f)*weightValue; // cache miss
|
|
||||||
|
|
||||||
score *= Similarity.decodeNorm(norms[d]); // normalize for field
|
public boolean next() throws IOException {
|
||||||
|
pointer++;
|
||||||
c.collect(d, score); // collect score
|
if (pointer >= pointerMax) {
|
||||||
|
pointerMax = termDocs.read(docs, freqs); // refill buffer
|
||||||
if (++pointer == pointerMax) {
|
if (pointerMax != 0) {
|
||||||
pointerMax = termDocs.read(docs, freqs); // refill buffers
|
pointer = 0;
|
||||||
if (pointerMax != 0) {
|
} else {
|
||||||
pointer = 0;
|
termDocs.close(); // close stream
|
||||||
} else {
|
doc = Integer.MAX_VALUE; // set to sentinel value
|
||||||
termDocs.close(); // close stream
|
return false;
|
||||||
doc = Integer.MAX_VALUE; // set to sentinel value
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
d = docs[pointer];
|
|
||||||
}
|
}
|
||||||
doc = d; // flush cache
|
doc = docs[pointer];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float score() throws IOException {
|
||||||
|
int f = freqs[pointer];
|
||||||
|
float raw = // compute tf(f)*weight
|
||||||
|
f < SCORE_CACHE_SIZE // check cache
|
||||||
|
? scoreCache[f] // cache hit
|
||||||
|
: getSimilarity().tf(f)*weightValue; // cache miss
|
||||||
|
|
||||||
|
return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
// first scan in cache
|
||||||
|
for (pointer++; pointer < pointerMax; pointer++) {
|
||||||
|
if (!(target > docs[pointer])) {
|
||||||
|
doc = docs[pointer];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// not found in cache, seek underlying stream
|
||||||
|
boolean result = termDocs.skipTo(target);
|
||||||
|
if (result) {
|
||||||
|
pointerMax = 1;
|
||||||
|
pointer = 0;
|
||||||
|
docs[pointer] = doc = termDocs.doc();
|
||||||
|
freqs[pointer] = termDocs.freq();
|
||||||
|
} else {
|
||||||
|
doc = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Explanation explain(int doc) throws IOException {
|
public Explanation explain(int doc) throws IOException {
|
||||||
|
|
|
@ -226,98 +226,3 @@ public final class RAMDirectory extends Directory {
|
||||||
public final void close() {
|
public final void close() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
final class RAMInputStream extends InputStream implements Cloneable {
|
|
||||||
RAMFile file;
|
|
||||||
int pointer = 0;
|
|
||||||
|
|
||||||
public RAMInputStream(RAMFile f) {
|
|
||||||
file = f;
|
|
||||||
length = file.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** InputStream methods */
|
|
||||||
public final void readInternal(byte[] dest, int destOffset, int len) {
|
|
||||||
int remainder = len;
|
|
||||||
int start = pointer;
|
|
||||||
while (remainder != 0) {
|
|
||||||
int bufferNumber = start/InputStream.BUFFER_SIZE;
|
|
||||||
int bufferOffset = start%InputStream.BUFFER_SIZE;
|
|
||||||
int bytesInBuffer = InputStream.BUFFER_SIZE - bufferOffset;
|
|
||||||
int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
|
|
||||||
byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
|
||||||
System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
|
|
||||||
destOffset += bytesToCopy;
|
|
||||||
start += bytesToCopy;
|
|
||||||
remainder -= bytesToCopy;
|
|
||||||
}
|
|
||||||
pointer += len;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final void close() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Random-access methods */
|
|
||||||
public final void seekInternal(long pos) {
|
|
||||||
pointer = (int)pos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
final class RAMOutputStream extends OutputStream {
|
|
||||||
RAMFile file;
|
|
||||||
int pointer = 0;
|
|
||||||
|
|
||||||
public RAMOutputStream(RAMFile f) {
|
|
||||||
file = f;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** output methods: */
|
|
||||||
public final void flushBuffer(byte[] src, int len) {
|
|
||||||
int bufferNumber = pointer/OutputStream.BUFFER_SIZE;
|
|
||||||
int bufferOffset = pointer%OutputStream.BUFFER_SIZE;
|
|
||||||
int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
|
|
||||||
int bytesToCopy = bytesInBuffer >= len ? len : bytesInBuffer;
|
|
||||||
|
|
||||||
if (bufferNumber == file.buffers.size())
|
|
||||||
file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
|
|
||||||
|
|
||||||
byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
|
||||||
System.arraycopy(src, 0, buffer, bufferOffset, bytesToCopy);
|
|
||||||
|
|
||||||
if (bytesToCopy < len) { // not all in one buffer
|
|
||||||
int srcOffset = bytesToCopy;
|
|
||||||
bytesToCopy = len - bytesToCopy; // remaining bytes
|
|
||||||
bufferNumber++;
|
|
||||||
if (bufferNumber == file.buffers.size())
|
|
||||||
file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
|
|
||||||
buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
|
||||||
System.arraycopy(src, srcOffset, buffer, 0, bytesToCopy);
|
|
||||||
}
|
|
||||||
pointer += len;
|
|
||||||
if (pointer > file.length)
|
|
||||||
file.length = pointer;
|
|
||||||
|
|
||||||
file.lastModified = System.currentTimeMillis();
|
|
||||||
}
|
|
||||||
|
|
||||||
public final void close() throws IOException {
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Random-access methods */
|
|
||||||
public final void seek(long pos) throws IOException {
|
|
||||||
super.seek(pos);
|
|
||||||
pointer = (int)pos;
|
|
||||||
}
|
|
||||||
public final long length() throws IOException {
|
|
||||||
return file.length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final class RAMFile {
|
|
||||||
Vector buffers = new Vector();
|
|
||||||
long length;
|
|
||||||
long lastModified = System.currentTimeMillis();
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
/** Plain holder for the state of one in-memory file. */
class RAMFile {
  /** Buffers holding the file content. */
  Vector buffers = new Vector();

  /** Length of the file; starts at zero. */
  long length = 0L;

  /** Initialised to the wall-clock time at construction. */
  long lastModified = System.currentTimeMillis();
}
|
|
@ -0,0 +1,95 @@
|
||||||
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A memory-resident {@link InputStream} implementation.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
class RAMInputStream extends InputStream implements Cloneable {
|
||||||
|
private RAMFile file;
|
||||||
|
private int pointer = 0;
|
||||||
|
|
||||||
|
public RAMInputStream(RAMFile f) {
|
||||||
|
file = f;
|
||||||
|
length = file.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void readInternal(byte[] dest, int destOffset, int len) {
|
||||||
|
int remainder = len;
|
||||||
|
int start = pointer;
|
||||||
|
while (remainder != 0) {
|
||||||
|
int bufferNumber = start/BUFFER_SIZE;
|
||||||
|
int bufferOffset = start%BUFFER_SIZE;
|
||||||
|
int bytesInBuffer = BUFFER_SIZE - bufferOffset;
|
||||||
|
int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
|
||||||
|
byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
||||||
|
System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
|
||||||
|
destOffset += bytesToCopy;
|
||||||
|
start += bytesToCopy;
|
||||||
|
remainder -= bytesToCopy;
|
||||||
|
}
|
||||||
|
pointer += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public void seekInternal(long pos) {
|
||||||
|
pointer = (int)pos;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,145 @@
|
||||||
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A memory-resident {@link OutputStream} implementation.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class RAMOutputStream extends OutputStream {
|
||||||
|
private RAMFile file;
|
||||||
|
private int pointer = 0;
|
||||||
|
|
||||||
|
/** Construct an empty output buffer. */
|
||||||
|
public RAMOutputStream() {
|
||||||
|
this(new RAMFile());
|
||||||
|
}
|
||||||
|
|
||||||
|
RAMOutputStream(RAMFile f) {
|
||||||
|
file = f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Copy the current contents of this buffer to the named output. */
|
||||||
|
public void writeTo(OutputStream out) throws IOException {
|
||||||
|
flush();
|
||||||
|
final long end = file.length;
|
||||||
|
long pos = 0;
|
||||||
|
int buffer = 0;
|
||||||
|
while (pos < end) {
|
||||||
|
int length = BUFFER_SIZE;
|
||||||
|
long nextPos = pos + length;
|
||||||
|
if (nextPos > end) { // at the last buffer
|
||||||
|
length = (int)(end - pos);
|
||||||
|
}
|
||||||
|
out.writeBytes((byte[])file.buffers.elementAt(buffer++), length);
|
||||||
|
pos = nextPos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resets this to an empty buffer. */
|
||||||
|
public void reset() {
|
||||||
|
try {
|
||||||
|
seek(0);
|
||||||
|
} catch (IOException e) { // should never happen
|
||||||
|
throw new RuntimeException(e.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
file.length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void flushBuffer(byte[] src, int len) {
|
||||||
|
int bufferNumber = pointer/BUFFER_SIZE;
|
||||||
|
int bufferOffset = pointer%BUFFER_SIZE;
|
||||||
|
int bytesInBuffer = BUFFER_SIZE - bufferOffset;
|
||||||
|
int bytesToCopy = bytesInBuffer >= len ? len : bytesInBuffer;
|
||||||
|
|
||||||
|
if (bufferNumber == file.buffers.size())
|
||||||
|
file.buffers.addElement(new byte[BUFFER_SIZE]);
|
||||||
|
|
||||||
|
byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
||||||
|
System.arraycopy(src, 0, buffer, bufferOffset, bytesToCopy);
|
||||||
|
|
||||||
|
if (bytesToCopy < len) { // not all in one buffer
|
||||||
|
int srcOffset = bytesToCopy;
|
||||||
|
bytesToCopy = len - bytesToCopy; // remaining bytes
|
||||||
|
bufferNumber++;
|
||||||
|
if (bufferNumber == file.buffers.size())
|
||||||
|
file.buffers.addElement(new byte[BUFFER_SIZE]);
|
||||||
|
buffer = (byte[])file.buffers.elementAt(bufferNumber);
|
||||||
|
System.arraycopy(src, srcOffset, buffer, 0, bytesToCopy);
|
||||||
|
}
|
||||||
|
pointer += len;
|
||||||
|
if (pointer > file.length)
|
||||||
|
file.length = pointer;
|
||||||
|
|
||||||
|
file.lastModified = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void seek(long pos) throws IOException {
|
||||||
|
super.seek(pos);
|
||||||
|
pointer = (int)pos;
|
||||||
|
}
|
||||||
|
public long length() {
|
||||||
|
return file.length;
|
||||||
|
}
|
||||||
|
}
|
|
@ -54,6 +54,7 @@ package org.apache.lucene;
|
||||||
* <http://www.apache.org/>.
|
* <http://www.apache.org/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.*;
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.*;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.document.*;
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.analysis.*;
|
||||||
|
@ -93,7 +94,7 @@ class ThreadSafetyTest {
|
||||||
Document d = new Document();
|
Document d = new Document();
|
||||||
int n = RANDOM.nextInt();
|
int n = RANDOM.nextInt();
|
||||||
d.add(Field.Keyword("id", Integer.toString(n)));
|
d.add(Field.Keyword("id", Integer.toString(n)));
|
||||||
d.add(Field.UnStored("contents", intToEnglish(n)));
|
d.add(Field.UnStored("contents", English.intToEnglish(n)));
|
||||||
System.out.println("Adding " + n);
|
System.out.println("Adding " + n);
|
||||||
|
|
||||||
// Switch between single and multiple file segments
|
// Switch between single and multiple file segments
|
||||||
|
@ -151,7 +152,7 @@ class ThreadSafetyTest {
|
||||||
throws Exception {
|
throws Exception {
|
||||||
System.out.println("Searching for " + n);
|
System.out.println("Searching for " + n);
|
||||||
Hits hits =
|
Hits hits =
|
||||||
searcher.search(QueryParser.parse(intToEnglish(n), "contents",
|
searcher.search(QueryParser.parse(English.intToEnglish(n), "contents",
|
||||||
ANALYZER));
|
ANALYZER));
|
||||||
System.out.println("Search for " + n + ": total=" + hits.length());
|
System.out.println("Search for " + n + ": total=" + hits.length());
|
||||||
for (int j = 0; j < Math.min(3, hits.length()); j++) {
|
for (int j = 0; j < Math.min(3, hits.length()); j++) {
|
||||||
|
@ -197,76 +198,4 @@ class ThreadSafetyTest {
|
||||||
SearcherThread searcherThread3 = new SearcherThread(true);
|
SearcherThread searcherThread3 = new SearcherThread(true);
|
||||||
searcherThread3.start();
|
searcherThread3.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String intToEnglish(int i) {
|
|
||||||
StringBuffer result = new StringBuffer();
|
|
||||||
intToEnglish(i, result);
|
|
||||||
return result.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void intToEnglish(int i, StringBuffer result) {
|
|
||||||
if (i < 0) {
|
|
||||||
result.append("minus ");
|
|
||||||
i = -i;
|
|
||||||
}
|
|
||||||
if (i >= 1000000000) { // billions
|
|
||||||
intToEnglish(i/1000000000, result);
|
|
||||||
result.append("billion, ");
|
|
||||||
i = i%1000000000;
|
|
||||||
}
|
|
||||||
if (i >= 1000000) { // millions
|
|
||||||
intToEnglish(i/1000000, result);
|
|
||||||
result.append("million, ");
|
|
||||||
i = i%1000000;
|
|
||||||
}
|
|
||||||
if (i >= 1000) { // thousands
|
|
||||||
intToEnglish(i/1000, result);
|
|
||||||
result.append("thousand, ");
|
|
||||||
i = i%1000;
|
|
||||||
}
|
|
||||||
if (i >= 100) { // hundreds
|
|
||||||
intToEnglish(i/100, result);
|
|
||||||
result.append("hundred ");
|
|
||||||
i = i%100;
|
|
||||||
}
|
|
||||||
if (i >= 20) {
|
|
||||||
switch (i/10) {
|
|
||||||
case 9 : result.append("ninety"); break;
|
|
||||||
case 8 : result.append("eighty"); break;
|
|
||||||
case 7 : result.append("seventy"); break;
|
|
||||||
case 6 : result.append("sixty"); break;
|
|
||||||
case 5 : result.append("fifty"); break;
|
|
||||||
case 4 : result.append("forty"); break;
|
|
||||||
case 3 : result.append("thirty"); break;
|
|
||||||
case 2 : result.append("twenty"); break;
|
|
||||||
}
|
|
||||||
i = i%10;
|
|
||||||
if (i == 0)
|
|
||||||
result.append(" ");
|
|
||||||
else
|
|
||||||
result.append("-");
|
|
||||||
}
|
|
||||||
switch (i) {
|
|
||||||
case 19 : result.append("nineteen "); break;
|
|
||||||
case 18 : result.append("eighteen "); break;
|
|
||||||
case 17 : result.append("seventeen "); break;
|
|
||||||
case 16 : result.append("sixteen "); break;
|
|
||||||
case 15 : result.append("fifteen "); break;
|
|
||||||
case 14 : result.append("fourteen "); break;
|
|
||||||
case 13 : result.append("thirteen "); break;
|
|
||||||
case 12 : result.append("twelve "); break;
|
|
||||||
case 11 : result.append("eleven "); break;
|
|
||||||
case 10 : result.append("ten "); break;
|
|
||||||
case 9 : result.append("nine "); break;
|
|
||||||
case 8 : result.append("eight "); break;
|
|
||||||
case 7 : result.append("seven "); break;
|
|
||||||
case 6 : result.append("six "); break;
|
|
||||||
case 5 : result.append("five "); break;
|
|
||||||
case 4 : result.append("four "); break;
|
|
||||||
case 3 : result.append("three "); break;
|
|
||||||
case 2 : result.append("two "); break;
|
|
||||||
case 1 : result.append("one "); break;
|
|
||||||
case 0 : result.append(""); break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,135 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import org.apache.lucene.util.English;
|
||||||
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests basic search capabilities.
|
||||||
|
*
|
||||||
|
* @author Doug Cutting
|
||||||
|
*/
|
||||||
|
public class TestBasics extends TestCase {
|
||||||
|
private IndexSearcher searcher;
|
||||||
|
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
RAMDirectory directory = new RAMDirectory();
|
||||||
|
IndexWriter writer
|
||||||
|
= new IndexWriter(directory, new SimpleAnalyzer(), true);
|
||||||
|
//writer.infoStream = System.out;
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
for (int i = 0; i < 1000; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(Field.Text("field", English.intToEnglish(i)));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
searcher = new IndexSearcher(directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTerm() throws Exception {
|
||||||
|
Query query = new TermQuery(new Term("field", "seventy"));
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(100, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTerm2() throws Exception {
|
||||||
|
Query query = new TermQuery(new Term("field", "seventish"));
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(0, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPhrase() throws Exception {
|
||||||
|
PhraseQuery query = new PhraseQuery();
|
||||||
|
query.add(new Term("field", "seventy"));
|
||||||
|
query.add(new Term("field", "seven"));
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(10, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPhrase2() throws Exception {
|
||||||
|
PhraseQuery query = new PhraseQuery();
|
||||||
|
query.add(new Term("field", "seventish"));
|
||||||
|
query.add(new Term("field", "sevenon"));
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(0, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBoolean() throws Exception {
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term("field", "seventy")), true, false);
|
||||||
|
query.add(new TermQuery(new Term("field", "seven")), true, false);
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(19, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBoolean2() throws Exception {
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(new TermQuery(new Term("field", "sevento")), true, false);
|
||||||
|
query.add(new TermQuery(new Term("field", "sevenly")), true, false);
|
||||||
|
Hits hits = searcher.search(query);
|
||||||
|
assertEquals(0, hits.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,140 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2004 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache Lucene" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Spells out integers in English, e.g. 77 becomes "seventy-seven ".
 * Apart from "zero", every spelling ends with a single trailing space.
 */
public class English {

  /** Spellings of 0..19, each including its trailing space; zero contributes nothing here. */
  private static final String[] BELOW_TWENTY = {
    "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ",
    "eight ", "nine ", "ten ", "eleven ", "twelve ", "thirteen ",
    "fourteen ", "fifteen ", "sixteen ", "seventeen ", "eighteen ",
    "nineteen "
  };

  /** Spellings of the multiples of ten, indexed by tens digit; entries 0 and 1 are unused. */
  private static final String[] TENS = {
    null, null, "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
    "eighty", "ninety"
  };

  /**
   * Returns the English spelling of <code>i</code>.
   *
   * @param i the number to spell out; any int, including Integer.MIN_VALUE
   * @return the spelling, e.g. "zero", "minus five ", "one thousand, "
   */
  public static String intToEnglish(int i) {
    StringBuffer result = new StringBuffer();
    intToEnglish(i, result);
    return result.toString();
  }

  /**
   * Appends the English spelling of <code>i</code> to <code>result</code>.
   *
   * @param i the number to spell out; any int, including Integer.MIN_VALUE
   * @param result the buffer to append to
   */
  public static void intToEnglish(int i, StringBuffer result) {
    if (i == 0) {
      result.append("zero");
      return;
    }
    // Widen before negating: -Integer.MIN_VALUE does not fit in an int and
    // would wrap back to a negative value, producing only "minus ".
    long n = i;
    if (n < 0) {
      result.append("minus ");
      n = -n;
    }
    appendPositive(n, result);
  }

  /** Appends the spelling of a strictly positive value, largest group first. */
  private static void appendPositive(long n, StringBuffer result) {
    if (n >= 1000000000L) {                       // billions
      appendPositive(n/1000000000L, result);
      result.append("billion, ");
      n = n%1000000000L;
    }
    if (n >= 1000000L) {                          // millions
      appendPositive(n/1000000L, result);
      result.append("million, ");
      n = n%1000000L;
    }
    if (n >= 1000L) {                             // thousands
      appendPositive(n/1000L, result);
      result.append("thousand, ");
      n = n%1000L;
    }
    if (n >= 100L) {                              // hundreds
      appendPositive(n/100L, result);
      result.append("hundred ");
      n = n%100L;
    }
    if (n >= 20L) {                               // tens
      result.append(TENS[(int)(n/10L)]);
      n = n%10L;
      // a hyphen joins the units digit; a bare multiple of ten ends with a space
      result.append(n == 0L ? " " : "-");
    }
    result.append(BELOW_TWENTY[(int)n]);          // 0..19; zero appends nothing
  }

  /**
   * Prints the spelling of the integer given as the first argument.
   * With no arguments, prints a usage line to standard error instead.
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("Usage: java org.apache.lucene.util.English <integer>");
      return;
    }
    System.out.println(intToEnglish(Integer.parseInt(args[0])));
  }

}
|
Loading…
Reference in New Issue