mirror of https://github.com/apache/lucene.git
Implementation of compound files. This reduces the number of files used by
Lucene to 1 per index segment (2 when deleted documents exist). Test cases
modified and added to go with this code.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150067 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 4e84ddc3f5
commit e2559e4003
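The change is opt-in: IndexWriter gains a getUseCompoundFile/setUseCompoundFile
pair (see the IndexWriter diff below), and a segment's files are combined into a
single ".cfs" file only when the setting is on. A minimal sketch of turning it
on, using only APIs that appear in this commit and its tests:

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class CompoundFileDemo {
      public static void main(String[] args) throws Exception {
        Directory directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);

        // Opt in: newly created segments are merged into one ".cfs" file apiece.
        writer.setUseCompoundFile(true);

        Document d = new Document();
        d.add(Field.Text("contents", "a b c d e"));
        writer.addDocument(d);
        writer.close();
      }
    }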
CompoundFileReader.java (new file)
@@ -0,0 +1,247 @@

package org.apache.lucene.index;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;

import java.util.HashMap;
import java.util.Iterator;
import java.io.IOException;


/** Class for accessing a compound stream.
 * This class implements a directory, but is limited to only read operations.
 * Directory methods that would normally modify data throw an exception.
 */
public class CompoundFileReader extends Directory {

    private static final class FileEntry {
        long offset;
        long length;
    }

    // Base info
    private Directory directory;
    private String fileName;

    // Reference count
    private boolean open;

    private InputStream stream;
    private HashMap entries = new HashMap();


    public CompoundFileReader(Directory dir, String name)
        throws IOException
    {
        directory = dir;
        fileName = name;

        boolean success = false;

        try {
            stream = dir.openFile(name);

            // read the directory and init files
            int count = stream.readVInt();
            FileEntry entry = null;
            for (int i=0; i<count; i++) {
                long offset = stream.readLong();
                String id = stream.readString();

                if (entry != null) {
                    // set length of the previous entry
                    entry.length = offset - entry.offset;
                }

                entry = new FileEntry();
                entry.offset = offset;
                entries.put(id, entry);
            }

            // set the length of the final entry
            if (entry != null) {
                entry.length = stream.length() - entry.offset;
            }

            success = true;

        } finally {
            // stream may still be null if openFile itself failed
            if (! success && stream != null) {
                try {
                    stream.close();
                } catch (IOException e) { }
            }
        }
    }

    public Directory getDirectory() {
        return directory;
    }

    public String getName() {
        return fileName;
    }

    public synchronized void close() throws IOException {
        if (stream == null)
            throw new IOException("Already closed");

        entries.clear();
        stream.close();
        stream = null;
    }


    public synchronized InputStream openFile(String id)
        throws IOException
    {
        if (stream == null)
            throw new IOException("Stream closed");

        FileEntry entry = (FileEntry) entries.get(id);
        if (entry == null)
            throw new IOException("No sub-file with id " + id + " found");

        return new CSInputStream(stream, entry.offset, entry.length);
    }


    /** Returns an array of strings, one for each file in the directory. */
    public String[] list() {
        String res[] = new String[entries.size()];
        return (String[]) entries.keySet().toArray(res);
    }

    /** Returns true iff a file with the given name exists. */
    public boolean fileExists(String name) {
        return entries.containsKey(name);
    }

    /** Returns the time the compound file itself was last modified. */
    public long fileModified(String name) throws IOException {
        return directory.fileModified(fileName);
    }

    /** Set the modified time of the compound file to now. */
    public void touchFile(String name) throws IOException {
        directory.touchFile(fileName);
    }

    /** Not supported: this directory is read-only. */
    public void deleteFile(String name)
    {
        throw new UnsupportedOperationException();
    }

    /** Not supported: this directory is read-only. */
    public void renameFile(String from, String to)
    {
        throw new UnsupportedOperationException();
    }


    /** Returns the length of a file in the directory. */
    public long fileLength(String name)
        throws IOException
    {
        FileEntry e = (FileEntry) entries.get(name);
        if (e == null)
            throw new IOException("File " + name + " does not exist");
        return e.length;
    }

    /** Not supported: this directory is read-only. */
    public OutputStream createFile(String name)
    {
        throw new UnsupportedOperationException();
    }

    /** Not supported: this directory is read-only.
     * @param name the name of the lock file
     */
    public Lock makeLock(String name)
    {
        throw new UnsupportedOperationException();
    }


    /** Implementation of an InputStream that reads from a portion of the
     * compound file. The visibility is left as "package" *only* because
     * this helps with testing since JUnit test cases in a different class
     * can then access package fields of this class.
     */
    static final class CSInputStream extends InputStream {

        InputStream base;
        long fileOffset;

        CSInputStream(final InputStream base,
                      final long fileOffset,
                      final long length)
            throws IOException
        {
            this.base = (InputStream) base.clone();
            this.fileOffset = fileOffset;
            this.length = length;            // variable in the superclass
            seekInternal(0);                 // position to the adjusted 0th byte
        }


        /** Expert: implements buffer refill. Reads bytes from the current
         * position in the input.
         * @param b the array to read bytes into
         * @param offset the offset in the array to start storing bytes
         * @param len the number of bytes to read
         */
        protected void readInternal(byte[] b, int offset, int len)
            throws IOException
        {
            base.readBytes(b, offset, len);
        }

        /** Expert: implements seek. Sets current position in this file, where
         * the next {@link #readInternal(byte[],int,int)} will occur.
         * @see #readInternal(byte[],int,int)
         */
        protected void seekInternal(long pos) throws IOException
        {
            if (pos > 0 && pos >= length)
                throw new IOException("Seek past the end of file");

            if (pos < 0)
                throw new IOException("Seek to a negative offset");

            base.seek(fileOffset + pos);
        }

        /** Closes the stream to further operations. */
        public void close() throws IOException
        {
            base.close();
        }


        /** Returns a clone of this stream.
         *
         * <p>Clones of a stream access the same data, and are positioned at the same
         * point as the stream they were cloned from.
         *
         * <p>Expert: Subclasses must ensure that clones may be positioned at
         * different points in the input from each other and from the stream they
         * were cloned from.
         */
        public Object clone() {
            CSInputStream other = (CSInputStream) super.clone();
            other.base = (InputStream) base.clone();
            return other;
        }
    }

}
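A short sketch of reading one sub-file back through this class. It relies only
on the API defined above plus FSDirectory.getDirectory and
InputStream.readBytes (both used elsewhere in this commit); the index path and
compound file name are hypothetical:

    import org.apache.lucene.index.CompoundFileReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.InputStream;

    public class ReadCompoundDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.getDirectory("/tmp/index", false); // hypothetical path
        CompoundFileReader cfs = new CompoundFileReader(dir, "_a.cfs"); // hypothetical name

        String[] ids = cfs.list();            // ids of the contained sub-files
        long len = cfs.fileLength(ids[0]);    // length recorded in the stream directory
        InputStream in = cfs.openFile(ids[0]);
        byte[] data = new byte[(int) len];
        in.readBytes(data, 0, (int) len);     // copy the whole sub-file
        in.close();
        cfs.close();
      }
    }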
CompoundFileWriter.java (new file)
@@ -0,0 +1,210 @@

package org.apache.lucene.index;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream;
import java.util.LinkedList;
import java.util.HashSet;
import java.util.Iterator;
import java.io.IOException;


/** Combines multiple files into a single compound file.
 * The file format:<br>
 * <ul>
 *   <li>VInt fileCount</li>
 *   <li>{Directory}
 *       fileCount entries with the following structure:</li>
 *   <ul>
 *     <li>long dataOffset</li>
 *     <li>UTFString extension</li>
 *   </ul>
 *   <li>{File Data}
 *       fileCount entries with the raw data of the corresponding file</li>
 * </ul>
 *
 * The fileCount integer indicates how many files are contained in this compound
 * file. The {directory} that follows has that many entries. Each directory entry
 * contains a long pointer to the start of this file's data section, and a UTF
 * String with that file's extension.
 */
final class CompoundFileWriter {

    private static final class FileEntry {
        /** source file */
        String file;

        /** temporary holder for the start of directory entry for this file */
        long directoryOffset;

        /** temporary holder for the start of this file's data section */
        long dataOffset;
    }


    private Directory directory;
    private String fileName;
    private HashSet ids;
    private LinkedList entries;
    private boolean merged = false;


    /** Create the compound stream in the specified file. The file name is the
     * entire name (no extensions are added).
     */
    public CompoundFileWriter(Directory dir, String name) {
        if (dir == null)
            throw new IllegalArgumentException("Missing directory");
        if (name == null)
            throw new IllegalArgumentException("Missing name");

        directory = dir;
        fileName = name;
        ids = new HashSet();
        entries = new LinkedList();
    }

    /** Returns the directory of the compound file. */
    public Directory getDirectory() {
        return directory;
    }

    /** Returns the name of the compound file. */
    public String getName() {
        return fileName;
    }


    /** Add a source file. Its name is also the id by which the sub-stream
     * will be known in the compound stream, so the caller must ensure it is
     * unique; adding the same file twice throws an IllegalArgumentException.
     */
    public void addFile(String file) {
        if (merged)
            throw new IllegalStateException(
                "Can't add extensions after merge has been called");

        if (file == null)
            throw new IllegalArgumentException(
                "Missing source file");

        if (! ids.add(file))
            throw new IllegalArgumentException(
                "File " + file + " already added");

        FileEntry entry = new FileEntry();
        entry.file = file;
        entries.add(entry);
    }

    /** Merge files with the extensions added up to now.
     * All files with these extensions are combined sequentially into the
     * compound stream. After successful merge, the source files
     * are deleted.
     */
    public void close() throws IOException {
        if (merged)
            throw new IllegalStateException(
                "Merge already performed");

        if (entries.isEmpty())
            throw new IllegalStateException(
                "No entries to merge have been defined");

        merged = true;

        // open the compound stream
        OutputStream os = null;
        try {
            os = directory.createFile(fileName);

            // Write the number of entries
            os.writeVInt(entries.size());

            // Write the directory with all offsets at 0.
            // Remember the positions of directory entries so that we can
            // adjust the offsets later
            Iterator it = entries.iterator();
            while(it.hasNext()) {
                FileEntry fe = (FileEntry) it.next();
                fe.directoryOffset = os.getFilePointer();
                os.writeLong(0);    // for now
                os.writeString(fe.file);
            }

            // Open the files and copy their data into the stream.
            // Remember the locations of each file's data section.
            byte buffer[] = new byte[1024];
            it = entries.iterator();
            while(it.hasNext()) {
                FileEntry fe = (FileEntry) it.next();
                fe.dataOffset = os.getFilePointer();
                copyFile(fe, os, buffer);
            }

            // Write the data offsets into the directory of the compound stream
            it = entries.iterator();
            while(it.hasNext()) {
                FileEntry fe = (FileEntry) it.next();
                os.seek(fe.directoryOffset);
                os.writeLong(fe.dataOffset);
            }

            // Close the output stream. Set the os to null before trying to
            // close so that if an exception occurs during the close, the
            // finally clause below will not attempt to close the stream
            // the second time.
            OutputStream tmp = os;
            os = null;
            tmp.close();

        } finally {
            if (os != null) try { os.close(); } catch (IOException e) { }
        }
    }


    /** Copy the contents of the file with specified extension into the
     * provided output stream. Use the provided buffer for moving data
     * to reduce memory allocation.
     */
    private void copyFile(FileEntry source, OutputStream os, byte buffer[])
        throws IOException
    {
        InputStream is = null;
        try {
            long startPtr = os.getFilePointer();

            is = directory.openFile(source.file);
            long length = is.length();
            long remainder = length;
            int chunk = buffer.length;

            while(remainder > 0) {
                int len = (int) Math.min(chunk, remainder);
                is.readBytes(buffer, 0, len);
                os.writeBytes(buffer, len);
                remainder -= len;
            }

            // Verify that remainder is 0
            if (remainder != 0)
                throw new IOException(
                    "Non-zero remainder length after copying: " + remainder
                    + " (id: " + source.file + ", length: " + length
                    + ", buffer size: " + chunk + ")");

            // Verify that the output length diff is equal to original file
            long endPtr = os.getFilePointer();
            long diff = endPtr - startPtr;
            if (diff != length)
                throw new IOException(
                    "Difference in the output file offsets " + diff
                    + " does not match the original file length " + length);

        } finally {
            if (is != null) is.close();
        }
    }
}
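CompoundFileWriter itself is package-private, so only index-level code such as
SegmentMerger.createCompoundFile (added later in this commit) can drive it. A
minimal sketch of that calling pattern, assuming the two source files already
exist in the directory (the file names are hypothetical):

    package org.apache.lucene.index; // required: the writer is package-private

    import java.io.IOException;
    import org.apache.lucene.store.Directory;

    class CompoundFileWriterSketch {
      static void combine(Directory dir) throws IOException {
        CompoundFileWriter writer = new CompoundFileWriter(dir, "_a.cfs");
        writer.addFile("_a.fnm"); // hypothetical source file names
        writer.addFile("_a.frq");
        writer.close();           // performs the merge, then deletes the sources
      }
    }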
IndexWriter.java
@@ -100,6 +100,29 @@ public class IndexWriter {
   private Lock writeLock;
 
+  /** Use compound file setting. Defaults to false to maintain multiple files
+   *  per segment behavior.
+   */
+  private boolean useCompoundFile = false;
+
+
+  /** Setting to turn on usage of a compound file. When on, multiple files
+   *  for each segment are merged into a single file once the segment creation
+   *  is finished. This is done regardless of what directory is in use.
+   */
+  public boolean getUseCompoundFile() {
+    return useCompoundFile;
+  }
+
+  /** Setting to turn on usage of a compound file. When on, multiple files
+   *  for each segment are merged into a single file once the segment creation
+   *  is finished. This is done regardless of what directory is in use.
+   */
+  public void setUseCompoundFile(boolean value) {
+    useCompoundFile = value;
+  }
+
+
   /** Expert: Set the Similarity implementation used by this IndexWriter.
    *
    * @see Similarity#setDefault(Similarity)
@@ -150,14 +173,14 @@ public class IndexWriter {
     synchronized (directory) {          // in- & inter-process sync
       new Lock.With(directory.makeLock("commit.lock"), COMMIT_LOCK_TIMEOUT) {
           public Object doBody() throws IOException {
             if (create)
               segmentInfos.write(directory);
             else
               segmentInfos.read(directory);
             return null;
           }
         }.run();
     }
   }
@@ -266,12 +289,14 @@ public class IndexWriter {
   public synchronized void optimize() throws IOException {
     flushRamSegments();
     while (segmentInfos.size() > 1 ||
           (segmentInfos.size() == 1 &&
            (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
-            segmentInfos.info(0).dir != directory))) {
+            (useCompoundFile &&
+             !SegmentReader.usesCompoundFile(segmentInfos.info(0))) ||
+            segmentInfos.info(0).dir != directory))) {
       int minSegment = segmentInfos.size() - mergeFactor;
       mergeSegments(minSegment < 0 ? 0 : minSegment);
     }
   }
 
   /** Merges all segments from an array of indexes into this index.
@@ -290,7 +315,7 @@ public class IndexWriter {
       SegmentInfos sis = new SegmentInfos(); // read infos from dir
       sis.read(dirs[i]);
       for (int j = 0; j < sis.size(); j++) {
         segmentInfos.addElement(sis.info(j)); // add each info
       }
     }
     optimize();          // final cleanup
@@ -301,13 +326,13 @@ public class IndexWriter {
     int minSegment = segmentInfos.size()-1;
     int docCount = 0;
     while (minSegment >= 0 &&
           (segmentInfos.info(minSegment)).dir == ramDirectory) {
       docCount += segmentInfos.info(minSegment).docCount;
       minSegment--;
     }
     if (minSegment < 0 ||            // add one FS segment?
         (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
         !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
       minSegment++;
     if (minSegment >= segmentInfos.size())
       return;            // none to merge
@@ -322,16 +347,16 @@ public class IndexWriter {
       int minSegment = segmentInfos.size();
       int mergeDocs = 0;
       while (--minSegment >= 0) {
         SegmentInfo si = segmentInfos.info(minSegment);
         if (si.docCount >= targetMergeDocs)
           break;
         mergeDocs += si.docCount;
       }
 
       if (mergeDocs >= targetMergeDocs)      // found a merge to do
         mergeSegments(minSegment+1);
       else
         break;
 
       targetMergeDocs *= mergeFactor;        // increase target size
     }
@@ -344,17 +369,19 @@ public class IndexWriter {
     String mergedName = newSegmentName();
     int mergedDocCount = 0;
     if (infoStream != null) infoStream.print("merging segments");
-    SegmentMerger merger = new SegmentMerger(directory, mergedName);
+    SegmentMerger merger =
+      new SegmentMerger(directory, mergedName, useCompoundFile);
 
     final Vector segmentsToDelete = new Vector();
     for (int i = minSegment; i < segmentInfos.size(); i++) {
       SegmentInfo si = segmentInfos.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
       SegmentReader reader = new SegmentReader(si);
       merger.add(reader);
       if ((reader.directory == this.directory) || // if we own the directory
           (reader.directory == this.ramDirectory))
         segmentsToDelete.addElement(reader);      // queue segment for deletion
       mergedDocCount += reader.numDocs();
     }
     if (infoStream != null) {
@@ -362,19 +389,19 @@ public class IndexWriter {
       infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
     }
     merger.merge();
 
     segmentInfos.setSize(minSegment);          // pop old infos & add new
     segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
                                             directory));
 
     synchronized (directory) {          // in- & inter-process sync
       new Lock.With(directory.makeLock("commit.lock"), COMMIT_LOCK_TIMEOUT) {
           public Object doBody() throws IOException {
             segmentInfos.write(directory);     // commit before deleting
             deleteSegments(segmentsToDelete);  // delete now-unused segments
             return null;
           }
         }.run();
     }
   }
@@ -391,9 +418,9 @@ public class IndexWriter {
     for (int i = 0; i < segments.size(); i++) {
       SegmentReader reader = (SegmentReader)segments.elementAt(i);
       if (reader.directory == this.directory)
         deleteFiles(reader.files(), deletable);        // try to delete our files
       else
         deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
     }
 
     writeDeleteableFiles(deletable);           // note files we can't delete
@@ -410,13 +437,13 @@ public class IndexWriter {
     for (int i = 0; i < files.size(); i++) {
       String file = (String)files.elementAt(i);
       try {
         directory.deleteFile(file);        // try to delete each file
       } catch (IOException e) {            // if delete fails
         if (directory.fileExists(file)) {
           if (infoStream != null)
             infoStream.println(e.getMessage() + "; Will re-try later.");
           deletable.addElement(file);      // add to deletable
         }
       }
     }
@@ -429,7 +456,7 @@ public class IndexWriter {
     InputStream input = directory.openFile("deletable");
     try {
       for (int i = input.readInt(); i > 0; i--)    // read file names
         result.addElement(input.readString());
     } finally {
       input.close();
     }
@@ -441,7 +468,7 @@ public class IndexWriter {
     try {
       output.writeInt(files.size());
       for (int i = 0; i < files.size(); i++)
         output.writeString((String)files.elementAt(i));
     } finally {
       output.close();
     }
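A side effect of the optimize() change above: flipping the setting on an
existing index and optimizing converts it, because the loop keeps merging while
the one remaining segment does not yet use a compound file. A hedged sketch
(the index path is hypothetical):

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.FSDirectory;

    public class ConvertToCompound {
      public static void main(String[] args) throws Exception {
        IndexWriter writer = new IndexWriter(
            FSDirectory.getDirectory("/tmp/index", false), // hypothetical path
            new SimpleAnalyzer(), false);                  // open existing index
        writer.setUseCompoundFile(true);
        writer.optimize(); // re-merges until the single segment is a ".cfs"
        writer.close();
      }
    }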
SegmentMerger.java
@@ -55,6 +55,8 @@ package org.apache.lucene.index;
  */
 
 import java.util.Vector;
+import java.util.ArrayList;
+import java.util.Iterator;
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
@@ -63,15 +65,17 @@ import org.apache.lucene.store.InputStream;
 import org.apache.lucene.util.BitVector;
 
 final class SegmentMerger {
+  private boolean useCompoundFile;
   private Directory directory;
   private String segment;
 
   private Vector readers = new Vector();
   private FieldInfos fieldInfos;
 
-  SegmentMerger(Directory dir, String name) {
+  SegmentMerger(Directory dir, String name, boolean compoundFile) {
     directory = dir;
     segment = name;
+    useCompoundFile = compoundFile;
   }
 
   final void add(SegmentReader reader) {
@@ -90,12 +94,62 @@ final class SegmentMerger {
 
     } finally {
       for (int i = 0; i < readers.size(); i++) {  // close readers
         SegmentReader reader = (SegmentReader)readers.elementAt(i);
         reader.close();
       }
     }
+
+    if (useCompoundFile)
+      createCompoundFile();
   }
+
+
+  // Add the fixed files
+  private final String COMPOUND_EXTENSIONS[] = new String[] {
+    "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
+  };
+
+
+  private final void createCompoundFile()
+          throws IOException
+  {
+    CompoundFileWriter oneWriter =
+        new CompoundFileWriter(directory, segment + ".cfs");
+
+    ArrayList files =
+      new ArrayList(COMPOUND_EXTENSIONS.length + fieldInfos.size());
+
+    // Basic files
+    for (int i=0; i<COMPOUND_EXTENSIONS.length; i++) {
+      files.add(segment + "." + COMPOUND_EXTENSIONS[i]);
+    }
+
+    // Field norm files
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed) {
+        files.add(segment + ".f" + i);
+      }
+    }
+
+    // Now merge all added files
+    Iterator it = files.iterator();
+    while(it.hasNext()) {
+      oneWriter.addFile((String) it.next());
+    }
+
+    // Perform the merge
+    oneWriter.close();
+
+    // Now delete the source files
+    it = files.iterator();
+    while(it.hasNext()) {
+      directory.deleteFile((String) it.next());
+    }
+  }
+
+
   private final void mergeFields() throws IOException {
     fieldInfos = new FieldInfos();        // merge field names
     for (int i = 0; i < readers.size(); i++) {
@@ -108,12 +162,12 @@ final class SegmentMerger {
         new FieldsWriter(directory, segment, fieldInfos);
     try {
       for (int i = 0; i < readers.size(); i++) {
         SegmentReader reader = (SegmentReader)readers.elementAt(i);
         BitVector deletedDocs = reader.deletedDocs;
         int maxDoc = reader.maxDoc();
         for (int j = 0; j < maxDoc; j++)
           if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
             fieldsWriter.addDocument(reader.document(j));
       }
     } finally {
       fieldsWriter.close();
@@ -130,7 +184,7 @@ final class SegmentMerger {
       freqOutput = directory.createFile(segment + ".frq");
       proxOutput = directory.createFile(segment + ".prx");
       termInfosWriter =
           new TermInfosWriter(directory, segment, fieldInfos);
 
       mergeTermInfos();
 
@@ -151,9 +205,9 @@ final class SegmentMerger {
       SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
       base += reader.numDocs();
       if (smi.next())
         queue.put(smi);          // initialize queue
       else
         smi.close();
     }
 
     SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
@@ -165,18 +219,18 @@ final class SegmentMerger {
       SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
 
       while (top != null && term.compareTo(top.term) == 0) {
         match[matchSize++] = (SegmentMergeInfo)queue.pop();
         top = (SegmentMergeInfo)queue.top();
       }
 
       mergeTermInfo(match, matchSize);          // add new TermInfo
 
       while (matchSize > 0) {
         SegmentMergeInfo smi = match[--matchSize];
         if (smi.next())
           queue.put(smi);          // restore queue
         else
           smi.close();             // done with a segment
       }
     }
   }
@@ -209,34 +263,34 @@ final class SegmentMerger {
       smi.termEnum.termInfo(termInfo);
       postings.seek(termInfo);
       while (postings.next()) {
         int doc;
         if (docMap == null)
           doc = base + postings.doc;          // no deletions
         else
           doc = base + docMap[postings.doc];  // re-map around deletions
 
         if (doc < lastDoc)
           throw new IllegalStateException("docs out of order");
 
         int docCode = (doc - lastDoc) << 1;   // use low bit to flag freq=1
         lastDoc = doc;
 
         int freq = postings.freq;
         if (freq == 1) {
           freqOutput.writeVInt(docCode | 1);  // write doc & freq=1
         } else {
           freqOutput.writeVInt(docCode);      // write doc
           freqOutput.writeVInt(freq);         // write frequency in doc
         }
 
         int lastPosition = 0;                 // write position deltas
         for (int j = 0; j < freq; j++) {
           int position = postings.nextPosition();
           proxOutput.writeVInt(position - lastPosition);
           lastPosition = position;
         }
 
         df++;
       }
     }
     return df;
@@ -246,27 +300,27 @@ final class SegmentMerger {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed) {
         OutputStream output = directory.createFile(segment + ".f" + i);
         try {
           for (int j = 0; j < readers.size(); j++) {
             SegmentReader reader = (SegmentReader)readers.elementAt(j);
             BitVector deletedDocs = reader.deletedDocs;
             InputStream input = reader.normStream(fi.name);
             int maxDoc = reader.maxDoc();
             try {
               for (int k = 0; k < maxDoc; k++) {
                 byte norm = input != null ? input.readByte() : (byte)0;
                 if (deletedDocs == null || !deletedDocs.get(k))
                   output.writeByte(norm);
               }
             } finally {
               if (input != null)
                 input.close();
             }
           }
         } finally {
           output.close();
         }
       }
     }
   }
SegmentReader.java
@@ -65,6 +65,7 @@ import java.util.Vector;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.InputStream;
 import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
 
 final class SegmentReader extends IndexReader {
@@ -81,7 +82,9 @@ final class SegmentReader extends IndexReader {
 
   InputStream freqStream;
   InputStream proxStream;
 
+  // Compound File Reader when based on a compound file segment
+  CompoundFileReader cfsReader;
+
   private static class Norm {
     public Norm(InputStream in) { this.in = in; }
@@ -101,32 +104,42 @@ final class SegmentReader extends IndexReader {
     super(si.dir);
     segment = si.name;
 
-    fieldInfos = new FieldInfos(directory, segment + ".fnm");
-    fieldsReader = new FieldsReader(directory, segment, fieldInfos);
-
-    tis = new TermInfosReader(directory, segment, fieldInfos);
+    // Use compound file directory for some files, if it exists
+    Directory cfsDir = directory;
+    if (directory.fileExists(segment + ".cfs")) {
+      cfsReader = new CompoundFileReader(directory, segment + ".cfs");
+      cfsDir = cfsReader;
+    }
+
+    // No compound file exists - use the multi-file format
+    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
+    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+
+    tis = new TermInfosReader(cfsDir, segment, fieldInfos);
 
+    // NOTE: the bitvector is stored using the regular directory, not cfs
     if (hasDeletions(si))
       deletedDocs = new BitVector(directory, segment + ".del");
 
     // make sure that all index files have been read or are kept open
     // so that if an index update removes them we'll still have them
-    freqStream = directory.openFile(segment + ".frq");
-    proxStream = directory.openFile(segment + ".prx");
-    openNorms();
+    freqStream = cfsDir.openFile(segment + ".frq");
+    proxStream = cfsDir.openFile(segment + ".prx");
+    openNorms(cfsDir);
   }
 
   final synchronized void doClose() throws IOException {
     if (deletedDocsDirty) {
       synchronized (directory) {          // in- & inter-process sync
         new Lock.With(directory.makeLock("commit.lock"), IndexWriter.COMMIT_LOCK_TIMEOUT) {
             public Object doBody() throws IOException {
               deletedDocs.write(directory, segment + ".tmp");
               directory.renameFile(segment + ".tmp", segment + ".del");
               directory.touchFile("segments");
               return null;
             }
           }.run();
       }
       deletedDocsDirty = false;
     }
@@ -140,6 +153,9 @@ final class SegmentReader extends IndexReader {
     proxStream.close();
 
     closeNorms();
 
+    if (cfsReader != null)
+      cfsReader.close();
+
     if (closeDirectory)
       directory.close();
@@ -149,6 +165,10 @@ final class SegmentReader extends IndexReader {
     return si.dir.fileExists(si.name + ".del");
   }
 
+  static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
+    return si.dir.fileExists(si.name + ".cfs");
+  }
+
   final synchronized void doDelete(int docNum) throws IOException {
     if (deletedDocs == null)
       deletedDocs = new BitVector(maxDoc());
@@ -158,21 +178,20 @@ final class SegmentReader extends IndexReader {
 
   final Vector files() throws IOException {
     Vector files = new Vector(16);
-    files.addElement(segment + ".fnm");
-    files.addElement(segment + ".fdx");
-    files.addElement(segment + ".fdt");
-    files.addElement(segment + ".tii");
-    files.addElement(segment + ".tis");
-    files.addElement(segment + ".frq");
-    files.addElement(segment + ".prx");
-    if (directory.fileExists(segment + ".del"))
-      files.addElement(segment + ".del");
+    final String ext[] = new String[] {
+      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del"
+    };
+
+    for (int i=0; i<ext.length; i++) {
+      String name = segment + "." + ext[i];
+      if (directory.fileExists(name))
+        files.addElement(name);
+    }
 
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed)
         files.addElement(segment + ".f" + i);
     }
     return files;
   }
@@ -188,7 +207,7 @@ final class SegmentReader extends IndexReader {
   public final synchronized Document document(int n) throws IOException {
     if (isDeleted(n))
       throw new IllegalArgumentException
               ("attempt to access a deleted document");
     return fieldsReader.doc(n);
   }
 
@@ -282,12 +301,12 @@ final class SegmentReader extends IndexReader {
     return result;
   }
 
-  private final void openNorms() throws IOException {
+  private final void openNorms(Directory useDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed)
         norms.put(fi.name,
-                  new Norm(directory.openFile(segment + ".f" + fi.number)));
+                  new Norm(useDir.openFile(segment + ".f" + fi.number)));
     }
   }
 
@@ -295,8 +314,8 @@ final class SegmentReader extends IndexReader {
     synchronized (norms) {
       Enumeration enumerator = norms.elements();
       while (enumerator.hasMoreElements()) {
         Norm norm = (Norm)enumerator.nextElement();
         norm.in.close();
       }
     }
   }
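SegmentReader.usesCompoundFile is package-private, so code outside
org.apache.lucene.index cannot call it; the equivalent check is simply whether
the segment's ".cfs" file exists. A small sketch (index path and segment name
hypothetical):

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class CheckCompound {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.getDirectory("/tmp/index", false);
        // Mirrors SegmentReader.usesCompoundFile(si): a segment is in compound
        // format iff its ".cfs" file exists in the segment's directory.
        boolean compound = dir.fileExists("_a" + ".cfs");
        System.out.println("segment _a compound: " + compound);
      }
    }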
TestSearch.java (new file)
@@ -0,0 +1,178 @@

package org.apache.lucene;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import java.util.GregorianCalendar;
import java.io.PrintWriter;
import java.io.StringWriter;

import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;

/** JUnit adaptation of an older test case SearchTest.
 * @author dmitrys@earthlink.net
 * @version $Id$
 */
public class TestSearch extends TestCase {

    /** Main for running test case by itself. */
    public static void main(String args[]) {
        TestRunner.run (new TestSuite(TestSearch.class));
    }

    /** This test performs a number of searches. It also compares output
     *  of searches using multi-file index segments with single-file
     *  index segments.
     *
     *  TODO: someone should check that the results of the searches are
     *  still correct by adding assert statements. Right now, the test
     *  passes if the results are the same between multi-file and
     *  single-file formats, even if the results are wrong.
     */
    public void testSearch() throws Exception {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw, true);
        doTestSearch(pw, false);
        pw.close();
        sw.close();
        String multiFileOutput = sw.getBuffer().toString();
        System.out.println(multiFileOutput);

        sw = new StringWriter();
        pw = new PrintWriter(sw, true);
        doTestSearch(pw, true);
        pw.close();
        sw.close();
        String singleFileOutput = sw.getBuffer().toString();

        assertEquals(multiFileOutput, singleFileOutput);
    }


    private void doTestSearch(PrintWriter out, boolean useCompoundFile)
        throws Exception
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        writer.setUseCompoundFile(useCompoundFile);

        String[] docs = {
            "a b c d e",
            "a b c d e a b c d e",
            "a b c d e f g h i j",
            "a c e",
            "e c a",
            "a c e a c e",
            "a c e a b c"
        };
        for (int j = 0; j < docs.length; j++) {
            Document d = new Document();
            d.add(Field.Text("contents", docs[j]));
            writer.addDocument(d);
        }
        writer.close();

        Searcher searcher = new IndexSearcher(directory);

        String[] queries = {
            "a b",
            "\"a b\"",
            "\"a b c\"",
            "a c",
            "\"a c\"",
            "\"a c e\"",
        };
        Hits hits = null;

        QueryParser parser = new QueryParser("contents", analyzer);
        parser.setPhraseSlop(4);
        for (int j = 0; j < queries.length; j++) {
            Query query = parser.parse(queries[j]);
            out.println("Query: " + query.toString("contents"));

            //DateFilter filter =
            //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
            //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
            //System.out.println(filter);

            hits = searcher.search(query);

            out.println(hits.length() + " total results");
            for (int i = 0 ; i < hits.length() && i < 10; i++) {
                Document d = hits.doc(i);
                out.println(i + " " + hits.score(i)
                            // + " " + DateField.stringToDate(d.get("modified"))
                            + " " + d.get("contents"));
            }
        }
        searcher.close();
    }

    static long Time(int year, int month, int day) {
        GregorianCalendar calendar = new GregorianCalendar();
        calendar.set(year, month, day);
        return calendar.getTime().getTime();
    }
}
@ -0,0 +1,190 @@
package org.apache.lucene;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;

import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;

import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;


/** JUnit adaptation of an older test case SearchTestForDuplicates.
 * @author dmitrys@earthlink.net
 * @version $Id$
 */
public class TestSearchForDuplicates extends TestCase {

  /** Main for running test case by itself. */
  public static void main(String args[]) {
    TestRunner.run (new TestSuite(TestSearchForDuplicates.class));
  }


  static final String PRIORITY_FIELD = "priority";
  static final String ID_FIELD = "id";
  static final String HIGH_PRIORITY = "high";
  static final String MED_PRIORITY = "medium";
  static final String LOW_PRIORITY = "low";


  /** This test compares search results when using and not using compound
   * files.
   *
   * TODO: There is rudimentary search result validation as well, but it is
   * simply based on asserting the output observed in the old test case,
   * without really knowing if the output is correct. Someone needs to
   * validate this output and make any changes to the checkHits method.
   */
  public void testRun() throws Exception {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw, true);
    doTest(pw, false);
    pw.close();
    sw.close();
    String multiFileOutput = sw.getBuffer().toString();
    System.out.println(multiFileOutput);

    sw = new StringWriter();
    pw = new PrintWriter(sw, true);
    doTest(pw, true);
    pw.close();
    sw.close();
    String singleFileOutput = sw.getBuffer().toString();

    assertEquals(multiFileOutput, singleFileOutput);
  }
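
  // The switch this test exercises (a sketch; these are the same calls made
  // in doTest below):
  //
  //   IndexWriter writer = new IndexWriter(directory, analyzer, true);
  //   writer.setUseCompoundFile(true);   // segment stored as a single compound file
  //   writer.setUseCompoundFile(false);  // segment stored as multiple files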


  private void doTest(PrintWriter out, boolean useCompoundFiles) throws Exception {
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);

    writer.setUseCompoundFile(useCompoundFiles);

    final int MAX_DOCS = 225;

    for (int j = 0; j < MAX_DOCS; j++) {
      Document d = new Document();
      d.add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
      d.add(Field.Text(ID_FIELD, Integer.toString(j)));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    Searcher searcher = new IndexSearcher(directory);
    Hits hits = null;

    QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);

    Query query = parser.parse(HIGH_PRIORITY);
    out.println("Query: " + query.toString(PRIORITY_FIELD));

    hits = searcher.search(query);
    printHits(out, hits);
    checkHits(hits, MAX_DOCS);

    searcher.close();

    // try a new search with OR
    searcher = new IndexSearcher(directory);
    hits = null;

    parser = new QueryParser(PRIORITY_FIELD, analyzer);

    query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
    out.println("Query: " + query.toString(PRIORITY_FIELD));

    hits = searcher.search(query);
    printHits(out, hits);
    checkHits(hits, MAX_DOCS);

    searcher.close();
  }


  private void printHits(PrintWriter out, Hits hits) throws IOException {
    out.println(hits.length() + " total results\n");
    for (int i = 0 ; i < hits.length(); i++) {
      if ( i < 10 || (i > 94 && i < 105) ) {
        Document d = hits.doc(i);
        out.println(i + " " + d.get(ID_FIELD));
      }
    }
  }

  private void checkHits(Hits hits, int expectedCount) throws IOException {
    assertEquals("total results", expectedCount, hits.length());
    for (int i = 0 ; i < hits.length(); i++) {
      if ( i < 10 || (i > 94 && i < 105) ) {
        Document d = hits.doc(i);
        assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
      }
    }
  }

}

@ -62,6 +62,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.queryParser.*;

 import java.util.Random;
+import java.io.File;

 class ThreadSafetyTest {
   private static final Analyzer ANALYZER = new SimpleAnalyzer();

@ -86,26 +87,33 @@ class ThreadSafetyTest {

     public void run() {
       try {
+        boolean useCompoundFiles = false;
+
         for (int i = 0; i < 1024*ITERATIONS; i++) {
           Document d = new Document();
           int n = RANDOM.nextInt();
           d.add(Field.Keyword("id", Integer.toString(n)));
           d.add(Field.UnStored("contents", intToEnglish(n)));
           System.out.println("Adding " + n);
+
+          // Switch between single and multiple file segments
+          useCompoundFiles = Math.random() < 0.5;
+          writer.setUseCompoundFile(useCompoundFiles);
+
           writer.addDocument(d);

           if (i%reopenInterval == 0) {
             writer.close();
             writer = new IndexWriter("index", ANALYZER, false);
           }
         }

         writer.close();

       } catch (Exception e) {
         System.out.println(e.toString());
         e.printStackTrace();
         System.exit(0);
       }
     }
   }

@ -116,26 +124,26 @@ class ThreadSafetyTest {

     public SearcherThread(boolean useGlobal) throws java.io.IOException {
       if (!useGlobal)
         this.searcher = new IndexSearcher("index");
     }

     public void run() {
       try {
         for (int i = 0; i < 512*ITERATIONS; i++) {
           searchFor(RANDOM.nextInt(), (searcher==null)?SEARCHER:searcher);
           if (i%reopenInterval == 0) {
             if (searcher == null) {
               SEARCHER = new IndexSearcher("index");
             } else {
               searcher.close();
               searcher = new IndexSearcher("index");
             }
           }
         }
       } catch (Exception e) {
         System.out.println(e.toString());
         e.printStackTrace();
         System.exit(0);
       }
     }

@ -143,11 +151,11 @@ class ThreadSafetyTest {
       throws Exception {
       System.out.println("Searching for " + n);
       Hits hits =
         searcher.search(QueryParser.parse(intToEnglish(n), "contents",
                                           ANALYZER));
       System.out.println("Search for " + n + ": total=" + hits.length());
       for (int j = 0; j < Math.min(3, hits.length()); j++) {
         System.out.println("Hit for " + n + ": " + hits.doc(j).get("id"));
       }
     }
   }

@ -159,15 +167,18 @@ class ThreadSafetyTest {

     for (int i = 0; i < args.length; i++) {
       if ("-ro".equals(args[i]))
         readOnly = true;
       if ("-add".equals(args[i]))
         add = true;
     }

-    IndexReader.unlock(FSDirectory.getDirectory("index", false));
+    File indexDir = new File("index");
+    if (! indexDir.exists()) indexDir.mkdirs();
+
+    IndexReader.unlock(FSDirectory.getDirectory(indexDir, false));

     if (!readOnly) {
-      IndexWriter writer = new IndexWriter("index", ANALYZER, !add);
+      IndexWriter writer = new IndexWriter(indexDir, ANALYZER, !add);

       Thread indexerThread = new IndexerThread(writer);
       indexerThread.start();

@ -178,7 +189,7 @@ class ThreadSafetyTest {
     SearcherThread searcherThread1 = new SearcherThread(false);
     searcherThread1.start();

-    SEARCHER = new IndexSearcher("index");
+    SEARCHER = new IndexSearcher(indexDir.toString());

     SearcherThread searcherThread2 = new SearcherThread(true);
     searcherThread2.start();

@ -231,9 +242,9 @@ class ThreadSafetyTest {
     }
     i = i%10;
     if (i == 0)
       result.append(" ");
     else
       result.append("-");
   }
   switch (i) {
     case 19 : result.append("nineteen "); break;

@ -87,7 +87,8 @@ class DocTest {

     } catch (Exception e) {
       System.out.println(" caught a " + e.getClass() +
                          "\n with message: " + e.getMessage());
+      e.printStackTrace();
     }
   }

@ -113,7 +114,7 @@ class DocTest {
     SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
     SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));

-    SegmentMerger merger = new SegmentMerger(directory, merged);
+    SegmentMerger merger = new SegmentMerger(directory, merged, false);
     merger.add(r1);
     merger.add(r2);
     merger.merge();

@ -137,17 +138,17 @@ class DocTest {

     TermPositions positions = reader.termPositions(tis.term());
     try {
       while (positions.next()) {
         System.out.print(" doc=" + positions.doc());
         System.out.print(" TF=" + positions.freq());
         System.out.print(" pos=");
         System.out.print(positions.nextPosition());
         for (int j = 1; j < positions.freq(); j++)
           System.out.print("," + positions.nextPosition());
         System.out.println("");
       }
     } finally {
       positions.close();
     }
   }
   tis.close();

@ -0,0 +1,701 @@
package org.apache.lucene.index;

import java.io.IOException;

import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

import org.apache.lucene.store.*;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;


/**
 * @author dmitrys@earthlink.net
 * @version $Id$
 */
public class TestCompoundFile extends TestCase
{
    /** Main for running test case by itself. */
    public static void main(String args[]) {
        TestRunner.run (new TestSuite(TestCompoundFile.class));
//        TestRunner.run (new TestCompoundFile("testSingleFile"));
//        TestRunner.run (new TestCompoundFile("testTwoFiles"));
//        TestRunner.run (new TestCompoundFile("testRandomFiles"));
//        TestRunner.run (new TestCompoundFile("testClonedStreamsClosing"));
//        TestRunner.run (new TestCompoundFile("testReadAfterClose"));
//        TestRunner.run (new TestCompoundFile("testRandomAccess"));
//        TestRunner.run (new TestCompoundFile("testRandomAccessClones"));
//        TestRunner.run (new TestCompoundFile("testFileNotFound"));
//        TestRunner.run (new TestCompoundFile("testReadPastEOF"));

//        TestRunner.run (new TestCompoundFile("testIWCreate"));
    }
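
    // Typical standalone run (a sketch; assumes JUnit and the compiled test
    // classes are on the classpath):
    //
    //   java org.apache.lucene.index.TestCompoundFile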


    public TestCompoundFile() {
        super();
    }

    public TestCompoundFile(String name) {
        super(name);
    }

    private Directory dir;


    public void setUp() throws IOException {
        //dir = new RAMDirectory();
        dir = FSDirectory.getDirectory("testIndex", true);
    }


    /** Creates a file of the specified size with random data. */
    private void createRandomFile(Directory dir, String name, int size)
            throws IOException
    {
        OutputStream os = dir.createFile(name);
        for (int i=0; i<size; i++) {
            byte b = (byte) (Math.random() * 256);
            os.writeByte(b);
        }
        os.close();
    }
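
    // Note: (byte) (Math.random() * 256) folds the range 0..255 into Java's
    // signed byte range; the tests below only compare bytes for equality,
    // so the sign does not matter.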

    /** Creates a file of the specified size with sequential data. The first
     *  byte is written as the start byte provided. All subsequent bytes are
     *  computed as start + offset, where offset is the number of the byte.
     */
    private void createSequenceFile(Directory dir,
                                    String name,
                                    byte start,
                                    int size)
            throws IOException
    {
        OutputStream os = dir.createFile(name);
        for (int i=0; i < size; i++) {
            os.writeByte(start);
            start ++;
        }
        os.close();
    }
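
    // For sizes above 127 the byte counter wraps around (127 -> -128), so a
    // 2000-byte sequence file repeats the cycle 0..127, -128..-1. The plain
    // and the compound streams see the same wrapped values, which is all the
    // stream comparisons below rely on.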


    private void assertSameStreams(String msg,
                                   InputStream expected,
                                   InputStream test)
            throws IOException
    {
        assertNotNull(msg + " null expected", expected);
        assertNotNull(msg + " null test", test);
        assertEquals(msg + " length", expected.length(), test.length());
        assertEquals(msg + " position", expected.getFilePointer(),
                                        test.getFilePointer());

        byte expectedBuffer[] = new byte[512];
        byte testBuffer[] = new byte[expectedBuffer.length];

        long remainder = expected.length() - expected.getFilePointer();
        while(remainder > 0) {
            int readLen = (int) Math.min(remainder, expectedBuffer.length);
            expected.readBytes(expectedBuffer, 0, readLen);
            test.readBytes(testBuffer, 0, readLen);
            assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer,
                              testBuffer, 0, readLen);
            remainder -= readLen;
        }
    }


    private void assertSameStreams(String msg,
                                   InputStream expected,
                                   InputStream actual,
                                   long seekTo)
            throws IOException
    {
        if (seekTo < 0) {
            try {
                actual.seek(seekTo);
                fail(msg + ", " + seekTo + ", negative seek");
            } catch (IOException e) {
                /* success */
                //System.out.println("SUCCESS: Negative seek: " + e);
            }

        } else if (seekTo > 0 && seekTo >= expected.length()) {
            try {
                actual.seek(seekTo);
                fail(msg + ", " + seekTo + ", seek past EOF");
            } catch (IOException e) {
                /* success */
                //System.out.println("SUCCESS: Seek past EOF: " + e);
            }

        } else {
            expected.seek(seekTo);
            actual.seek(seekTo);
            assertSameStreams(msg + ", seek(mid)", expected, actual);
        }
    }


    private void assertSameSeekBehavior(String msg,
                                        InputStream expected,
                                        InputStream actual)
            throws IOException
    {
        // seek to 0
        long point = 0;
        assertSameStreams(msg + ", seek(0)", expected, actual, point);

        // seek to middle
        point = expected.length() / 2l;
        assertSameStreams(msg + ", seek(mid)", expected, actual, point);

        // seek to end - 2
        point = expected.length() - 2;
        assertSameStreams(msg + ", seek(end-2)", expected, actual, point);

        // seek to end - 1
        point = expected.length() - 1;
        assertSameStreams(msg + ", seek(end-1)", expected, actual, point);

        // seek to the end
        point = expected.length();
        assertSameStreams(msg + ", seek(end)", expected, actual, point);

        // seek past end
        point = expected.length() + 1;
        assertSameStreams(msg + ", seek(end+1)", expected, actual, point);
    }


    private void assertEqualArrays(String msg,
                                   byte[] expected,
                                   byte[] test,
                                   int start,
                                   int len)
    {
        assertNotNull(msg + " null expected", expected);
        assertNotNull(msg + " null test", test);

        for (int i=start; i<len; i++) {
            assertEquals(msg + " " + i, expected[i], test[i]);
        }
    }


    // ===========================================================
    //  Tests of the basic CompoundFile functionality
    // ===========================================================


    /** This test creates a compound file based on a single file.
     *  Files of different sizes are tested: 0, 1, 10, 100 bytes.
     */
    public void testSingleFile() throws IOException {
        int data[] = new int[] { 0, 1, 10, 100 };
        for (int i=0; i<data.length; i++) {
            String name = "t" + data[i];
            createSequenceFile(dir, name, (byte) 0, data[i]);
            CompoundFileWriter csw = new CompoundFileWriter(dir, name + ".cfs");
            csw.addFile(name);
            csw.close();

            CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs");
            InputStream expected = dir.openFile(name);
            InputStream actual = csr.openFile(name);
            assertSameStreams(name, expected, actual);
            assertSameSeekBehavior(name, expected, actual);
            expected.close();
            actual.close();
            csr.close();
        }
    }


    /** This test creates a compound file based on two files. */
    public void testTwoFiles() throws IOException {
        createSequenceFile(dir, "d1", (byte) 0, 15);
        createSequenceFile(dir, "d2", (byte) 0, 114);

        CompoundFileWriter csw = new CompoundFileWriter(dir, "d.csf");
        csw.addFile("d1");
        csw.addFile("d2");
        csw.close();

        CompoundFileReader csr = new CompoundFileReader(dir, "d.csf");
        InputStream expected = dir.openFile("d1");
        InputStream actual = csr.openFile("d1");
        assertSameStreams("d1", expected, actual);
        assertSameSeekBehavior("d1", expected, actual);
        expected.close();
        actual.close();

        expected = dir.openFile("d2");
        actual = csr.openFile("d2");
        assertSameStreams("d2", expected, actual);
        assertSameSeekBehavior("d2", expected, actual);
        expected.close();
        actual.close();
        csr.close();
    }

    /** This test creates a compound file based on a large number of files of
     *  various lengths. The file content is generated randomly. The sizes
     *  range from 0 to 1Mb. Some of the sizes are selected to test the
     *  buffering logic in the file reading code. For this the chunk variable
     *  is set to the length of the buffer used internally by the compound
     *  file logic.
     */
    public void testRandomFiles() throws IOException {
        // Setup the test segment
        String segment = "test";
        int chunk = 1024; // internal buffer size used by the stream
        createRandomFile(dir, segment + ".zero", 0);
        createRandomFile(dir, segment + ".one", 1);
        createRandomFile(dir, segment + ".ten", 10);
        createRandomFile(dir, segment + ".hundred", 100);
        createRandomFile(dir, segment + ".big1", chunk);
        createRandomFile(dir, segment + ".big2", chunk - 1);
        createRandomFile(dir, segment + ".big3", chunk + 1);
        createRandomFile(dir, segment + ".big4", 3 * chunk);
        createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
        createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
        createRandomFile(dir, segment + ".big7", 1000 * chunk);

        // Setup extraneous files
        createRandomFile(dir, "onetwothree", 100);
        createRandomFile(dir, segment + ".notIn", 50);
        createRandomFile(dir, segment + ".notIn2", 51);

        // Now test
        CompoundFileWriter csw = new CompoundFileWriter(dir, "test.cfs");
        final String data[] = new String[] {
            ".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3",
            ".big4", ".big5", ".big6", ".big7"
        };
        for (int i=0; i<data.length; i++) {
            csw.addFile(segment + data[i]);
        }
        csw.close();

        CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs");
        for (int i=0; i<data.length; i++) {
            InputStream check = dir.openFile(segment + data[i]);
            InputStream test = csr.openFile(segment + data[i]);
            assertSameStreams(data[i], check, test);
            assertSameSeekBehavior(data[i], check, test);
            test.close();
            check.close();
        }
        csr.close();
    }


    /** Setup a larger compound file with a number of components, each of
     *  which is a sequential file (so that we can easily tell that we are
     *  reading in the right byte). The method sets up 20 files - f0 to f19,
     *  the size of each file is 2000 bytes.
     */
    private void setUp_2() throws IOException {
        CompoundFileWriter cw = new CompoundFileWriter(dir, "f.comp");
        for (int i=0; i<20; i++) {
            createSequenceFile(dir, "f" + i, (byte) 0, 2000);
            cw.addFile("f" + i);
        }
        cw.close();
    }
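
    // The 2000-byte component size is deliberate: it exceeds the 1024-byte
    // buffer used internally by the streams (see the chunk constant above),
    // so the seek-and-read checks below also cross buffer boundaries.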


    public void testReadAfterClose() throws IOException {
        demo_FSInputStreamBug((FSDirectory) dir, "test");
    }

    private void demo_FSInputStreamBug(FSDirectory fsdir, String file)
            throws IOException
    {
        // Setup the test file - we need more than 1024 bytes
        OutputStream os = fsdir.createFile(file);
        for(int i=0; i<2000; i++) {
            os.writeByte((byte) i);
        }
        os.close();

        InputStream in = fsdir.openFile(file);

        // This read primes the buffer in InputStream
        byte b = in.readByte();

        // Close the file
        in.close();

        // ERROR: this call should fail, but succeeds because the buffer
        // is still filled
        b = in.readByte();

        // ERROR: this call should fail, but succeeds for some reason as well
        in.seek(1099);

        try {
            // OK: this call correctly fails. We are now past the 1024 internal
            // buffer, so an actual IO is attempted, which fails
            b = in.readByte();
        } catch (IOException e) {
        }
    }
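
    // In short: after in.close(), reads served from the already-primed
    // 1024-byte buffer still succeed, and seek() merely repositions; only a
    // read that needs real file I/O (here, at offset 1099, outside the
    // buffered 0..1023 range) surfaces the closed file as an IOException.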


    static boolean isCSInputStream(InputStream is) {
        return is instanceof CompoundFileReader.CSInputStream;
    }

    static boolean isCSInputStreamOpen(InputStream is) throws IOException {
        if (isCSInputStream(is)) {
            CompoundFileReader.CSInputStream cis =
                (CompoundFileReader.CSInputStream) is;

            return _TestHelper.isFSInputStreamOpen(cis.base);
        } else {
            return false;
        }
    }


    public void testClonedStreamsClosing() throws IOException {
        setUp_2();
        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

        // basic clone
        InputStream expected = dir.openFile("f11");
        assertTrue(_TestHelper.isFSInputStreamOpen(expected));

        InputStream one = cr.openFile("f11");
        assertTrue(isCSInputStreamOpen(one));

        InputStream two = (InputStream) one.clone();
        assertTrue(isCSInputStreamOpen(two));

        assertSameStreams("basic clone one", expected, one);
        expected.seek(0);
        assertSameStreams("basic clone two", expected, two);

        // Now close the first stream
        one.close();
        assertTrue("Only close when cr is closed", isCSInputStreamOpen(one));

        // The following should really fail since we couldn't expect to
        // access a file once close has been called on it (regardless of
        // buffering and/or clone magic)
        expected.seek(0);
        two.seek(0);
        assertSameStreams("basic clone two/2", expected, two);


        // Now close the compound reader
        cr.close();
        assertFalse("Now closed one", isCSInputStreamOpen(one));
        assertFalse("Now closed two", isCSInputStreamOpen(two));

        // The following may also fail since the compound stream is closed
        expected.seek(0);
        two.seek(0);
        //assertSameStreams("basic clone two/3", expected, two);


        // Now close the second clone
        two.close();
        expected.seek(0);
        two.seek(0);
        //assertSameStreams("basic clone two/4", expected, two);

        expected.close();
    }


    /** This test opens two files from a compound stream and verifies that
     *  their file positions are independent of each other.
     */
    public void testRandomAccess() throws IOException {
        setUp_2();
        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

        // Open two files
        InputStream e1 = dir.openFile("f11");
        InputStream e2 = dir.openFile("f3");

        InputStream a1 = cr.openFile("f11");
        InputStream a2 = cr.openFile("f3");

        // Seek the first pair
        e1.seek(100);
        a1.seek(100);
        assertEquals(100, e1.getFilePointer());
        assertEquals(100, a1.getFilePointer());
        byte be1 = e1.readByte();
        byte ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now seek the second pair
        e2.seek(1027);
        a2.seek(1027);
        assertEquals(1027, e2.getFilePointer());
        assertEquals(1027, a2.getFilePointer());
        byte be2 = e2.readByte();
        byte ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Now make sure the first one didn't move
        assertEquals(101, e1.getFilePointer());
        assertEquals(101, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now move the first one again, past the buffer length
        e1.seek(1910);
        a1.seek(1910);
        assertEquals(1910, e1.getFilePointer());
        assertEquals(1910, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now make sure the second set didn't move
        assertEquals(1028, e2.getFilePointer());
        assertEquals(1028, a2.getFilePointer());
        be2 = e2.readByte();
        ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Move the second set back, again cross the buffer size
        e2.seek(17);
        a2.seek(17);
        assertEquals(17, e2.getFilePointer());
        assertEquals(17, a2.getFilePointer());
        be2 = e2.readByte();
        ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Finally, make sure the first set didn't move
        assertEquals(1911, e1.getFilePointer());
        assertEquals(1911, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        e1.close();
        e2.close();
        a1.close();
        a2.close();
        cr.close();
    }

    /** Like testRandomAccess, but the second stream of each pair is a clone
     *  of the first, verifying that clone positions are independent as well.
     */
    public void testRandomAccessClones() throws IOException {
        setUp_2();
        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

        // Open two files
        InputStream e1 = cr.openFile("f11");
        InputStream e2 = cr.openFile("f3");

        InputStream a1 = (InputStream) e1.clone();
        InputStream a2 = (InputStream) e2.clone();

        // Seek the first pair
        e1.seek(100);
        a1.seek(100);
        assertEquals(100, e1.getFilePointer());
        assertEquals(100, a1.getFilePointer());
        byte be1 = e1.readByte();
        byte ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now seek the second pair
        e2.seek(1027);
        a2.seek(1027);
        assertEquals(1027, e2.getFilePointer());
        assertEquals(1027, a2.getFilePointer());
        byte be2 = e2.readByte();
        byte ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Now make sure the first one didn't move
        assertEquals(101, e1.getFilePointer());
        assertEquals(101, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now move the first one again, past the buffer length
        e1.seek(1910);
        a1.seek(1910);
        assertEquals(1910, e1.getFilePointer());
        assertEquals(1910, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        // Now make sure the second set didn't move
        assertEquals(1028, e2.getFilePointer());
        assertEquals(1028, a2.getFilePointer());
        be2 = e2.readByte();
        ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Move the second set back, again cross the buffer size
        e2.seek(17);
        a2.seek(17);
        assertEquals(17, e2.getFilePointer());
        assertEquals(17, a2.getFilePointer());
        be2 = e2.readByte();
        ba2 = a2.readByte();
        assertEquals(be2, ba2);

        // Finally, make sure the first set didn't move
        assertEquals(1911, e1.getFilePointer());
        assertEquals(1911, a1.getFilePointer());
        be1 = e1.readByte();
        ba1 = a1.readByte();
        assertEquals(be1, ba1);

        e1.close();
        e2.close();
        a1.close();
        a2.close();
        cr.close();
    }


    public void testFileNotFound() throws IOException {
        setUp_2();
        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");

        // Open a file that is not part of the compound stream
        try {
            InputStream e1 = cr.openFile("bogus");
            fail("File not found");

        } catch (IOException e) {
            /* success */
            //System.out.println("SUCCESS: File Not Found: " + e);
        }

        cr.close();
    }


    public void testReadPastEOF() throws IOException {
        setUp_2();
        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
        InputStream is = cr.openFile("f2");
        is.seek(is.length() - 10);
        byte b[] = new byte[100];
        is.readBytes(b, 0, 10);

        try {
            byte test = is.readByte();
            fail("Single byte read past end of file");
        } catch (IOException e) {
            /* success */
            //System.out.println("SUCCESS: single byte read past end of file: " + e);
        }

        is.seek(is.length() - 10);
        try {
            is.readBytes(b, 0, 50);
            fail("Block read past end of file");
        } catch (IOException e) {
            /* success */
            //System.out.println("SUCCESS: block read past end of file: " + e);
        }

        is.close();
        cr.close();
    }


    // ===========================================================
    //  More extensive tests involving an IndexWriter
    // ===========================================================

    public void testIWCreate() throws IOException {
        // create index writer
        IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        int created = 0;
        for (int i=0; i<150; i++) {
            iw.addDocument(createTestDoc(String.valueOf(i)));
            created ++;
        }
        assertEquals(created, iw.docCount());
        iw.close();

        // delete every seventh document, starting at doc 10
        IndexReader reader = IndexReader.open(dir);
        int deleted = 0;
        for (int i = 10; i < created-7; i+=7) {
            reader.delete(i);
            deleted ++;
        }
        reader.close();
        int remains = created - deleted;

        iw = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        assertEquals(created, iw.docCount());
        iw.close();

        reader = IndexReader.open(dir);
        assertEquals(created, reader.maxDoc());
        assertEquals(remains, reader.numDocs());
        for (int i = 10; i < created-7; i+=7) {
            assertTrue("deleted: " + i, reader.isDeleted(i));
            assertFalse("deleted+1: " + i, reader.isDeleted(i + 1));
            assertFalse("deleted-1: " + i, reader.isDeleted(i - 1));
        }

        reader.close();

        iw = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
        iw.optimize();
        assertEquals(remains, iw.docCount());
        iw.close();

        reader = IndexReader.open(dir);
        assertEquals(remains, reader.maxDoc());
        assertEquals(remains, reader.numDocs());
        reader.close();
    }
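
    // Worked numbers for the test above: with created == 150 the delete loop
    // visits i = 10, 17, ..., 136 (19 documents), so remains == 131; maxDoc()
    // stays at 150 until optimize() merges the deletions away.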


    private Document createTestDoc(String id) {
        Document doc = new Document();
        doc.add(Field.Keyword("keyword_id", id));
        doc.add(Field.Text("text_id", id));
        doc.add(Field.Keyword("keyword_text", "KeywordText"));
        doc.add(Field.Text("text", "This is a text field"));
        doc.add(Field.UnIndexed("unindexed", "This is some payload unindexed text"));
        doc.add(Field.UnStored("unstored", "This is unstored text"));
        return doc;
    }

    private void verifyDoc(Document doc, String id) {
        assertEquals("keyword_id", doc.get("keyword_id"), id);
        assertEquals("text_id", doc.get("text_id"), id);
        assertEquals("keyword_text", doc.get("keyword_text"), "KeywordText");
        assertEquals("text", doc.get("text"), "This is a text field");
        assertEquals("unindexed", doc.get("unindexed"), "This is some payload unindexed text");
        assertNull("unstored", doc.get("unstored"));
    }
}

@ -0,0 +1,265 @@
package org.apache.lucene.index;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.demo.FileDocument;

import java.io.*;
import java.util.*;


/** JUnit adaptation of an older test case DocTest.
 * @author dmitrys@earthlink.net
 * @version $Id$
 */
public class TestDoc extends TestCase {

    /** Main for running test case by itself. */
    public static void main(String args[]) {
        TestRunner.run (new TestSuite(TestDoc.class));
    }


    private File workDir;
    private File indexDir;
    private LinkedList files;


    /** Set the test case. This test case needs
     *  a few text files created in the current working directory.
     */
    public void setUp() throws IOException {
        workDir = new File("TestDoc");
        workDir.mkdirs();

        indexDir = new File(workDir, "testIndex");
        indexDir.mkdirs();

        Directory directory = FSDirectory.getDirectory(indexDir, true);
        directory.close();

        files = new LinkedList();
        files.add(createFile("test.txt",
            "This is the first test file"
        ));

        files.add(createFile("test2.txt",
            "This is the second test file"
        ));
    }

    private File createFile(String name, String text) throws IOException {
        FileWriter fw = null;
        PrintWriter pw = null;

        try {
            File f = new File(workDir, name);
            if (f.exists()) f.delete();

            fw = new FileWriter(f);
            pw = new PrintWriter(fw);
            pw.println(text);
            return f;

        } finally {
            if (pw != null) pw.close();
            if (fw != null) fw.close();
        }
    }


    /** This test executes a number of merges and compares the contents of
     *  the segments created when using compound file or not using one.
     *
     *  TODO: the original test used to print the segment contents to
     *  System.out for visual validation. To have the same effect, a new
     *  method checkSegment(String name, ...) should be created that would
     *  assert various things about the segment.
     */
    public void testIndexAndMerge() throws Exception {
        StringWriter sw = new StringWriter();
        PrintWriter out = new PrintWriter(sw, true);

        Directory directory = FSDirectory.getDirectory(indexDir, true);
        directory.close();

        indexDoc("one", "test.txt");
        printSegment(out, "one");

        indexDoc("two", "test2.txt");
        printSegment(out, "two");

        merge("one", "two", "merge", false);
        printSegment(out, "merge");

        merge("one", "two", "merge2", false);
        printSegment(out, "merge2");

        merge("merge", "merge2", "merge3", false);
        printSegment(out, "merge3");

        out.close();
        sw.close();
        String multiFileOutput = sw.getBuffer().toString();
        System.out.println(multiFileOutput);

        sw = new StringWriter();
        out = new PrintWriter(sw, true);

        directory = FSDirectory.getDirectory(indexDir, true);
        directory.close();

        indexDoc("one", "test.txt");
        printSegment(out, "one");

        indexDoc("two", "test2.txt");
        printSegment(out, "two");

        merge("one", "two", "merge", true);
        printSegment(out, "merge");

        merge("one", "two", "merge2", true);
        printSegment(out, "merge2");

        merge("merge", "merge2", "merge3", true);
        printSegment(out, "merge3");

        out.close();
        sw.close();
        String singleFileOutput = sw.getBuffer().toString();

        assertEquals(multiFileOutput, singleFileOutput);
    }
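
    // Note: each pass starts by re-creating the index directory
    // (FSDirectory.getDirectory(indexDir, true)), so both passes produce
    // segments named one, two, merge, merge2, merge3 and their printed
    // contents can be compared as plain strings.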


    private void indexDoc(String segment, String fileName)
            throws Exception
    {
        Directory directory = FSDirectory.getDirectory(indexDir, false);
        Analyzer analyzer = new SimpleAnalyzer();
        DocumentWriter writer =
            new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);

        File file = new File(workDir, fileName);
        Document doc = FileDocument.Document(file);

        writer.addDocument(segment, doc);

        directory.close();
    }


    private void merge(String seg1, String seg2, String merged, boolean useCompoundFile)
            throws Exception {
        Directory directory = FSDirectory.getDirectory(indexDir, false);

        SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
        SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));

        SegmentMerger merger =
            new SegmentMerger(directory, merged, useCompoundFile);

        merger.add(r1);
        merger.add(r2);
        merger.merge();

        directory.close();
    }


    private void printSegment(PrintWriter out, String segment)
            throws Exception {
        Directory directory = FSDirectory.getDirectory(indexDir, false);
        SegmentReader reader =
            new SegmentReader(new SegmentInfo(segment, 1, directory));

        for (int i = 0; i < reader.numDocs(); i++)
            out.println(reader.document(i));

        TermEnum tis = reader.terms();
        while (tis.next()) {
            out.print(tis.term());
            out.println(" DF=" + tis.docFreq());

            TermPositions positions = reader.termPositions(tis.term());
            try {
                while (positions.next()) {
                    out.print(" doc=" + positions.doc());
                    out.print(" TF=" + positions.freq());
                    out.print(" pos=");
                    out.print(positions.nextPosition());
                    for (int j = 1; j < positions.freq(); j++)
                        out.print("," + positions.nextPosition());
                    out.println("");
                }
            } finally {
                positions.close();
            }
        }
        tis.close();
        reader.close();
        directory.close();
    }
}

@ -0,0 +1,47 @@
package org.apache.lucene.store;

import java.io.RandomAccessFile;
import java.io.IOException;

/** This class provides access to package-level features defined in the
 *  store package. It is used for testing only.
 */

public class _TestHelper {

    /** Returns true if the instance of the provided input stream is actually
     *  an FSInputStream.
     */
    public static boolean isFSInputStream(InputStream is) {
        return is instanceof FSInputStream;
    }

    /** Returns true if the provided input stream is an FSInputStream and
     *  is a clone, that is, it does not own its underlying file descriptor.
     */
    public static boolean isFSInputStreamClone(InputStream is) {
        if (isFSInputStream(is)) {
            return ((FSInputStream) is).isClone;
        } else {
            return false;
        }
    }

    /** Given an instance of FSDirectory.FSInputStream, this method returns
     *  true if the underlying file descriptor is valid, and false otherwise.
     *  This can be used to determine if the OS file has been closed.
     *  The descriptor becomes invalid when the non-clone instance of the
     *  FSInputStream that owns this descriptor is closed. However, the
     *  descriptor may possibly become invalid in other ways as well.
     */
    public static boolean isFSInputStreamOpen(InputStream is)
            throws IOException
    {
        if (isFSInputStream(is)) {
            FSInputStream fis = (FSInputStream) is;
            return fis.isFDValid();
        } else {
            return false;
        }
    }
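
    // Usage sketch (mirrors the compound file tests): given a stream obtained
    // from FSDirectory.openFile(...), isFSInputStreamOpen(is) reports whether
    // the underlying OS file descriptor is still valid after close() calls.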

}