LUCENE-756: small improvement to not rely on IO operation (fileExists)

to check whether a "single norm file" is in use for the segment.
Instead, save this information per segment explicitly into the segment
infos file.  Also renamed to "singleNormFile".


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@496851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-01-16 20:24:52 +00:00
parent 8cafdd9b64
commit 30083146b4
7 changed files with 104 additions and 122 deletions

View File

@ -44,10 +44,11 @@ final class SegmentInfo {
// pre-2.1 (ie, must check file system to see
// if <name>.cfs and <name>.nrm exist)
private byte withNrm; // 1 if this segment maintains norms in a single file;
// -1 if not; 0 if check file is required to tell.
// would be -1 for segments populated by DocumentWriter.
// would be 1 for (newly created) merge resulted segments (both compound and non compound).
private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
// false otherwise
// this is currently false for segments populated by DocumentWriter
// and true for newly created merged segments (both
// compound and non compound).
public SegmentInfo(String name, int docCount, Directory dir) {
this.name = name;
@ -56,13 +57,13 @@ final class SegmentInfo {
delGen = -1;
isCompoundFile = 0;
preLockless = true;
withNrm = 0;
hasSingleNormFile = false;
}
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) {
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
this(name, docCount, dir);
this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1);
this.withNrm = (byte) (withNrm ? 1 : -1);
this.hasSingleNormFile = hasSingleNormFile;
preLockless = false;
}
@ -82,7 +83,7 @@ final class SegmentInfo {
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
}
isCompoundFile = src.isCompoundFile;
withNrm = src.withNrm;
hasSingleNormFile = src.hasSingleNormFile;
}
/**
@ -99,6 +100,11 @@ final class SegmentInfo {
docCount = input.readInt();
if (format <= SegmentInfos.FORMAT_LOCKLESS) {
delGen = input.readLong();
if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
hasSingleNormFile = (1 == input.readByte());
} else {
hasSingleNormFile = false;
}
int numNormGen = input.readInt();
if (numNormGen == -1) {
normGen = null;
@ -115,8 +121,8 @@ final class SegmentInfo {
normGen = null;
isCompoundFile = 0;
preLockless = true;
hasSingleNormFile = false;
}
withNrm = 0;
}
void setNumFields(int numFields) {
@ -179,7 +185,7 @@ final class SegmentInfo {
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
si.preLockless = preLockless;
si.withNrm = withNrm;
si.hasSingleNormFile = hasSingleNormFile;
if (normGen != null) {
si.normGen = (long[]) normGen.clone();
}
@ -297,7 +303,7 @@ final class SegmentInfo {
return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen);
}
if (withNrm()) {
if (hasSingleNormFile) {
// case 2: lockless (or nrm file exists) - single file for all norms
prefix = "." + IndexFileNames.NORMS_EXTENSION;
return IndexFileNames.fileNameFromGeneration(name, prefix, 0);
@ -336,31 +342,6 @@ final class SegmentInfo {
}
}
/**
* Returns true iff this segment stores field norms in a single .nrm file.
*/
private boolean withNrm () throws IOException {
if (withNrm == -1) {
return false;
}
if (withNrm == 1) {
return true;
}
Directory d = dir;
try {
if (getUseCompoundFile()) {
d = new CompoundFileReader(dir, name + ".cfs");
}
boolean res = d.fileExists(name + "." + IndexFileNames.NORMS_EXTENSION);
withNrm = (byte) (res ? 1 : -1); // avoid more file tests like this
return res;
} finally {
if (d!=dir && d!=null) {
d.close();
}
}
}
/**
* Save this segment's info.
*/
@ -369,6 +350,7 @@ final class SegmentInfo {
output.writeString(name);
output.writeInt(docCount);
output.writeLong(delGen);
output.writeByte((byte) (hasSingleNormFile ? 1:0));
if (normGen == null) {
output.writeInt(-1);
} else {

View File

@ -33,7 +33,7 @@ public final class SegmentInfos extends Vector {
/* Works since counter, the old 1st entry, is always >= 0 */
public static final int FORMAT = -1;
/** This is the current file format written. It differs
/** This format adds details used for lockless commits. It differs
* slightly from the previous format in that file names
* are never re-used (write once). Instead, each file is
* written to the next generation. For example,
@ -44,6 +44,13 @@ public final class SegmentInfos extends Vector {
*/
public static final int FORMAT_LOCKLESS = -2;
/** This is the current file format written. It adds a
* "hasSingleNormFile" flag into each segment info.
* See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
* for details.
*/
public static final int FORMAT_SINGLE_NORM_FILE = -3;
public int counter = 0; // used to name new segments
/**
* counts how often the index has been changed by adding or deleting docs.
@ -184,7 +191,7 @@ public final class SegmentInfos extends Vector {
int format = input.readInt();
if(format < 0){ // file contains explicit format info
// check that it is a format we can understand
if (format < FORMAT_LOCKLESS)
if (format < FORMAT_SINGLE_NORM_FILE)
throw new IOException("Unknown format version: " + format);
version = input.readLong(); // read version
counter = input.readInt(); // read counter
@ -245,7 +252,7 @@ public final class SegmentInfos extends Vector {
IndexOutput output = directory.createOutput(segmentFileName);
try {
output.writeInt(FORMAT_LOCKLESS); // write FORMAT
output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
output.writeLong(++version); // every write changes
// the index
output.writeInt(counter); // write counter
@ -311,7 +318,7 @@ public final class SegmentInfos extends Vector {
try {
format = input.readInt();
if(format < 0){
if (format < FORMAT_LOCKLESS)
if (format < FORMAT_SINGLE_NORM_FILE)
throw new IOException("Unknown format version: " + format);
version = input.readLong(); // read version
}

View File

@ -321,7 +321,7 @@ class SegmentReader extends IndexReader {
if (addedNrm) continue; // add .nrm just once
addedNrm = true;
}
files.addElement(name);
files.addElement(name);
}
}
return files;

View File

@ -762,8 +762,8 @@
<p>
The active segments in the index are stored in the
segment info file,
<tt>segments_N</tt>
. There may
<tt>segments_N</tt>.
There may
be one or more
<tt>segments_N</tt>
files in the
@ -779,13 +779,13 @@
<p>
As of 2.1, there is also a file
<tt>segments.gen</tt>
. This file contains the
<tt>segments.gen</tt>.
This file contains the
current generation (the
<tt>_N</tt>
in
<tt>segments_N</tt>
) of the index. This is
<tt>segments_N</tt>)
of the index. This is
used only as a fallback in case the current
generation cannot be accurately determined by
directory listing alone (as is the case for some
@ -803,11 +803,9 @@
</p>
<p>
<b>2.1 and above:</b>
Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, NumField, NormGen
<sup>NumField</sup>
&gt;
<sup>SegCount</sup>
, IsCompoundFile
Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
IsCompoundFile&gt;<sup>SegCount</sup>
</p>
<p>
@ -823,11 +821,11 @@
</p>
<p>
IsCompoundFile --&gt; Int8
IsCompoundFile, HasSingleNormFile --&gt; Int8
</p>
<p>
Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1.
Format is -1 as of Lucene 1.4 and -3 (SemgentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1.
</p>
<p>
@ -881,6 +879,13 @@
exists.
</p>
<p>
If HasSingleNormFile is 1, then the field norms are
written as a single joined file (with extension
<tt>.nrm</tt>); if it is 0 then each field's norms
are stored as separate <tt>.fN</tt> files. See
"Normalization Factors" below for details.
</p>
</section>

View File

@ -85,46 +85,34 @@ public class TestBackwardsCompatibility extends TestCase
rmDir(dirName);
}
public void testSearchOldIndexCFS() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
unzip(dirName);
searchIndex(dirName);
rmDir(dirName);
public void testSearchOldIndex() throws IOException {
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName);
searchIndex(dirName);
rmDir(dirName);
}
}
public void testIndexOldIndexCFSNoAdds() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
unzip(dirName);
changeIndexNoAdds(dirName);
rmDir(dirName);
public void testIndexOldIndexNoAdds() throws IOException {
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName);
changeIndexNoAdds(dirName);
rmDir(dirName);
}
}
public void testIndexOldIndexCFS() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
unzip(dirName);
changeIndexWithAdds(dirName);
rmDir(dirName);
}
public void testSearchOldIndexNoCFS() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
unzip(dirName);
searchIndex(dirName);
rmDir(dirName);
}
public void testIndexOldIndexNoCFS() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
unzip(dirName);
changeIndexWithAdds(dirName);
rmDir(dirName);
}
public void testIndexOldIndexNoCFSNoAdds() throws IOException {
String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
unzip(dirName);
changeIndexNoAdds(dirName);
rmDir(dirName);
public void testIndexOldIndex() throws IOException {
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName);
changeIndexWithAdds(dirName);
rmDir(dirName);
}
}
public void searchIndex(String dirName) throws IOException {

View File

@ -108,21 +108,21 @@ public class TestDoc extends TestCase {
Directory directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
indexDoc("one", "test.txt");
printSegment(out, "one", 1);
SegmentInfo si1 = indexDoc("one", "test.txt");
printSegment(out, si1);
indexDoc("two", "test2.txt");
printSegment(out, "two", 1);
SegmentInfo si2 = indexDoc("two", "test2.txt");
printSegment(out, si2);
merge("one", 1, "two", 1, "merge", false);
printSegment(out, "merge", 2);
SegmentInfo siMerge = merge(si1, si2, "merge", false);
printSegment(out, siMerge);
merge("one", 1, "two", 1, "merge2", false);
printSegment(out, "merge2", 2);
merge("merge", 2, "merge2", 2, "merge3", false);
printSegment(out, "merge3", 4);
SegmentInfo siMerge2 = merge(si1, si2, "merge2", false);
printSegment(out, siMerge2);
SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false);
printSegment(out, siMerge3);
out.close();
sw.close();
String multiFileOutput = sw.getBuffer().toString();
@ -134,21 +134,21 @@ public class TestDoc extends TestCase {
directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
indexDoc("one", "test.txt");
printSegment(out, "one", 1);
si1 = indexDoc("one", "test.txt");
printSegment(out, si1);
indexDoc("two", "test2.txt");
printSegment(out, "two", 1);
si2 = indexDoc("two", "test2.txt");
printSegment(out, si2);
merge("one", 1, "two", 1, "merge", true);
printSegment(out, "merge", 2);
siMerge = merge(si1, si2, "merge", true);
printSegment(out, siMerge);
merge("one", 1, "two", 1, "merge2", true);
printSegment(out, "merge2", 2);
merge("merge", 2, "merge2", 2, "merge3", true);
printSegment(out, "merge3", 4);
siMerge2 = merge(si1, si2, "merge2", true);
printSegment(out, siMerge2);
siMerge3 = merge(siMerge, siMerge2, "merge3", true);
printSegment(out, siMerge3);
out.close();
sw.close();
String singleFileOutput = sw.getBuffer().toString();
@ -157,7 +157,7 @@ public class TestDoc extends TestCase {
}
private void indexDoc(String segment, String fileName)
private SegmentInfo indexDoc(String segment, String fileName)
throws Exception
{
Directory directory = FSDirectory.getDirectory(indexDir, false);
@ -171,18 +171,18 @@ public class TestDoc extends TestCase {
writer.addDocument(segment, doc);
directory.close();
return new SegmentInfo(segment, 1, directory, false, false);
}
private void merge(String seg1, int docCount1, String seg2, int docCount2, String merged, boolean useCompoundFile)
private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, docCount1, directory));
SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, docCount2, directory));
SegmentReader r1 = SegmentReader.get(si1);
SegmentReader r2 = SegmentReader.get(si2);
SegmentMerger merger =
new SegmentMerger(directory, merged);
SegmentMerger merger = new SegmentMerger(directory, merged);
merger.add(r1);
merger.add(r2);
@ -196,14 +196,14 @@ public class TestDoc extends TestCase {
}
directory.close();
return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true);
}
private void printSegment(PrintWriter out, String segment, int docCount)
private void printSegment(PrintWriter out, SegmentInfo si)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
SegmentReader reader =
SegmentReader.get(new SegmentInfo(segment, docCount, directory));
SegmentReader reader = SegmentReader.get(si);
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));

View File

@ -70,7 +70,7 @@ public class TestSegmentMerger extends TestCase {
merger.closeReaders();
assertTrue(docsMerged == 2);
//Should be able to open a new SegmentReader against the new directory
SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true));
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Document newDoc1 = mergedReader.document(0);