mirror of https://github.com/apache/lucene.git
LUCENE-756: A small improvement: do not rely on an IO operation (fileExists) to check whether a "single norm file" is in use for a segment. Instead, save this information explicitly, per segment, in the segment infos file. The field was also renamed (now "hasSingleNormFile").

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@496851 13f79535-47bb-0310-9956-ffa450edef68
parent 8cafdd9b64
commit 30083146b4
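The core of the change is easiest to see outside the diff. The sketch below is illustrative only, with simplified stand-ins for Lucene's Directory and SegmentInfo classes: before this commit, answering "does this segment keep all of its norms in one .nrm file?" could require a Directory.fileExists() call (and possibly opening the compound file); after it, the answer is a boolean decided when the segment is created and persisted in the segments file.

// Illustrative sketch only -- simplified stand-ins, not Lucene's actual classes.
import java.io.IOException;

interface Dir {                        // minimal stand-in for Lucene's Directory
  boolean fileExists(String name) throws IOException;
}

class BeforeLucene756 {
  private final String name;
  private final Dir dir;
  private byte withNrm = 0;            // 1 = single norm file, -1 = not, 0 = unknown

  BeforeLucene756(String name, Dir dir) { this.name = name; this.dir = dir; }

  boolean withNrm() throws IOException {
    if (withNrm == 0) {                // unknown: costs an IO round trip the first time
      boolean exists = dir.fileExists(name + ".nrm");
      withNrm = (byte) (exists ? 1 : -1);
    }
    return withNrm == 1;
  }
}

class AfterLucene756 {
  // Decided at segment-creation time and stored in the segments file,
  // so no file-system probe is needed when the segment is opened.
  final boolean hasSingleNormFile;

  AfterLucene756(boolean hasSingleNormFile) { this.hasSingleNormFile = hasSingleNormFile; }
}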
SegmentInfo.java

@@ -44,10 +44,11 @@ final class SegmentInfo {
                         // pre-2.1 (ie, must check file system to see
                         // if <name>.cfs and <name>.nrm exist)

-  private byte withNrm;  // 1 if this segment maintains norms in a single file;
-                         // -1 if not; 0 if check file is required to tell.
-                         // would be -1 for segments populated by DocumentWriter.
-                         // would be 1 for (newly created) merge resulted segments (both compound and non compound).
+  private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
+                         // false otherwise
+                         // this is currently false for segments populated by DocumentWriter
+                         // and true for newly created merged segments (both
+                         // compound and non compound).

   public SegmentInfo(String name, int docCount, Directory dir) {
     this.name = name;
@@ -56,13 +57,13 @@ final class SegmentInfo {
     delGen = -1;
     isCompoundFile = 0;
     preLockless = true;
-    withNrm = 0;
+    hasSingleNormFile = false;
   }

-  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) {
+  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
     this(name, docCount, dir);
     this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1);
-    this.withNrm = (byte) (withNrm ? 1 : -1);
+    this.hasSingleNormFile = hasSingleNormFile;
     preLockless = false;
   }

@@ -82,7 +83,7 @@ final class SegmentInfo {
       System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
     }
     isCompoundFile = src.isCompoundFile;
-    withNrm = src.withNrm;
+    hasSingleNormFile = src.hasSingleNormFile;
   }

   /**
@@ -99,6 +100,11 @@ final class SegmentInfo {
       docCount = input.readInt();
       if (format <= SegmentInfos.FORMAT_LOCKLESS) {
         delGen = input.readLong();
+        if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
+          hasSingleNormFile = (1 == input.readByte());
+        } else {
+          hasSingleNormFile = false;
+        }
         int numNormGen = input.readInt();
         if (numNormGen == -1) {
           normGen = null;
@@ -115,8 +121,8 @@ final class SegmentInfo {
       normGen = null;
       isCompoundFile = 0;
       preLockless = true;
+      hasSingleNormFile = false;
     }
-    withNrm = 0;
   }

   void setNumFields(int numFields) {
@@ -179,7 +185,7 @@ final class SegmentInfo {
     si.isCompoundFile = isCompoundFile;
     si.delGen = delGen;
     si.preLockless = preLockless;
-    si.withNrm = withNrm;
+    si.hasSingleNormFile = hasSingleNormFile;
     if (normGen != null) {
       si.normGen = (long[]) normGen.clone();
     }
@@ -297,7 +303,7 @@ final class SegmentInfo {
       return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen);
     }

-    if (withNrm()) {
+    if (hasSingleNormFile) {
       // case 2: lockless (or nrm file exists) - single file for all norms
       prefix = "." + IndexFileNames.NORMS_EXTENSION;
       return IndexFileNames.fileNameFromGeneration(name, prefix, 0);
@@ -336,31 +342,6 @@ final class SegmentInfo {
     }
   }

-  /**
-   * Returns true iff this segment stores field norms in a single .nrm file.
-   */
-  private boolean withNrm () throws IOException {
-    if (withNrm == -1) {
-      return false;
-    }
-    if (withNrm == 1) {
-      return true;
-    }
-    Directory d = dir;
-    try {
-      if (getUseCompoundFile()) {
-        d = new CompoundFileReader(dir, name + ".cfs");
-      }
-      boolean res = d.fileExists(name + "." + IndexFileNames.NORMS_EXTENSION);
-      withNrm = (byte) (res ? 1 : -1); // avoid more file tests like this
-      return res;
-    } finally {
-      if (d!=dir && d!=null) {
-        d.close();
-      }
-    }
-  }
-
   /**
    * Save this segment's info.
    */
@@ -369,6 +350,7 @@ final class SegmentInfo {
     output.writeString(name);
     output.writeInt(docCount);
     output.writeLong(delGen);
+    output.writeByte((byte) (hasSingleNormFile ? 1:0));
     if (normGen == null) {
       output.writeInt(-1);
     } else {
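As a quick way to see the read/write contract added to SegmentInfo above, here is a minimal round-trip sketch. It uses java.io.DataOutput/DataInput as stand-ins for Lucene's IndexOutput/IndexInput and hard-codes the two relevant format constants; it is not the actual SegmentInfo code.

import java.io.*;

// Sketch: how the hasSingleNormFile flag round-trips through the segments file.
final class NormFlagRoundTrip {
  static final int FORMAT_LOCKLESS = -2;
  static final int FORMAT_SINGLE_NORM_FILE = -3;  // formats grow more negative over time

  static void write(DataOutput out, boolean hasSingleNormFile) throws IOException {
    out.writeByte(hasSingleNormFile ? 1 : 0);     // mirrors output.writeByte(...) in the patch
  }

  static boolean read(DataInput in, int format) throws IOException {
    if (format <= FORMAT_SINGLE_NORM_FILE) {      // segments file new enough to carry the flag
      return in.readByte() == 1;
    }
    return false;                                 // older index: no single .nrm file recorded
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    write(new DataOutputStream(bytes), true);
    DataInput in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    System.out.println(read(in, FORMAT_SINGLE_NORM_FILE)); // prints: true
  }
}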
SegmentInfos.java

@@ -33,7 +33,7 @@ public final class SegmentInfos extends Vector {
   /* Works since counter, the old 1st entry, is always >= 0 */
   public static final int FORMAT = -1;

-  /** This is the current file format written. It differs
+  /** This format adds details used for lockless commits. It differs
    * slightly from the previous format in that file names
    * are never re-used (write once). Instead, each file is
    * written to the next generation. For example,
@@ -44,6 +44,13 @@ public final class SegmentInfos extends Vector {
    */
   public static final int FORMAT_LOCKLESS = -2;

+  /** This is the current file format written. It adds a
+   * "hasSingleNormFile" flag into each segment info.
+   * See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
+   * for details.
+   */
+  public static final int FORMAT_SINGLE_NORM_FILE = -3;
+
   public int counter = 0; // used to name new segments
   /**
    * counts how often the index has been changed by adding or deleting docs.
@@ -184,7 +191,7 @@ public final class SegmentInfos extends Vector {
       int format = input.readInt();
       if(format < 0){ // file contains explicit format info
         // check that it is a format we can understand
-        if (format < FORMAT_LOCKLESS)
+        if (format < FORMAT_SINGLE_NORM_FILE)
           throw new IOException("Unknown format version: " + format);
         version = input.readLong(); // read version
         counter = input.readInt(); // read counter
@@ -245,7 +252,7 @@ public final class SegmentInfos extends Vector {
     IndexOutput output = directory.createOutput(segmentFileName);

     try {
-      output.writeInt(FORMAT_LOCKLESS); // write FORMAT
+      output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
       output.writeLong(++version); // every write changes
                                    // the index
       output.writeInt(counter); // write counter
@@ -311,7 +318,7 @@ public final class SegmentInfos extends Vector {
       try {
         format = input.readInt();
         if(format < 0){
-          if (format < FORMAT_LOCKLESS)
+          if (format < FORMAT_SINGLE_NORM_FILE)
             throw new IOException("Unknown format version: " + format);
           version = input.readLong(); // read version
         }
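One point worth spelling out about the constant bump in SegmentInfos: the format numbers are negative and grow more negative with each revision, so the guard "format < FORMAT_SINGLE_NORM_FILE" now means "written by a newer, unknown release" and is rejected, while the "format <= ..." comparisons gate optional fields that only newer files contain. A small sketch of the two comparisons, with hard-coded constants and simplified names (not the actual SegmentInfos code):

import java.io.IOException;

final class SegmentsFormatSketch {
  static final int FORMAT = -1;                   // Lucene 1.4
  static final int FORMAT_LOCKLESS = -2;          // lockless commits
  static final int FORMAT_SINGLE_NORM_FILE = -3;  // current: adds the hasSingleNormFile byte

  // Reject segments files written by a release newer than this code understands.
  static void checkKnown(int format) throws IOException {
    if (format < FORMAT_SINGLE_NORM_FILE)
      throw new IOException("Unknown format version: " + format);
  }

  // Decide whether an optional field introduced in format 'introducedIn' is present.
  static boolean hasField(int format, int introducedIn) {
    return format <= introducedIn;  // e.g. hasField(-3, FORMAT_SINGLE_NORM_FILE) == true
  }
}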
SegmentReader.java

@@ -321,7 +321,7 @@ class SegmentReader extends IndexReader {
           if (addedNrm) continue; // add .nrm just once
           addedNrm = true;
         }
-        files.addElement(name);
+        files.addElement(name);
       }
     }
     return files;
Index file formats documentation

@@ -762,8 +762,8 @@
            <p>
                The active segments in the index are stored in the
                segment info file,
-               <tt>segments_N</tt>
-               . There may
+               <tt>segments_N</tt>.
+               There may
                be one or more
                <tt>segments_N</tt>
                files in the
@@ -779,13 +779,13 @@

            <p>
                As of 2.1, there is also a file
-               <tt>segments.gen</tt>
-               . This file contains the
+               <tt>segments.gen</tt>.
+               This file contains the
                current generation (the
                <tt>_N</tt>
                in
-               <tt>segments_N</tt>
-               ) of the index. This is
+               <tt>segments_N</tt>)
+               of the index. This is
                used only as a fallback in case the current
                generation cannot be accurately determined by
                directory listing alone (as is the case for some
@@ -803,11 +803,9 @@
            </p>
            <p>
                <b>2.1 and above:</b>
-               Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen
-               <sup>NumField</sup>
-               >
-               <sup>SegCount</sup>
-               , IsCompoundFile
+               Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, HasSingleNormFile, NumField,
+               NormGen<sup>NumField</sup>,
+               IsCompoundFile><sup>SegCount</sup>
            </p>

            <p>
@@ -823,11 +821,11 @@
            </p>

            <p>
-               IsCompoundFile --> Int8
+               IsCompoundFile, HasSingleNormFile --> Int8
            </p>

            <p>
-               Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1.
+               Format is -1 as of Lucene 1.4 and -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1.
            </p>

            <p>
@@ -881,6 +879,13 @@
                exists.
            </p>

+           <p>
+               If HasSingleNormFile is 1, then the field norms are
+               written as a single joined file (with extension
+               <tt>.nrm</tt>); if it is 0 then each field's norms
+               are stored as separate <tt>.fN</tt> files. See
+               "Normalization Factors" below for details.
+           </p>
+
        </section>

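To make the new HasSingleNormFile entry concrete: a reader that wants the norms for field number N of a segment looks for one of two files, depending on the flag. The helper below only illustrates the naming rule described above; the real code goes through IndexFileNames and also handles per-field norm generations, so this is not Lucene's API.

// Illustration of the naming rule described above; not Lucene's IndexFileNames API.
final class NormsFileNaming {
  static String normsFile(String segmentName, int fieldNumber, boolean hasSingleNormFile) {
    if (hasSingleNormFile) {
      return segmentName + ".nrm";            // all fields' norms joined in one file
    }
    return segmentName + ".f" + fieldNumber;  // one file per field, e.g. _3.f2
  }

  public static void main(String[] args) {
    System.out.println(normsFile("_3", 2, true));  // _3.nrm
    System.out.println(normsFile("_3", 2, false)); // _3.f2
  }
}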
TestBackwardsCompatibility.java

@@ -85,46 +85,34 @@ public class TestBackwardsCompatibility extends TestCase
     rmDir(dirName);
   }

-  public void testSearchOldIndexCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    searchIndex(dirName);
-    rmDir(dirName);
+  public void testSearchOldIndex() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      searchIndex(dirName);
+      rmDir(dirName);
+    }
   }

-  public void testIndexOldIndexCFSNoAdds() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    changeIndexNoAdds(dirName);
-    rmDir(dirName);
+  public void testIndexOldIndexNoAdds() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      changeIndexNoAdds(dirName);
+      rmDir(dirName);
+    }
   }

-  public void testIndexOldIndexCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
-    unzip(dirName);
-    changeIndexWithAdds(dirName);
-    rmDir(dirName);
-  }
-
-  public void testSearchOldIndexNoCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    searchIndex(dirName);
-    rmDir(dirName);
-  }
-
-  public void testIndexOldIndexNoCFS() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    changeIndexWithAdds(dirName);
-    rmDir(dirName);
-  }
-
-  public void testIndexOldIndexNoCFSNoAdds() throws IOException {
-    String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
-    unzip(dirName);
-    changeIndexNoAdds(dirName);
-    rmDir(dirName);
+  public void testIndexOldIndex() throws IOException {
+    String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
+    for(int i=0;i<oldNames.length;i++) {
+      String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
+      unzip(dirName);
+      changeIndexWithAdds(dirName);
+      rmDir(dirName);
+    }
   }

   public void searchIndex(String dirName) throws IOException {
TestDoc.java

@@ -108,21 +108,21 @@ public class TestDoc extends TestCase {
      Directory directory = FSDirectory.getDirectory(indexDir, true);
      directory.close();

-     indexDoc("one", "test.txt");
-     printSegment(out, "one", 1);
+     SegmentInfo si1 = indexDoc("one", "test.txt");
+     printSegment(out, si1);

-     indexDoc("two", "test2.txt");
-     printSegment(out, "two", 1);
+     SegmentInfo si2 = indexDoc("two", "test2.txt");
+     printSegment(out, si2);

-     merge("one", 1, "two", 1, "merge", false);
-     printSegment(out, "merge", 2);
+     SegmentInfo siMerge = merge(si1, si2, "merge", false);
+     printSegment(out, siMerge);

-     merge("one", 1, "two", 1, "merge2", false);
-     printSegment(out, "merge2", 2);
-
-     merge("merge", 2, "merge2", 2, "merge3", false);
-     printSegment(out, "merge3", 4);
+     SegmentInfo siMerge2 = merge(si1, si2, "merge2", false);
+     printSegment(out, siMerge2);
+
+     SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false);
+     printSegment(out, siMerge3);

      out.close();
      sw.close();
      String multiFileOutput = sw.getBuffer().toString();
@@ -134,21 +134,21 @@ public class TestDoc extends TestCase {
      directory = FSDirectory.getDirectory(indexDir, true);
      directory.close();

-     indexDoc("one", "test.txt");
-     printSegment(out, "one", 1);
+     si1 = indexDoc("one", "test.txt");
+     printSegment(out, si1);

-     indexDoc("two", "test2.txt");
-     printSegment(out, "two", 1);
+     si2 = indexDoc("two", "test2.txt");
+     printSegment(out, si2);

-     merge("one", 1, "two", 1, "merge", true);
-     printSegment(out, "merge", 2);
+     siMerge = merge(si1, si2, "merge", true);
+     printSegment(out, siMerge);

-     merge("one", 1, "two", 1, "merge2", true);
-     printSegment(out, "merge2", 2);
-
-     merge("merge", 2, "merge2", 2, "merge3", true);
-     printSegment(out, "merge3", 4);
+     siMerge2 = merge(si1, si2, "merge2", true);
+     printSegment(out, siMerge2);
+
+     siMerge3 = merge(siMerge, siMerge2, "merge3", true);
+     printSegment(out, siMerge3);

      out.close();
      sw.close();
      String singleFileOutput = sw.getBuffer().toString();
@@ -157,7 +157,7 @@ public class TestDoc extends TestCase {
   }


-   private void indexDoc(String segment, String fileName)
+   private SegmentInfo indexDoc(String segment, String fileName)
   throws Exception
   {
      Directory directory = FSDirectory.getDirectory(indexDir, false);
@@ -171,18 +171,18 @@ public class TestDoc extends TestCase {
      writer.addDocument(segment, doc);

      directory.close();
+     return new SegmentInfo(segment, 1, directory, false, false);
   }


-   private void merge(String seg1, int docCount1, String seg2, int docCount2, String merged, boolean useCompoundFile)
+   private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile)
   throws Exception {
      Directory directory = FSDirectory.getDirectory(indexDir, false);

-     SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, docCount1, directory));
-     SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, docCount2, directory));
+     SegmentReader r1 = SegmentReader.get(si1);
+     SegmentReader r2 = SegmentReader.get(si2);

-     SegmentMerger merger =
-       new SegmentMerger(directory, merged);
+     SegmentMerger merger = new SegmentMerger(directory, merged);

      merger.add(r1);
      merger.add(r2);
@@ -196,14 +196,14 @@ public class TestDoc extends TestCase {
      }

      directory.close();
+     return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true);
   }


-   private void printSegment(PrintWriter out, String segment, int docCount)
+   private void printSegment(PrintWriter out, SegmentInfo si)
   throws Exception {
      Directory directory = FSDirectory.getDirectory(indexDir, false);
-     SegmentReader reader =
-       SegmentReader.get(new SegmentInfo(segment, docCount, directory));
+     SegmentReader reader = SegmentReader.get(si);

      for (int i = 0; i < reader.numDocs(); i++)
        out.println(reader.document(i));
TestSegmentMerger.java

@@ -70,7 +70,7 @@ public class TestSegmentMerger extends TestCase {
    merger.closeReaders();
    assertTrue(docsMerged == 2);
    //Should be able to open a new SegmentReader against the new directory
-   SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
+   SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true));
    assertTrue(mergedReader != null);
    assertTrue(mergedReader.numDocs() == 2);
    Document newDoc1 = mergedReader.document(0);