diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 24ca9a3c784..396eb449aa2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -511,6 +511,9 @@ Optimizations
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
buffer deleted terms to later apply to past segments. (Mike McCandless)
+* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
+ aren't already and MergePolicy allows that. (Shai Erera)
+
Bug fixes
* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
diff --git a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
index a077a8f2716..e47fed7b717 100644
--- a/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
@@ -60,6 +60,9 @@ public final class CompoundFileWriter {
/** temporary holder for the start of this file's data section */
long dataOffset;
+
+ /** the directory which contains the file. */
+ Directory dir;
}
// Before versioning started.
@@ -119,6 +122,14 @@ public final class CompoundFileWriter {
* has been added already
*/
public void addFile(String file) {
+ addFile(file, directory);
+ }
+
+ /**
+ * Same as {@link #addFile(String)}, only for files that are found in an
+ * external {@link Directory}.
+ */
+ public void addFile(String file, Directory dir) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");
@@ -133,6 +144,7 @@ public final class CompoundFileWriter {
FileEntry entry = new FileEntry();
entry.file = file;
+ entry.dir = dir;
entries.add(entry);
}
@@ -170,7 +182,7 @@ public final class CompoundFileWriter {
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(IndexFileNames.stripSegmentName(fe.file));
- totalSize += directory.fileLength(fe.file);
+ totalSize += fe.dir.fileLength(fe.file);
}
// Pre-allocate size of file as optimization --
@@ -216,7 +228,7 @@ public final class CompoundFileWriter {
* output stream.
*/
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
- IndexInput is = directory.openInput(source.file);
+ IndexInput is = source.dir.openInput(source.file);
try {
long startPtr = os.getFilePointer();
long length = is.length();
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
index 54f6ad2173e..9c069afa4e6 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -23,6 +23,7 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.MapBackedSet;
@@ -2322,10 +2324,10 @@ public class IndexWriter implements Closeable {
*
* NOTE: this method only copies the segments of the incoming indexes
* and does not merge them. Therefore deleted documents are not removed and
- * the new segments are not merged with the existing ones. Also, the segments
- * are copied as-is, meaning they are not converted to CFS if they aren't,
- * and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
- * or {@link #optimize} afterwards.
+ * the new segments are not merged with the existing ones. Also, if the merge
+ * policy allows compound files, then any segment that is not compound is
+ * converted to such. However, if the segment is compound, it is copied as-is
+ * even if the merge policy does not allow compound files.
*
*
This requires this index not be among those to be added.
*
@@ -2349,6 +2351,7 @@ public class IndexWriter implements Closeable {
int docCount = 0;
List infos = new ArrayList();
+ Comparator versionComparator = StringHelper.getVersionComparator();
for (Directory dir : dirs) {
if (infoStream != null) {
message("addIndexes: process directory " + dir);
@@ -2368,46 +2371,22 @@ public class IndexWriter implements Closeable {
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
- // Determine if the doc store of this segment needs to be copied. It's
- // only relevant for segments who share doc store with others, because
- // the DS might have been copied already, in which case we just want
- // to update the DS name of this SegmentInfo.
- // NOTE: pre-3x segments include a null DSName if they don't share doc
- // store. So the following code ensures we don't accidentally insert
- // 'null' to the map.
- final String newDsName;
- if (dsName != null) {
- if (dsNames.containsKey(dsName)) {
- newDsName = dsNames.get(dsName);
- } else {
- dsNames.put(dsName, newSegName);
- newDsName = newSegName;
- }
+ // create CFS only if the source segment is not CFS, and MP agrees it
+ // should be CFS.
+ boolean createCFS;
+ synchronized (this) { // Guard segmentInfos
+ createCFS = !info.getUseCompoundFile()
+ && mergePolicy.useCompoundFile(segmentInfos, info)
+ // optimize case only for segments that don't share doc stores
+ && versionComparator.compare(info.getVersion(), "3.1") >= 0;
+ }
+
+ if (createCFS) {
+ copySegmentIntoCFS(info, newSegName);
} else {
- newDsName = newSegName;
+ copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
}
- // Copy the segment files
- for (String file: info.files()) {
- final String newFileName;
- if (IndexFileNames.isDocStoreFile(file)) {
- newFileName = newDsName + IndexFileNames.stripSegmentName(file);
- if (dsFilesCopied.contains(newFileName)) {
- continue;
- }
- dsFilesCopied.add(newFileName);
- } else {
- newFileName = newSegName + IndexFileNames.stripSegmentName(file);
- }
- assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
- dir.copy(directory, file, newFileName);
- }
-
- // Update SI appropriately
- info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
- info.dir = directory;
- info.name = newSegName;
-
infos.add(info);
}
}
@@ -2496,6 +2475,76 @@ public class IndexWriter implements Closeable {
}
}
+ /** Copies the segment into the IndexWriter's directory, as a compound segment. */
+ private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
+ String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
+ Collection files = info.files();
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
+ for (String file : files) {
+ String newFileName = segName + IndexFileNames.stripSegmentName(file);
+ if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
+ && !IndexFileNames.isSeparateNormsFile(file)) {
+ cfsWriter.addFile(file, info.dir);
+ } else {
+ assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
+ info.dir.copy(directory, file, newFileName);
+ }
+ }
+
+ // Create the .cfs
+ cfsWriter.close();
+
+ info.dir = directory;
+ info.name = segName;
+ info.setUseCompoundFile(true);
+ }
+
+ /** Copies the segment files as-is into the IndexWriter's directory. */
+ private void copySegmentAsIs(SegmentInfo info, String segName,
+ Map dsNames, Set dsFilesCopied)
+ throws IOException {
+ // Determine if the doc store of this segment needs to be copied. It's
+ // only relevant for segments that share doc store with others,
+ // because the DS might have been copied already, in which case we
+ // just want to update the DS name of this SegmentInfo.
+ // NOTE: pre-3x segments include a null DSName if they don't share doc
+ // store. The following code ensures we don't accidentally insert
+ // 'null' to the map.
+ String dsName = info.getDocStoreSegment();
+ final String newDsName;
+ if (dsName != null) {
+ if (dsNames.containsKey(dsName)) {
+ newDsName = dsNames.get(dsName);
+ } else {
+ dsNames.put(dsName, segName);
+ newDsName = segName;
+ }
+ } else {
+ newDsName = segName;
+ }
+
+ // Copy the segment files
+ for (String file: info.files()) {
+ final String newFileName;
+ if (IndexFileNames.isDocStoreFile(file)) {
+ newFileName = newDsName + IndexFileNames.stripSegmentName(file);
+ if (dsFilesCopied.contains(newFileName)) {
+ continue;
+ }
+ dsFilesCopied.add(newFileName);
+ } else {
+ newFileName = segName + IndexFileNames.stripSegmentName(file);
+ }
+
+ assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
+ info.dir.copy(directory, file, newFileName);
+ }
+
+ info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
+ info.dir = directory;
+ info.name = segName;
+ }
+
/**
* A hook for extending classes to execute operations after pending added and
* deleted documents have been flushed to the Directory but before the change
diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
index 097d9c9944d..6f55e7013d6 100755
--- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -74,7 +74,7 @@ public class TestAddIndexes extends LuceneTestCase {
writer.close();
writer = newWriter(aux2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
- // add 40 documents in compound files
+ // add 50 documents in compound files
addDocs2(writer, 50);
assertEquals(50, writer.maxDoc());
writer.close();
@@ -1084,4 +1084,63 @@ public class TestAddIndexes extends LuceneTestCase {
assertEquals("Only one compound segment should exist", 4, dir.listAll().length);
}
+ // LUCENE-3126: tests that if a non-CFS segment is copied, it is converted to
+ // a CFS, given MP preferences
+ public void testCopyIntoCFS() throws Exception {
+ // create an index, no CFS (so we can assert that existing segments are not affected)
+ Directory target = newDirectory();
+ LogMergePolicy lmp = newLogMergePolicy(false);
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null).setMergePolicy(lmp);
+ IndexWriter w = new IndexWriter(target, conf);
+ w.addDocument(new Document());
+ w.commit();
+ assertFalse(w.segmentInfos.info(0).getUseCompoundFile());
+
+ // prepare second index, no-CFS too + .del file + separate norms file
+ Directory src = newDirectory();
+ LogMergePolicy lmp2 = newLogMergePolicy(false);
+ IndexWriterConfig conf2 = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setMergePolicy(lmp2);
+ IndexWriter w2 = new IndexWriter(src, conf2);
+ Document doc = new Document();
+ doc.add(new Field("c", "some text", Store.YES, Index.ANALYZED));
+ w2.addDocument(doc);
+ doc = new Document();
+ doc.add(new Field("d", "delete", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+ w2.addDocument(doc);
+ w2.commit();
+ w2.deleteDocuments(new Term("d", "delete"));
+ w2.commit();
+ w2.close();
+
+ // create separate norms file
+ IndexReader r = IndexReader.open(src, false);
+ r.setNorm(0, "c", (byte) 1);
+ r.close();
+ assertTrue(".del file not found", src.fileExists("_0_1.del"));
+ assertTrue("separate norms file not found", src.fileExists("_0_1.s0"));
+
+ // Case 1: force 'CFS' on target
+ lmp.setUseCompoundFile(true);
+ lmp.setNoCFSRatio(1.0);
+ w.addIndexes(src);
+ w.commit();
+ assertFalse("existing segments should not be modified by addIndexes", w.segmentInfos.info(0).getUseCompoundFile());
+ assertTrue("segment should have been converted to a CFS by addIndexes", w.segmentInfos.info(1).getUseCompoundFile());
+ assertTrue(".del file not found", target.fileExists("_1_1.del"));
+ assertTrue("separate norms file not found", target.fileExists("_1_1.s0"));
+
+ // Case 2: LMP disallows CFS
+ lmp.setUseCompoundFile(false);
+ w.addIndexes(src);
+ w.commit();
+ assertFalse("segment should not have been converted to a CFS by addIndexes if MP disallows", w.segmentInfos.info(2).getUseCompoundFile());
+
+ w.close();
+
+ // cleanup
+ src.close();
+ target.close();
+ }
+
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java b/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java
index 88d499e1359..39eb2f6702a 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCompoundFile.java
@@ -648,4 +648,25 @@ public class TestCompoundFile extends LuceneTestCase
}
}
+
+ public void testAddExternalFile() throws IOException {
+ createSequenceFile(dir, "d1", (byte) 0, 15);
+
+ Directory newDir = newDirectory();
+ CompoundFileWriter csw = new CompoundFileWriter(newDir, "d.csf");
+ csw.addFile("d1", dir);
+ csw.close();
+
+ CompoundFileReader csr = new CompoundFileReader(newDir, "d.csf");
+ IndexInput expected = dir.openInput("d1");
+ IndexInput actual = csr.openInput("d1");
+ assertSameStreams("d1", expected, actual);
+ assertSameSeekBehavior("d1", expected, actual);
+ expected.close();
+ actual.close();
+ csr.close();
+
+ newDir.close();
+ }
+
}