diff --git a/contrib/CHANGES.txt b/contrib/CHANGES.txt
index 57e3a96dc6f..6f6d4dcc415 100644
--- a/contrib/CHANGES.txt
+++ b/contrib/CHANGES.txt
@@ -33,6 +33,11 @@ New features
segment merges to give better search performance in a mixed
indexing/searching environment. (John Wang via Mike McCandless)
+ * LUCENE-1959: Add IndexSplitter tool, to copy specific segments out
+ of the index into a new index. It can also list the segments in
+ the index, and delete specified segments. (Jason Rutherglen via
+ Mike McCandless)
+
Optimizations
Documentation
diff --git a/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
new file mode 100644
index 00000000000..3683d9ceb4a
--- /dev/null
+++ b/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.store.FSDirectory;
+
+/**
+ * Command-line tool that enables listing segments in an
+ * index, copying specific segments to another index, and
+ * deleting segments from an index.
+ *
+ * <p>This tool does file-level copying of segments files.
+ * This means it's unable to split apart a single segment
+ * into multiple segments. For example if your index is
+ * optimized, this tool won't help. Also, it does basic
+ * file-level copying (using simple
+ * File{In,Out}putStream) so it will not work with non
+ * FSDirectory Directory impls.
+ *
+ * <p><b>NOTE</b>: The tool is experimental and might change
+ * in incompatible ways in the next release. You can easily
+ * accidentally remove segments from your index so be
+ * careful!
+ */
+public class IndexSplitter {
+ public SegmentInfos infos;
+
+ FSDirectory fsDir;
+
+ File dir;
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) throws Exception {
+ if (args.length < 2) {
+ System.err
+ .println("Usage: IndexSplitter -l (list the segments and their sizes)");
+ System.err.println("IndexSplitter +");
+ System.err
+ .println("IndexSplitter -d (delete the following segments)");
+ return;
+ }
+ File srcDir = new File(args[0]);
+ IndexSplitter is = new IndexSplitter(srcDir);
+ if (!srcDir.exists()) {
+ throw new Exception("srcdir:" + srcDir.getAbsolutePath()
+ + " doesn't exist");
+ }
+ if (args[1].equals("-l")) {
+ is.listSegments();
+ } else if (args[1].equals("-d")) {
+ List segs = new ArrayList();
+ for (int x = 2; x < args.length; x++) {
+ segs.add(args[x]);
+ }
+ is.remove((String[]) segs.toArray(new String[0]));
+ } else {
+ File targetDir = new File(args[1]);
+ List segs = new ArrayList();
+ for (int x = 2; x < args.length; x++) {
+ segs.add(args[x]);
+ }
+ is.split(targetDir, (String[]) segs.toArray(new String[0]));
+ }
+ }
+
+ public IndexSplitter(File dir) throws IOException {
+ this.dir = dir;
+ fsDir = FSDirectory.open(dir);
+ infos = new SegmentInfos();
+ infos.read(fsDir);
+ }
+
+ public void listSegments() throws IOException {
+ DecimalFormat formatter = new DecimalFormat("###,###.###");
+ for (int x = 0; x < infos.size(); x++) {
+ SegmentInfo info = infos.info(x);
+ String sizeStr = formatter.format(info.sizeInBytes());
+ System.out.println(info.name + " " + sizeStr);
+ }
+ }
+
+ private int getIdx(String name) {
+ for (int x = 0; x < infos.size(); x++) {
+ if (name.equals(infos.info(x).name))
+ return x;
+ }
+ return -1;
+ }
+
+ private SegmentInfo getInfo(String name) {
+ for (int x = 0; x < infos.size(); x++) {
+ if (name.equals(infos.info(x).name))
+ return infos.info(x);
+ }
+ return null;
+ }
+
+ public void remove(String[] segs) throws IOException {
+ for (String n : segs) {
+ int idx = getIdx(n);
+ infos.remove(idx);
+ }
+ infos.commit(fsDir);
+ }
+
+ public void split(File destDir, String[] segs) throws IOException {
+ destDir.mkdirs();
+ FSDirectory destFSDir = FSDirectory.open(destDir);
+ SegmentInfos destInfos = new SegmentInfos();
+ for (String n : segs) {
+ SegmentInfo info = getInfo(n);
+ destInfos.add(info);
+ // now copy files over
+ List files = info.files();
+ for (int x = 0; x < files.size(); x++) {
+ String srcName = (String) files.get(x);
+ File srcFile = new File(dir, srcName);
+ File destFile = new File(destDir, srcName);
+ copyFile(srcFile, destFile);
+ }
+ }
+ destInfos.commit(destFSDir);
+ // System.out.println("destDir:"+destDir.getAbsolutePath());
+ }
+
+ private static void copyFile(File src, File dst) throws IOException {
+ InputStream in = new FileInputStream(src);
+ OutputStream out = new FileOutputStream(dst);
+ byte[] buf = new byte[32*1024];
+ int len;
+ while ((len = in.read(buf)) > 0) {
+ out.write(buf, 0, len);
+ }
+ in.close();
+ out.close();
+ }
+}
diff --git a/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
new file mode 100644
index 00000000000..77cc4a49ff0
--- /dev/null
+++ b/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.File;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestIndexSplitter extends LuceneTestCase {
+  /**
+   * Builds an on-disk index with three commits (100, 50 and 50 documents),
+   * then checks that IndexSplitter can copy one segment into a fresh index,
+   * both through the API and through the command line, and can delete that
+   * segment from the source index.
+   */
+  public void test() throws Exception {
+    String tmpDir = System.getProperty("java.io.tmpdir");
+    File dir = new File(tmpDir, "testfilesplitter");
+    _TestUtil.rmDir(dir);
+    dir.mkdirs();
+    File destDir = new File(tmpDir, "testfilesplitterdest");
+    _TestUtil.rmDir(destDir);
+    destDir.mkdirs();
+    FSDirectory fsDir = FSDirectory.open(dir);
+    try {
+      IndexWriter iw = new IndexWriter(fsDir, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
+      for (int x=0; x < 100; x++) {
+        Document doc = TestIndexWriterReader.createDocument(x, "index", 5);
+        iw.addDocument(doc);
+      }
+      iw.commit();
+      for (int x=100; x < 150; x++) {
+        Document doc = TestIndexWriterReader.createDocument(x, "index2", 5);
+        iw.addDocument(doc);
+      }
+      iw.commit();
+      for (int x=150; x < 200; x++) {
+        Document doc = TestIndexWriterReader.createDocument(x, "index3", 5);
+        iw.addDocument(doc);
+      }
+      iw.commit();
+      IndexReader writerReader = iw.getReader();
+      try {
+        assertEquals(3, writerReader.getSequentialSubReaders().length);
+      } finally {
+        writerReader.close();
+      }
+      iw.close();
+    } finally {
+      fsDir.close();
+    }
+    // we should have 3 segments now, one per commit; split out the second one
+    IndexSplitter is = new IndexSplitter(dir);
+    String splitSegName = is.infos.info(1).name;
+    is.split(destDir, new String[] {splitSegName});
+    assertMaxDoc(50, destDir);
+
+    // now test cmdline
+    File destDir2 = new File(tmpDir, "testfilesplitterdest2");
+    _TestUtil.rmDir(destDir2);
+    destDir2.mkdirs();
+    IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName});
+    assertEquals(3, destDir2.listFiles().length);
+    assertMaxDoc(50, destDir2);
+
+    // now remove the copied segment from src
+    IndexSplitter.main(new String[] {dir.getAbsolutePath(), "-d", splitSegName});
+    FSDirectory srcFSDir = FSDirectory.open(dir);
+    try {
+      IndexReader r = IndexReader.open(srcFSDir, true);
+      try {
+        assertEquals(2, r.getSequentialSubReaders().length);
+      } finally {
+        r.close();
+      }
+    } finally {
+      srcFSDir.close();
+    }
+  }
+
+  /**
+   * Opens the index in {@code indexDir} read-only, asserts its maxDoc, and
+   * closes the reader and directory again whether or not the assertion holds.
+   */
+  private void assertMaxDoc(int expected, File indexDir) throws Exception {
+    FSDirectory d = FSDirectory.open(indexDir);
+    try {
+      IndexReader r = IndexReader.open(d, true);
+      try {
+        assertEquals(expected, r.maxDoc());
+      } finally {
+        r.close();
+      }
+    } finally {
+      d.close();
+    }
+  }
+}