mirror of https://github.com/apache/lucene.git
LUCENE-1959: add IndexSplitter tool to pull segment files out of an index into another
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@823153 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6be57e324e
commit
2fc8e01d9e
|
@ -33,6 +33,11 @@ New features
|
||||||
segment merges to give better search performance in a mixed
|
segment merges to give better search performance in a mixed
|
||||||
indexing/searching environment. (John Wang via Mike McCandless)
|
indexing/searching environment. (John Wang via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-1959: Add IndexSplitter tool, to copy specific segments out
|
||||||
|
of the index into a new index. It can also list the segments in
|
||||||
|
the index, and delete specified segments. (Jason Rutherglen via
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
|
@ -0,0 +1,163 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.text.DecimalFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Command-line tool that enables listing segments in an
|
||||||
|
* index, copying specific segments to another index, and
|
||||||
|
* deleting segments from an index.
|
||||||
|
*
|
||||||
|
* <p>This tool does file-level copying of segments files.
|
||||||
|
* This means it's unable to split apart a single segment
|
||||||
|
* into multiple segments. For example if your index is
|
||||||
|
* optimized, this tool won't help. Also, it does basic
|
||||||
|
* file-level copying (using simple
|
||||||
|
* File{In,Out}putStream) so it will not work with non
|
||||||
|
* FSDirectory Directory impls.</p>
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: The tool is experimental and might change
|
||||||
|
* in incompatible ways in the next release. You can easily
|
||||||
|
* accidentally remove segments from your index so be
|
||||||
|
* careful!
|
||||||
|
*/
|
||||||
|
public class IndexSplitter {
|
||||||
|
public SegmentInfos infos;
|
||||||
|
|
||||||
|
FSDirectory fsDir;
|
||||||
|
|
||||||
|
File dir;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param args
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
if (args.length < 2) {
|
||||||
|
System.err
|
||||||
|
.println("Usage: IndexSplitter <srcDir> -l (list the segments and their sizes)");
|
||||||
|
System.err.println("IndexSplitter <srcDir> <destDir> <segments>+");
|
||||||
|
System.err
|
||||||
|
.println("IndexSplitter <srcDir> -d (delete the following segments)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
File srcDir = new File(args[0]);
|
||||||
|
IndexSplitter is = new IndexSplitter(srcDir);
|
||||||
|
if (!srcDir.exists()) {
|
||||||
|
throw new Exception("srcdir:" + srcDir.getAbsolutePath()
|
||||||
|
+ " doesn't exist");
|
||||||
|
}
|
||||||
|
if (args[1].equals("-l")) {
|
||||||
|
is.listSegments();
|
||||||
|
} else if (args[1].equals("-d")) {
|
||||||
|
List<String> segs = new ArrayList<String>();
|
||||||
|
for (int x = 2; x < args.length; x++) {
|
||||||
|
segs.add(args[x]);
|
||||||
|
}
|
||||||
|
is.remove((String[]) segs.toArray(new String[0]));
|
||||||
|
} else {
|
||||||
|
File targetDir = new File(args[1]);
|
||||||
|
List<String> segs = new ArrayList<String>();
|
||||||
|
for (int x = 2; x < args.length; x++) {
|
||||||
|
segs.add(args[x]);
|
||||||
|
}
|
||||||
|
is.split(targetDir, (String[]) segs.toArray(new String[0]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public IndexSplitter(File dir) throws IOException {
|
||||||
|
this.dir = dir;
|
||||||
|
fsDir = FSDirectory.open(dir);
|
||||||
|
infos = new SegmentInfos();
|
||||||
|
infos.read(fsDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void listSegments() throws IOException {
|
||||||
|
DecimalFormat formatter = new DecimalFormat("###,###.###");
|
||||||
|
for (int x = 0; x < infos.size(); x++) {
|
||||||
|
SegmentInfo info = infos.info(x);
|
||||||
|
String sizeStr = formatter.format(info.sizeInBytes());
|
||||||
|
System.out.println(info.name + " " + sizeStr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getIdx(String name) {
|
||||||
|
for (int x = 0; x < infos.size(); x++) {
|
||||||
|
if (name.equals(infos.info(x).name))
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SegmentInfo getInfo(String name) {
|
||||||
|
for (int x = 0; x < infos.size(); x++) {
|
||||||
|
if (name.equals(infos.info(x).name))
|
||||||
|
return infos.info(x);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove(String[] segs) throws IOException {
|
||||||
|
for (String n : segs) {
|
||||||
|
int idx = getIdx(n);
|
||||||
|
infos.remove(idx);
|
||||||
|
}
|
||||||
|
infos.commit(fsDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void split(File destDir, String[] segs) throws IOException {
|
||||||
|
destDir.mkdirs();
|
||||||
|
FSDirectory destFSDir = FSDirectory.open(destDir);
|
||||||
|
SegmentInfos destInfos = new SegmentInfos();
|
||||||
|
for (String n : segs) {
|
||||||
|
SegmentInfo info = getInfo(n);
|
||||||
|
destInfos.add(info);
|
||||||
|
// now copy files over
|
||||||
|
List files = info.files();
|
||||||
|
for (int x = 0; x < files.size(); x++) {
|
||||||
|
String srcName = (String) files.get(x);
|
||||||
|
File srcFile = new File(dir, srcName);
|
||||||
|
File destFile = new File(destDir, srcName);
|
||||||
|
copyFile(srcFile, destFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
destInfos.commit(destFSDir);
|
||||||
|
// System.out.println("destDir:"+destDir.getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void copyFile(File src, File dst) throws IOException {
|
||||||
|
InputStream in = new FileInputStream(src);
|
||||||
|
OutputStream out = new FileOutputStream(dst);
|
||||||
|
byte[] buf = new byte[32*1024];
|
||||||
|
int len;
|
||||||
|
while ((len = in.read(buf)) > 0) {
|
||||||
|
out.write(buf, 0, len);
|
||||||
|
}
|
||||||
|
in.close();
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
public class TestIndexSplitter extends LuceneTestCase {
|
||||||
|
public void test() throws Exception {
|
||||||
|
String tmpDir = System.getProperty("java.io.tmpdir");
|
||||||
|
File dir = new File(tmpDir, "testfilesplitter");
|
||||||
|
_TestUtil.rmDir(dir);
|
||||||
|
dir.mkdirs();
|
||||||
|
File destDir = new File(tmpDir, "testfilesplitterdest");
|
||||||
|
_TestUtil.rmDir(destDir);
|
||||||
|
destDir.mkdirs();
|
||||||
|
FSDirectory fsDir = FSDirectory.open(dir);
|
||||||
|
IndexWriter iw = new IndexWriter(fsDir, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
|
||||||
|
for (int x=0; x < 100; x++) {
|
||||||
|
Document doc = TestIndexWriterReader.createDocument(x, "index", 5);
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
iw.commit();
|
||||||
|
for (int x=100; x < 150; x++) {
|
||||||
|
Document doc = TestIndexWriterReader.createDocument(x, "index2", 5);
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
iw.commit();
|
||||||
|
for (int x=150; x < 200; x++) {
|
||||||
|
Document doc = TestIndexWriterReader.createDocument(x, "index3", 5);
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
iw.commit();
|
||||||
|
assertEquals(3, iw.getReader().getSequentialSubReaders().length);
|
||||||
|
iw.close();
|
||||||
|
// we should have 2 segments now
|
||||||
|
IndexSplitter is = new IndexSplitter(dir);
|
||||||
|
String splitSegName = is.infos.info(1).name;
|
||||||
|
is.split(destDir, new String[] {splitSegName});
|
||||||
|
IndexReader r = IndexReader.open(FSDirectory.open(destDir), true);
|
||||||
|
assertEquals(50, r.maxDoc());
|
||||||
|
|
||||||
|
// now test cmdline
|
||||||
|
File destDir2 = new File(tmpDir, "testfilesplitterdest2");
|
||||||
|
_TestUtil.rmDir(destDir2);
|
||||||
|
destDir2.mkdirs();
|
||||||
|
IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName});
|
||||||
|
assertEquals(3, destDir2.listFiles().length);
|
||||||
|
r = IndexReader.open(FSDirectory.open(destDir2), true);
|
||||||
|
assertEquals(50, r.maxDoc());
|
||||||
|
|
||||||
|
// now remove the copied segment from src
|
||||||
|
IndexSplitter.main(new String[] {dir.getAbsolutePath(), "-d", splitSegName});
|
||||||
|
r = IndexReader.open(FSDirectory.open(dir), true);
|
||||||
|
assertEquals(2, r.getSequentialSubReaders().length);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue