LUCENE-3296: Enable passing a IndexWriterConfig into PKIndexSplitter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145479 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-07-12 07:58:44 +00:00
parent f34671ca46
commit 98df516384
4 changed files with 37 additions and 16 deletions
lucene/contrib/misc/src

View File

@ -57,7 +57,7 @@ public class MultiPassIndexSplitter {
* assigned in a deterministic round-robin fashion to one of the output splits.
* @throws IOException
*/
public void split(IndexReader input, Directory[] outputs, boolean seq) throws IOException {
public void split(Version version, IndexReader input, Directory[] outputs, boolean seq) throws IOException {
if (outputs == null || outputs.length < 2) {
throw new IOException("Invalid number of outputs.");
}
@ -96,7 +96,7 @@ public class MultiPassIndexSplitter {
}
}
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
Version.LUCENE_CURRENT,
version,
null)
.setOpenMode(OpenMode.CREATE));
System.err.println("Writing part " + (i + 1) + " ...");
@ -106,6 +106,7 @@ public class MultiPassIndexSplitter {
System.err.println("Done.");
}
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
if (args.length < 5) {
System.err.println("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
@ -169,7 +170,7 @@ public class MultiPassIndexSplitter {
} else {
input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
}
splitter.split(input, dirs, seq);
splitter.split(Version.LUCENE_CURRENT, input, dirs, seq);
}
/**

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
@ -39,17 +38,30 @@ public class PKIndexSplitter {
private final Filter docsInFirstIndex;
private final Directory input;
private final Directory dir1;
private final Directory dir2;
private final Directory dir2;
private final IndexWriterConfig config1;
private final IndexWriterConfig config2;
/**
* Split an index based on a {@link Filter}. All documents that match the filter
* are sent to dir1, remaining ones to dir2.
*/
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
}
private static IndexWriterConfig newDefaultConfig(Version version) {
return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
}
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
this.input = input;
this.dir1 = dir1;
this.dir2 = dir2;
this.docsInFirstIndex = docsInFirstIndex;
this.config1 = config1;
this.config2 = config2;
}
/**
@ -57,27 +69,33 @@ public class PKIndexSplitter {
* and a 'middle' term. If the middle term is present, it's
* sent to dir2.
*/
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
this(input, dir1, dir2,
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
this(version, input, dir1, dir2,
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false));
}
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
this(input, dir1, dir2,
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
}
public void split() throws IOException {
boolean success = false;
IndexReader reader = IndexReader.open(input);
try {
createIndex(dir1, reader, docsInFirstIndex, false);
createIndex(dir2, reader, docsInFirstIndex, true);
// pass an individual config in here since one config can not be reused!
createIndex(config1, dir1, reader, docsInFirstIndex, false);
createIndex(config2, dir2, reader, docsInFirstIndex, true);
success = true;
} finally {
IOUtils.closeSafely(!success, reader);
}
}
private void createIndex(Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
boolean success = false;
IndexWriter w = new IndexWriter(target, new IndexWriterConfig(
Version.LUCENE_CURRENT, null).setOpenMode(OpenMode.CREATE));
IndexWriter w = new IndexWriter(target, config);
try {
w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
success = true;

View File

@ -66,7 +66,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
newDirectory(),
newDirectory()
};
splitter.split(input, dirs, false);
splitter.split(TEST_VERSION_CURRENT, input, dirs, false);
IndexReader ir;
ir = IndexReader.open(dirs[0], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
@ -111,7 +111,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
newDirectory(),
newDirectory()
};
splitter.split(input, dirs, true);
splitter.split(TEST_VERSION_CURRENT, input, dirs, true);
IndexReader ir;
ir = IndexReader.open(dirs[0], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);

View File

@ -68,7 +68,9 @@ public class TestPKIndexSplitter extends LuceneTestCase {
private void checkSplitting(Directory dir, Term splitTerm, int leftCount, int rightCount) throws Exception {
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm);
PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)),
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
splitter.split();
IndexReader ir1 = IndexReader.open(dir1);