LUCENE-3296: Enable passing an IndexWriterConfig into PKIndexSplitter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145479 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-07-12 07:58:44 +00:00
parent f34671ca46
commit 98df516384
4 changed files with 37 additions and 16 deletions

View File

@ -57,7 +57,7 @@ public class MultiPassIndexSplitter {
* assigned in a deterministic round-robin fashion to one of the output splits. * assigned in a deterministic round-robin fashion to one of the output splits.
* @throws IOException * @throws IOException
*/ */
public void split(IndexReader input, Directory[] outputs, boolean seq) throws IOException { public void split(Version version, IndexReader input, Directory[] outputs, boolean seq) throws IOException {
if (outputs == null || outputs.length < 2) { if (outputs == null || outputs.length < 2) {
throw new IOException("Invalid number of outputs."); throw new IOException("Invalid number of outputs.");
} }
@ -96,7 +96,7 @@ public class MultiPassIndexSplitter {
} }
} }
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig( IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
Version.LUCENE_CURRENT, version,
null) null)
.setOpenMode(OpenMode.CREATE)); .setOpenMode(OpenMode.CREATE));
System.err.println("Writing part " + (i + 1) + " ..."); System.err.println("Writing part " + (i + 1) + " ...");
@ -106,6 +106,7 @@ public class MultiPassIndexSplitter {
System.err.println("Done."); System.err.println("Done.");
} }
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length < 5) { if (args.length < 5) {
System.err.println("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]"); System.err.println("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
@ -169,7 +170,7 @@ public class MultiPassIndexSplitter {
} else { } else {
input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()])); input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
} }
splitter.split(input, dirs, seq); splitter.split(Version.LUCENE_CURRENT, input, dirs, seq);
} }
/** /**

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
@ -39,17 +38,30 @@ public class PKIndexSplitter {
private final Filter docsInFirstIndex; private final Filter docsInFirstIndex;
private final Directory input; private final Directory input;
private final Directory dir1; private final Directory dir1;
private final Directory dir2; private final Directory dir2;
private final IndexWriterConfig config1;
private final IndexWriterConfig config2;
/** /**
* Split an index based on a {@link Filter}. All documents that match the filter * Split an index based on a {@link Filter}. All documents that match the filter
* are sent to dir1, remaining ones to dir2. * are sent to dir1, remaining ones to dir2.
*/ */
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) { public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
}
/**
 * Builds the writer configuration used when the caller supplies only a
 * {@link Version} instead of explicit configurations.
 * <p>
 * Each call constructs a new {@link IndexWriterConfig} with a null second
 * constructor argument and the open mode set to {@link OpenMode#CREATE}.
 * The Version-only constructor calls this once per output index, so the two
 * writers do not share a configuration.
 *
 * @param version the version passed through to the IndexWriterConfig constructor
 * @return the result of setOpenMode(OpenMode.CREATE) on the newly built configuration
 */
private static IndexWriterConfig newDefaultConfig(Version version) {
return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
}
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
this.input = input; this.input = input;
this.dir1 = dir1; this.dir1 = dir1;
this.dir2 = dir2; this.dir2 = dir2;
this.docsInFirstIndex = docsInFirstIndex; this.docsInFirstIndex = docsInFirstIndex;
this.config1 = config1;
this.config2 = config2;
} }
/** /**
@ -57,27 +69,33 @@ public class PKIndexSplitter {
* and a 'middle' term. If the middle term is present, it's * and a 'middle' term. If the middle term is present, it's
* sent to dir2. * sent to dir2.
*/ */
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) { public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
this(input, dir1, dir2, this(version, input, dir1, dir2,
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false)); new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false));
} }
/**
 * Split an index based on a 'middle' term, using caller-supplied writer
 * configurations.
 * <p>
 * Delegates to the {@link Filter}-based constructor with a
 * {@link TermRangeFilter} built from the mid term's field and bytes: null
 * lower term, midTerm.bytes() as the upper term, and the flags
 * {@code true, false} (presumably includeLower/includeUpper, so the mid term
 * itself is excluded from the first index; confirm against TermRangeFilter).
 *
 * @param input the directory holding the index to split
 * @param dir1 target for documents matching the range filter
 * @param dir2 target for the remaining documents
 * @param midTerm the term whose field and bytes define the range filter
 * @param config1 configuration for the writer opened on dir1
 * @param config2 configuration for the writer opened on dir2; pass a separate
 *        instance from config1, since one config cannot be reused
 */
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
this(input, dir1, dir2,
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
}
public void split() throws IOException { public void split() throws IOException {
boolean success = false; boolean success = false;
IndexReader reader = IndexReader.open(input); IndexReader reader = IndexReader.open(input);
try { try {
createIndex(dir1, reader, docsInFirstIndex, false); // pass an individual config in here since one config can not be reused!
createIndex(dir2, reader, docsInFirstIndex, true); createIndex(config1, dir1, reader, docsInFirstIndex, false);
createIndex(config2, dir2, reader, docsInFirstIndex, true);
success = true; success = true;
} finally { } finally {
IOUtils.closeSafely(!success, reader); IOUtils.closeSafely(!success, reader);
} }
} }
private void createIndex(Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException { private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
boolean success = false; boolean success = false;
IndexWriter w = new IndexWriter(target, new IndexWriterConfig( IndexWriter w = new IndexWriter(target, config);
Version.LUCENE_CURRENT, null).setOpenMode(OpenMode.CREATE));
try { try {
w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter)); w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
success = true; success = true;

View File

@ -66,7 +66,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
newDirectory(), newDirectory(),
newDirectory() newDirectory()
}; };
splitter.split(input, dirs, false); splitter.split(TEST_VERSION_CURRENT, input, dirs, false);
IndexReader ir; IndexReader ir;
ir = IndexReader.open(dirs[0], true); ir = IndexReader.open(dirs[0], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
@ -111,7 +111,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
newDirectory(), newDirectory(),
newDirectory() newDirectory()
}; };
splitter.split(input, dirs, true); splitter.split(TEST_VERSION_CURRENT, input, dirs, true);
IndexReader ir; IndexReader ir;
ir = IndexReader.open(dirs[0], true); ir = IndexReader.open(dirs[0], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);

View File

@ -68,7 +68,9 @@ public class TestPKIndexSplitter extends LuceneTestCase {
private void checkSplitting(Directory dir, Term splitTerm, int leftCount, int rightCount) throws Exception { private void checkSplitting(Directory dir, Term splitTerm, int leftCount, int rightCount) throws Exception {
Directory dir1 = newDirectory(); Directory dir1 = newDirectory();
Directory dir2 = newDirectory(); Directory dir2 = newDirectory();
PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm); PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)),
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
splitter.split(); splitter.split();
IndexReader ir1 = IndexReader.open(dir1); IndexReader ir1 = IndexReader.open(dir1);