mirror of https://github.com/apache/lucene.git
LUCENE-3296: Enable passing an IndexWriterConfig into PKIndexSplitter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145479 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f34671ca46
commit
98df516384
|
@ -57,7 +57,7 @@ public class MultiPassIndexSplitter {
|
||||||
* assigned in a deterministic round-robin fashion to one of the output splits.
|
* assigned in a deterministic round-robin fashion to one of the output splits.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public void split(IndexReader input, Directory[] outputs, boolean seq) throws IOException {
|
public void split(Version version, IndexReader input, Directory[] outputs, boolean seq) throws IOException {
|
||||||
if (outputs == null || outputs.length < 2) {
|
if (outputs == null || outputs.length < 2) {
|
||||||
throw new IOException("Invalid number of outputs.");
|
throw new IOException("Invalid number of outputs.");
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ public class MultiPassIndexSplitter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
|
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
|
||||||
Version.LUCENE_CURRENT,
|
version,
|
||||||
null)
|
null)
|
||||||
.setOpenMode(OpenMode.CREATE));
|
.setOpenMode(OpenMode.CREATE));
|
||||||
System.err.println("Writing part " + (i + 1) + " ...");
|
System.err.println("Writing part " + (i + 1) + " ...");
|
||||||
|
@ -106,6 +106,7 @@ public class MultiPassIndexSplitter {
|
||||||
System.err.println("Done.");
|
System.err.println("Done.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
if (args.length < 5) {
|
if (args.length < 5) {
|
||||||
System.err.println("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
|
System.err.println("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
|
||||||
|
@ -169,7 +170,7 @@ public class MultiPassIndexSplitter {
|
||||||
} else {
|
} else {
|
||||||
input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
|
input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
|
||||||
}
|
}
|
||||||
splitter.split(input, dirs, seq);
|
splitter.split(Version.LUCENE_CURRENT, input, dirs, seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
@ -39,17 +38,30 @@ public class PKIndexSplitter {
|
||||||
private final Filter docsInFirstIndex;
|
private final Filter docsInFirstIndex;
|
||||||
private final Directory input;
|
private final Directory input;
|
||||||
private final Directory dir1;
|
private final Directory dir1;
|
||||||
private final Directory dir2;
|
private final Directory dir2;
|
||||||
|
private final IndexWriterConfig config1;
|
||||||
|
private final IndexWriterConfig config2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split an index based on a {@link Filter}. All documents that match the filter
|
* Split an index based on a {@link Filter}. All documents that match the filter
|
||||||
* are sent to dir1, remaining ones to dir2.
|
* are sent to dir1, remaining ones to dir2.
|
||||||
*/
|
*/
|
||||||
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
|
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
|
||||||
|
this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static IndexWriterConfig newDefaultConfig(Version version) {
|
||||||
|
return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public PKIndexSplitter(Directory input, Directory dir1,
|
||||||
|
Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
|
||||||
this.input = input;
|
this.input = input;
|
||||||
this.dir1 = dir1;
|
this.dir1 = dir1;
|
||||||
this.dir2 = dir2;
|
this.dir2 = dir2;
|
||||||
this.docsInFirstIndex = docsInFirstIndex;
|
this.docsInFirstIndex = docsInFirstIndex;
|
||||||
|
this.config1 = config1;
|
||||||
|
this.config2 = config2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -57,27 +69,33 @@ public class PKIndexSplitter {
|
||||||
* and a 'middle' term. If the middle term is present, it's
|
* and a 'middle' term. If the middle term is present, it's
|
||||||
* sent to dir2.
|
* sent to dir2.
|
||||||
*/
|
*/
|
||||||
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
|
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
|
||||||
this(input, dir1, dir2,
|
this(version, input, dir1, dir2,
|
||||||
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false));
|
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public PKIndexSplitter(Directory input, Directory dir1,
|
||||||
|
Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
|
||||||
|
this(input, dir1, dir2,
|
||||||
|
new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
|
||||||
|
}
|
||||||
|
|
||||||
public void split() throws IOException {
|
public void split() throws IOException {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
IndexReader reader = IndexReader.open(input);
|
IndexReader reader = IndexReader.open(input);
|
||||||
try {
|
try {
|
||||||
createIndex(dir1, reader, docsInFirstIndex, false);
|
// pass an individual config in here since one config can not be reused!
|
||||||
createIndex(dir2, reader, docsInFirstIndex, true);
|
createIndex(config1, dir1, reader, docsInFirstIndex, false);
|
||||||
|
createIndex(config2, dir2, reader, docsInFirstIndex, true);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
IOUtils.closeSafely(!success, reader);
|
IOUtils.closeSafely(!success, reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createIndex(Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
|
private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
IndexWriter w = new IndexWriter(target, new IndexWriterConfig(
|
IndexWriter w = new IndexWriter(target, config);
|
||||||
Version.LUCENE_CURRENT, null).setOpenMode(OpenMode.CREATE));
|
|
||||||
try {
|
try {
|
||||||
w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
|
w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
|
||||||
success = true;
|
success = true;
|
||||||
|
|
|
@ -66,7 +66,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
|
||||||
newDirectory(),
|
newDirectory(),
|
||||||
newDirectory()
|
newDirectory()
|
||||||
};
|
};
|
||||||
splitter.split(input, dirs, false);
|
splitter.split(TEST_VERSION_CURRENT, input, dirs, false);
|
||||||
IndexReader ir;
|
IndexReader ir;
|
||||||
ir = IndexReader.open(dirs[0], true);
|
ir = IndexReader.open(dirs[0], true);
|
||||||
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
|
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
|
||||||
|
@ -111,7 +111,7 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
|
||||||
newDirectory(),
|
newDirectory(),
|
||||||
newDirectory()
|
newDirectory()
|
||||||
};
|
};
|
||||||
splitter.split(input, dirs, true);
|
splitter.split(TEST_VERSION_CURRENT, input, dirs, true);
|
||||||
IndexReader ir;
|
IndexReader ir;
|
||||||
ir = IndexReader.open(dirs[0], true);
|
ir = IndexReader.open(dirs[0], true);
|
||||||
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
|
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
|
||||||
|
|
|
@ -68,7 +68,9 @@ public class TestPKIndexSplitter extends LuceneTestCase {
|
||||||
private void checkSplitting(Directory dir, Term splitTerm, int leftCount, int rightCount) throws Exception {
|
private void checkSplitting(Directory dir, Term splitTerm, int leftCount, int rightCount) throws Exception {
|
||||||
Directory dir1 = newDirectory();
|
Directory dir1 = newDirectory();
|
||||||
Directory dir2 = newDirectory();
|
Directory dir2 = newDirectory();
|
||||||
PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm);
|
PKIndexSplitter splitter = new PKIndexSplitter(dir, dir1, dir2, splitTerm,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)),
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||||
splitter.split();
|
splitter.split();
|
||||||
|
|
||||||
IndexReader ir1 = IndexReader.open(dir1);
|
IndexReader ir1 = IndexReader.open(dir1);
|
||||||
|
|
Loading…
Reference in New Issue