LUCENE-2554: make PreFlexRW codec use finer-grained impersonation, so we can test dancing NRT/deletions too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/preflexfixes@978872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-07-24 13:40:22 +00:00
parent 483266f22b
commit 94dbf446ed
7 changed files with 50 additions and 46 deletions

View File

@ -62,7 +62,7 @@ public class PreFlexCodec extends Codec {
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, true);
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor);
}
@Override

View File

@ -58,19 +58,11 @@ public class PreFlexFields extends FieldsProducer {
private final Directory dir;
private final int readBufferSize;
private Directory cfsReader;
private final boolean unicodeSortOrder;
// If unicodeSortOrder is true, we do the surrogates dance
// so that the terms are sorted by unicode sort order.
// This should be true when segments are used for "normal"
// searching; it's only false during testing, to create a
// pre-flex index, using the preflexrw codec under
// src/test.
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor, boolean unicodeSortOrder)
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
throws IOException {
si = info;
this.unicodeSortOrder = unicodeSortOrder;
// NOTE: we must always load terms index, even for
// "sequential" scan during merging, because what is
@ -114,6 +106,15 @@ public class PreFlexFields extends FieldsProducer {
this.dir = dir;
}
// If this returns, we do the surrogates dance so that the
// terms are sorted by unicode sort order. This should be
// true when segments are used for "normal" searching;
// it's only false during testing, to create a pre-flex
// index, using the test-only PreFlexRW.
protected boolean sortTermsByUnicode() {
return true;
}
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@ -241,7 +242,7 @@ public class PreFlexFields extends FieldsProducer {
public Comparator<BytesRef> getComparator() {
// Pre-flex indexes always sorted in UTF16 order, but
// we remap on-the-fly to unicode order
if (unicodeSortOrder) {
if (sortTermsByUnicode()) {
return BytesRef.getUTF8SortedAsUnicodeComparator();
} else {
return BytesRef.getUTF8SortedAsUTF16Comparator();
@ -692,6 +693,8 @@ public class PreFlexFields extends FieldsProducer {
}
}
private boolean unicodeSortOrder;
void reset(FieldInfo fieldInfo) throws IOException {
//System.out.println("pff.reset te=" + termEnum);
this.fieldInfo = fieldInfo;
@ -705,6 +708,8 @@ public class PreFlexFields extends FieldsProducer {
}
skipNext = true;
unicodeSortOrder = sortTermsByUnicode();
final Term t = termEnum.term();
if (t != null && t.field() == fieldInfo.name) {
newSuffixStart = 0;

View File

@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.util.*;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import junit.framework.Assert;
@ -263,10 +262,7 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
}
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
// When we're testing w/ PreFlex codec, we must open
// this reader with UTF16 terms since incoming NRT
// reader is sorted this way:
IndexReader r2 = IndexReader.open(dir2, null, true, _TestUtil.nextInt(r, 1, 3), _TestUtil.alwaysCodec(new PreFlexRWCodec("utf16")));
IndexReader r2 = IndexReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}

View File

@ -63,10 +63,14 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
private String getRandomString(Random r) {
String s;
if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
if (r.nextInt(5) == 1) {
if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
} else {
s = _TestUtil.randomUnicodeString(r);
}
} else {
s = _TestUtil.randomUnicodeString(r);
s = _TestUtil.randomRealisticUnicodeString(r);
}
return s;
}
@ -272,7 +276,7 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
RandomIndexWriter w = new RandomIndexWriter(r,
dir,
newIndexWriterConfig(r, TEST_VERSION_CURRENT,
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec(null))));
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));
final int numField = _TestUtil.nextInt(r, 2, 5);

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.util.LuceneTestCaseJ4;
/** Codec, only for testing, that can write and read the
* pre-flex index format.
@ -33,20 +34,14 @@ import org.apache.lucene.index.codecs.FieldsProducer;
*/
public class PreFlexRWCodec extends PreFlexCodec {
private final String termSortOrder;
// termSortOrder should be null (dynamically deteremined
// by stack), "codepoint" or "utf16"
public PreFlexRWCodec(String termSortOrder) {
public PreFlexRWCodec() {
// NOTE: we impersonate the PreFlex codec so that it can
// read the segments we write!
super();
this.termSortOrder = termSortOrder;
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
System.out.println("PFW");
return new PreFlexFieldsWriter(state);
}
@ -56,23 +51,27 @@ public class PreFlexRWCodec extends PreFlexCodec {
// Whenever IW opens readers, eg for merging, we have to
// keep terms order in UTF16:
boolean unicodeSortOrder;
if (termSortOrder == null) {
unicodeSortOrder = true;
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
@Override
protected boolean sortTermsByUnicode() {
// We carefully peek into stack track above us: if
// we are part of a "merge", we must sort by UTF16:
boolean unicodeSortOrder = true;
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
//System.out.println(trace[i].getClassName());
if ("org.apache.lucene.index.IndexWriter".equals(trace[i].getClassName())) {
unicodeSortOrder = false;
break;
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
//System.out.println(trace[i].getClassName());
if ("merge".equals(trace[i].getMethodName())) {
unicodeSortOrder = false;
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
}
break;
}
}
}
//System.out.println("PRW: " + unicodeSortOrder);
} else {
unicodeSortOrder = termSortOrder.equals("codepoint");
}
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, unicodeSortOrder);
return unicodeSortOrder;
}
};
}
}

View File

@ -128,7 +128,7 @@ public abstract class LuceneTestCase extends TestCase {
// test-only PreFlexRW codec (since core PreFlex can
// only read segments):
if (codec.equals("PreFlex")) {
CodecProvider.getDefault().register(new PreFlexRWCodec(null));
CodecProvider.getDefault().register(new PreFlexRWCodec());
}
CodecProvider.setDefaultCodec(codec);
}
@ -158,7 +158,7 @@ public abstract class LuceneTestCase extends TestCase {
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
// Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) {
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec());
}
CodecProvider.setDefaultCodec(savedDefaultCodec);

View File

@ -152,7 +152,7 @@ public class LuceneTestCaseJ4 {
// test-only PreFlexRW codec (since core PreFlex can
// only read segments):
if (codec.equals("PreFlex")) {
CodecProvider.getDefault().register(new PreFlexRWCodec(null));
CodecProvider.getDefault().register(new PreFlexRWCodec());
}
CodecProvider.setDefaultCodec(codec);
}
@ -161,7 +161,7 @@ public class LuceneTestCaseJ4 {
public static void afterClassLuceneTestCaseJ4() {
// Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) {
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec());
}
CodecProvider.setDefaultCodec(savedDefaultCodec);