LUCENE-2554: make PreFlexRW codec use finer-grained impersonation, so we can test dancing NRT/deletions too

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/preflexfixes@978872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-07-24 13:40:22 +00:00
parent 483266f22b
commit 94dbf446ed
7 changed files with 50 additions and 46 deletions

View File

@ -62,7 +62,7 @@ public class PreFlexCodec extends Codec {
@Override @Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, true); return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor);
} }
@Override @Override

View File

@ -58,19 +58,11 @@ public class PreFlexFields extends FieldsProducer {
private final Directory dir; private final Directory dir;
private final int readBufferSize; private final int readBufferSize;
private Directory cfsReader; private Directory cfsReader;
private final boolean unicodeSortOrder;
// If unicodeSortOrder is true, we do the surrogates dance public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
// so that the terms are sorted by unicode sort order.
// This should be true when segments are used for "normal"
// searching; it's only false during testing, to create a
// pre-flex index, using the preflexrw codec under
// src/test.
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor, boolean unicodeSortOrder)
throws IOException { throws IOException {
si = info; si = info;
this.unicodeSortOrder = unicodeSortOrder;
// NOTE: we must always load terms index, even for // NOTE: we must always load terms index, even for
// "sequential" scan during merging, because what is // "sequential" scan during merging, because what is
@ -114,6 +106,15 @@ public class PreFlexFields extends FieldsProducer {
this.dir = dir; this.dir = dir;
} }
// If this returns true, we do the surrogates dance so that the
// terms are sorted by unicode sort order. This should be
// true when segments are used for "normal" searching;
// it's only false during testing, to create a pre-flex
// index, using the test-only PreFlexRW.
protected boolean sortTermsByUnicode() {
return true;
}
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException { static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@ -241,7 +242,7 @@ public class PreFlexFields extends FieldsProducer {
public Comparator<BytesRef> getComparator() { public Comparator<BytesRef> getComparator() {
// Pre-flex indexes always sorted in UTF16 order, but // Pre-flex indexes always sorted in UTF16 order, but
// we remap on-the-fly to unicode order // we remap on-the-fly to unicode order
if (unicodeSortOrder) { if (sortTermsByUnicode()) {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} else { } else {
return BytesRef.getUTF8SortedAsUTF16Comparator(); return BytesRef.getUTF8SortedAsUTF16Comparator();
@ -692,6 +693,8 @@ public class PreFlexFields extends FieldsProducer {
} }
} }
private boolean unicodeSortOrder;
void reset(FieldInfo fieldInfo) throws IOException { void reset(FieldInfo fieldInfo) throws IOException {
//System.out.println("pff.reset te=" + termEnum); //System.out.println("pff.reset te=" + termEnum);
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
@ -705,6 +708,8 @@ public class PreFlexFields extends FieldsProducer {
} }
skipNext = true; skipNext = true;
unicodeSortOrder = sortTermsByUnicode();
final Term t = termEnum.term(); final Term t = termEnum.term();
if (t != null && t.field() == fieldInfo.name) { if (t != null && t.field() == fieldInfo.name) {
newSuffixStart = 0; newSuffixStart = 0;

View File

@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import org.apache.lucene.util.*; import org.apache.lucene.util.*;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import junit.framework.Assert; import junit.framework.Assert;
@ -263,10 +262,7 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
} }
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable { public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
// When we're testing w/ PreFlex codec, we must open IndexReader r2 = IndexReader.open(dir2);
// this reader with UTF16 terms since incoming NRT
// reader is sorted this way:
IndexReader r2 = IndexReader.open(dir2, null, true, _TestUtil.nextInt(r, 1, 3), _TestUtil.alwaysCodec(new PreFlexRWCodec("utf16")));
verifyEquals(r1, r2, idField); verifyEquals(r1, r2, idField);
r2.close(); r2.close();
} }

View File

@ -63,10 +63,14 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
private String getRandomString(Random r) { private String getRandomString(Random r) {
String s; String s;
if (r.nextInt(3) == 1) { if (r.nextInt(5) == 1) {
s = makeDifficultRandomUnicodeString(r); if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
} else {
s = _TestUtil.randomUnicodeString(r);
}
} else { } else {
s = _TestUtil.randomUnicodeString(r); s = _TestUtil.randomRealisticUnicodeString(r);
} }
return s; return s;
} }
@ -272,7 +276,7 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
RandomIndexWriter w = new RandomIndexWriter(r, RandomIndexWriter w = new RandomIndexWriter(r,
dir, dir,
newIndexWriterConfig(r, TEST_VERSION_CURRENT, newIndexWriterConfig(r, TEST_VERSION_CURRENT,
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec(null)))); new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));
final int numField = _TestUtil.nextInt(r, 2, 5); final int numField = _TestUtil.nextInt(r, 2, 5);

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexFields; import org.apache.lucene.index.codecs.preflex.PreFlexFields;
import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.util.LuceneTestCaseJ4;
/** Codec, only for testing, that can write and read the /** Codec, only for testing, that can write and read the
* pre-flex index format. * pre-flex index format.
@ -33,20 +34,14 @@ import org.apache.lucene.index.codecs.FieldsProducer;
*/ */
public class PreFlexRWCodec extends PreFlexCodec { public class PreFlexRWCodec extends PreFlexCodec {
private final String termSortOrder; public PreFlexRWCodec() {
// termSortOrder should be null (dynamically determined
// by stack), "codepoint" or "utf16"
public PreFlexRWCodec(String termSortOrder) {
// NOTE: we impersonate the PreFlex codec so that it can // NOTE: we impersonate the PreFlex codec so that it can
// read the segments we write! // read the segments we write!
super(); super();
this.termSortOrder = termSortOrder;
} }
@Override @Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
System.out.println("PFW");
return new PreFlexFieldsWriter(state); return new PreFlexFieldsWriter(state);
} }
@ -56,23 +51,27 @@ public class PreFlexRWCodec extends PreFlexCodec {
// Whenever IW opens readers, eg for merging, we have to // Whenever IW opens readers, eg for merging, we have to
// keep terms order in UTF16: // keep terms order in UTF16:
boolean unicodeSortOrder; return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
if (termSortOrder == null) { @Override
unicodeSortOrder = true; protected boolean sortTermsByUnicode() {
// We carefully peek into the stack trace above us: if
// we are part of a "merge", we must sort by UTF16:
boolean unicodeSortOrder = true;
StackTraceElement[] trace = new Exception().getStackTrace(); StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) { for (int i = 0; i < trace.length; i++) {
//System.out.println(trace[i].getClassName()); //System.out.println(trace[i].getClassName());
if ("org.apache.lucene.index.IndexWriter".equals(trace[i].getClassName())) { if ("merge".equals(trace[i].getMethodName())) {
unicodeSortOrder = false; unicodeSortOrder = false;
break; if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
}
break;
}
} }
}
//System.out.println("PRW: " + unicodeSortOrder);
} else {
unicodeSortOrder = termSortOrder.equals("codepoint");
}
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, unicodeSortOrder); return unicodeSortOrder;
}
};
} }
} }

View File

@ -128,7 +128,7 @@ public abstract class LuceneTestCase extends TestCase {
// test-only PreFlexRW codec (since core PreFlex can // test-only PreFlexRW codec (since core PreFlex can
// only read segments): // only read segments):
if (codec.equals("PreFlex")) { if (codec.equals("PreFlex")) {
CodecProvider.getDefault().register(new PreFlexRWCodec(null)); CodecProvider.getDefault().register(new PreFlexRWCodec());
} }
CodecProvider.setDefaultCodec(codec); CodecProvider.setDefaultCodec(codec);
} }
@ -158,7 +158,7 @@ public abstract class LuceneTestCase extends TestCase {
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount); BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
// Restore read-only PreFlex codec: // Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) { if (codec.equals("PreFlex")) {
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null)); CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec()); CodecProvider.getDefault().register(new PreFlexCodec());
} }
CodecProvider.setDefaultCodec(savedDefaultCodec); CodecProvider.setDefaultCodec(savedDefaultCodec);

View File

@ -152,7 +152,7 @@ public class LuceneTestCaseJ4 {
// test-only PreFlexRW codec (since core PreFlex can // test-only PreFlexRW codec (since core PreFlex can
// only read segments): // only read segments):
if (codec.equals("PreFlex")) { if (codec.equals("PreFlex")) {
CodecProvider.getDefault().register(new PreFlexRWCodec(null)); CodecProvider.getDefault().register(new PreFlexRWCodec());
} }
CodecProvider.setDefaultCodec(codec); CodecProvider.setDefaultCodec(codec);
} }
@ -161,7 +161,7 @@ public class LuceneTestCaseJ4 {
public static void afterClassLuceneTestCaseJ4() { public static void afterClassLuceneTestCaseJ4() {
// Restore read-only PreFlex codec: // Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) { if (codec.equals("PreFlex")) {
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null)); CodecProvider.getDefault().unregister(new PreFlexRWCodec());
CodecProvider.getDefault().register(new PreFlexCodec()); CodecProvider.getDefault().register(new PreFlexCodec());
} }
CodecProvider.setDefaultCodec(savedDefaultCodec); CodecProvider.setDefaultCodec(savedDefaultCodec);