mirror of https://github.com/apache/lucene.git
LUCENE-2554: make PreFlexRW codec use finer-grained impersonation, so we can test dancing NRT/deletions too
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/preflexfixes@978872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
483266f22b
commit
94dbf446ed
|
@ -62,7 +62,7 @@ public class PreFlexCodec extends Codec {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||||
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, true);
|
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -58,19 +58,11 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
private final Directory dir;
|
private final Directory dir;
|
||||||
private final int readBufferSize;
|
private final int readBufferSize;
|
||||||
private Directory cfsReader;
|
private Directory cfsReader;
|
||||||
private final boolean unicodeSortOrder;
|
|
||||||
|
|
||||||
// If unicodeSortOrder is true, we do the surrogates dance
|
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
|
||||||
// so that the terms are sorted by unicode sort order.
|
|
||||||
// This should be true when segments are used for "normal"
|
|
||||||
// searching; it's only false during testing, to create a
|
|
||||||
// pre-flex index, using the preflexrw codec under
|
|
||||||
// src/test.
|
|
||||||
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor, boolean unicodeSortOrder)
|
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
si = info;
|
si = info;
|
||||||
this.unicodeSortOrder = unicodeSortOrder;
|
|
||||||
|
|
||||||
// NOTE: we must always load terms index, even for
|
// NOTE: we must always load terms index, even for
|
||||||
// "sequential" scan during merging, because what is
|
// "sequential" scan during merging, because what is
|
||||||
|
@ -114,6 +106,15 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If this returns true, we do the surrogates dance so that the
|
||||||
|
// terms are sorted by unicode sort order. This should be
|
||||||
|
// true when segments are used for "normal" searching;
|
||||||
|
// it's only false during testing, to create a pre-flex
|
||||||
|
// index, using the test-only PreFlexRW.
|
||||||
|
protected boolean sortTermsByUnicode() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
|
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
|
||||||
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
|
||||||
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
|
||||||
|
@ -241,7 +242,7 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
// Pre-flex indexes always sorted in UTF16 order, but
|
// Pre-flex indexes always sorted in UTF16 order, but
|
||||||
// we remap on-the-fly to unicode order
|
// we remap on-the-fly to unicode order
|
||||||
if (unicodeSortOrder) {
|
if (sortTermsByUnicode()) {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
} else {
|
} else {
|
||||||
return BytesRef.getUTF8SortedAsUTF16Comparator();
|
return BytesRef.getUTF8SortedAsUTF16Comparator();
|
||||||
|
@ -692,6 +693,8 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean unicodeSortOrder;
|
||||||
|
|
||||||
void reset(FieldInfo fieldInfo) throws IOException {
|
void reset(FieldInfo fieldInfo) throws IOException {
|
||||||
//System.out.println("pff.reset te=" + termEnum);
|
//System.out.println("pff.reset te=" + termEnum);
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
|
@ -705,6 +708,8 @@ public class PreFlexFields extends FieldsProducer {
|
||||||
}
|
}
|
||||||
skipNext = true;
|
skipNext = true;
|
||||||
|
|
||||||
|
unicodeSortOrder = sortTermsByUnicode();
|
||||||
|
|
||||||
final Term t = termEnum.term();
|
final Term t = termEnum.term();
|
||||||
if (t != null && t.field() == fieldInfo.name) {
|
if (t != null && t.field() == fieldInfo.name) {
|
||||||
newSuffixStart = 0;
|
newSuffixStart = 0;
|
||||||
|
|
|
@ -25,7 +25,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.util.*;
|
||||||
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
|
|
||||||
|
|
||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
@ -263,10 +262,7 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
|
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
|
||||||
// When we're testing w/ PreFlex codec, we must open
|
IndexReader r2 = IndexReader.open(dir2);
|
||||||
// this reader with UTF16 terms since incoming NRT
|
|
||||||
// reader is sorted this way:
|
|
||||||
IndexReader r2 = IndexReader.open(dir2, null, true, _TestUtil.nextInt(r, 1, 3), _TestUtil.alwaysCodec(new PreFlexRWCodec("utf16")));
|
|
||||||
verifyEquals(r1, r2, idField);
|
verifyEquals(r1, r2, idField);
|
||||||
r2.close();
|
r2.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,10 +63,14 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
|
||||||
|
|
||||||
private String getRandomString(Random r) {
|
private String getRandomString(Random r) {
|
||||||
String s;
|
String s;
|
||||||
if (r.nextInt(3) == 1) {
|
if (r.nextInt(5) == 1) {
|
||||||
s = makeDifficultRandomUnicodeString(r);
|
if (r.nextInt(3) == 1) {
|
||||||
|
s = makeDifficultRandomUnicodeString(r);
|
||||||
|
} else {
|
||||||
|
s = _TestUtil.randomUnicodeString(r);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
s = _TestUtil.randomUnicodeString(r);
|
s = _TestUtil.randomRealisticUnicodeString(r);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
@ -272,7 +276,7 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
|
||||||
RandomIndexWriter w = new RandomIndexWriter(r,
|
RandomIndexWriter w = new RandomIndexWriter(r,
|
||||||
dir,
|
dir,
|
||||||
newIndexWriterConfig(r, TEST_VERSION_CURRENT,
|
newIndexWriterConfig(r, TEST_VERSION_CURRENT,
|
||||||
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec(null))));
|
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));
|
||||||
|
|
||||||
final int numField = _TestUtil.nextInt(r, 2, 5);
|
final int numField = _TestUtil.nextInt(r, 2, 5);
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
|
||||||
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
|
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.util.LuceneTestCaseJ4;
|
||||||
|
|
||||||
/** Codec, only for testing, that can write and read the
|
/** Codec, only for testing, that can write and read the
|
||||||
* pre-flex index format.
|
* pre-flex index format.
|
||||||
|
@ -33,20 +34,14 @@ import org.apache.lucene.index.codecs.FieldsProducer;
|
||||||
*/
|
*/
|
||||||
public class PreFlexRWCodec extends PreFlexCodec {
|
public class PreFlexRWCodec extends PreFlexCodec {
|
||||||
|
|
||||||
private final String termSortOrder;
|
public PreFlexRWCodec() {
|
||||||
|
|
||||||
// termSortOrder should be null (dynamically determined
|
|
||||||
// by stack), "codepoint" or "utf16"
|
|
||||||
public PreFlexRWCodec(String termSortOrder) {
|
|
||||||
// NOTE: we impersonate the PreFlex codec so that it can
|
// NOTE: we impersonate the PreFlex codec so that it can
|
||||||
// read the segments we write!
|
// read the segments we write!
|
||||||
super();
|
super();
|
||||||
this.termSortOrder = termSortOrder;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
System.out.println("PFW");
|
|
||||||
return new PreFlexFieldsWriter(state);
|
return new PreFlexFieldsWriter(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,23 +51,27 @@ public class PreFlexRWCodec extends PreFlexCodec {
|
||||||
// Whenever IW opens readers, eg for merging, we have to
|
// Whenever IW opens readers, eg for merging, we have to
|
||||||
// keep terms order in UTF16:
|
// keep terms order in UTF16:
|
||||||
|
|
||||||
boolean unicodeSortOrder;
|
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
|
||||||
if (termSortOrder == null) {
|
@Override
|
||||||
unicodeSortOrder = true;
|
protected boolean sortTermsByUnicode() {
|
||||||
|
// We carefully peek into the stack trace above us: if
|
||||||
|
// we are part of a "merge", we must sort by UTF16:
|
||||||
|
boolean unicodeSortOrder = true;
|
||||||
|
|
||||||
StackTraceElement[] trace = new Exception().getStackTrace();
|
StackTraceElement[] trace = new Exception().getStackTrace();
|
||||||
for (int i = 0; i < trace.length; i++) {
|
for (int i = 0; i < trace.length; i++) {
|
||||||
//System.out.println(trace[i].getClassName());
|
//System.out.println(trace[i].getClassName());
|
||||||
if ("org.apache.lucene.index.IndexWriter".equals(trace[i].getClassName())) {
|
if ("merge".equals(trace[i].getMethodName())) {
|
||||||
unicodeSortOrder = false;
|
unicodeSortOrder = false;
|
||||||
break;
|
if (LuceneTestCaseJ4.VERBOSE) {
|
||||||
|
System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
//System.out.println("PRW: " + unicodeSortOrder);
|
|
||||||
} else {
|
|
||||||
unicodeSortOrder = termSortOrder.equals("codepoint");
|
|
||||||
}
|
|
||||||
|
|
||||||
return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor, unicodeSortOrder);
|
return unicodeSortOrder;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -128,7 +128,7 @@ public abstract class LuceneTestCase extends TestCase {
|
||||||
// test-only PreFlexRW codec (since core PreFlex can
|
// test-only PreFlexRW codec (since core PreFlex can
|
||||||
// only read segments):
|
// only read segments):
|
||||||
if (codec.equals("PreFlex")) {
|
if (codec.equals("PreFlex")) {
|
||||||
CodecProvider.getDefault().register(new PreFlexRWCodec(null));
|
CodecProvider.getDefault().register(new PreFlexRWCodec());
|
||||||
}
|
}
|
||||||
CodecProvider.setDefaultCodec(codec);
|
CodecProvider.setDefaultCodec(codec);
|
||||||
}
|
}
|
||||||
|
@ -158,7 +158,7 @@ public abstract class LuceneTestCase extends TestCase {
|
||||||
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
|
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
|
||||||
// Restore read-only PreFlex codec:
|
// Restore read-only PreFlex codec:
|
||||||
if (codec.equals("PreFlex")) {
|
if (codec.equals("PreFlex")) {
|
||||||
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
|
CodecProvider.getDefault().unregister(new PreFlexRWCodec());
|
||||||
CodecProvider.getDefault().register(new PreFlexCodec());
|
CodecProvider.getDefault().register(new PreFlexCodec());
|
||||||
}
|
}
|
||||||
CodecProvider.setDefaultCodec(savedDefaultCodec);
|
CodecProvider.setDefaultCodec(savedDefaultCodec);
|
||||||
|
|
|
@ -152,7 +152,7 @@ public class LuceneTestCaseJ4 {
|
||||||
// test-only PreFlexRW codec (since core PreFlex can
|
// test-only PreFlexRW codec (since core PreFlex can
|
||||||
// only read segments):
|
// only read segments):
|
||||||
if (codec.equals("PreFlex")) {
|
if (codec.equals("PreFlex")) {
|
||||||
CodecProvider.getDefault().register(new PreFlexRWCodec(null));
|
CodecProvider.getDefault().register(new PreFlexRWCodec());
|
||||||
}
|
}
|
||||||
CodecProvider.setDefaultCodec(codec);
|
CodecProvider.setDefaultCodec(codec);
|
||||||
}
|
}
|
||||||
|
@ -161,7 +161,7 @@ public class LuceneTestCaseJ4 {
|
||||||
public static void afterClassLuceneTestCaseJ4() {
|
public static void afterClassLuceneTestCaseJ4() {
|
||||||
// Restore read-only PreFlex codec:
|
// Restore read-only PreFlex codec:
|
||||||
if (codec.equals("PreFlex")) {
|
if (codec.equals("PreFlex")) {
|
||||||
CodecProvider.getDefault().unregister(new PreFlexRWCodec(null));
|
CodecProvider.getDefault().unregister(new PreFlexRWCodec());
|
||||||
CodecProvider.getDefault().register(new PreFlexCodec());
|
CodecProvider.getDefault().register(new PreFlexCodec());
|
||||||
}
|
}
|
||||||
CodecProvider.setDefaultCodec(savedDefaultCodec);
|
CodecProvider.setDefaultCodec(savedDefaultCodec);
|
||||||
|
|
Loading…
Reference in New Issue