mirror of https://github.com/apache/lucene.git

commit f8fba74632

    LUCENE-2554: preflex codec doesnt order terms correctly

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@979453 13f79535-47bb-0310-9956-ffa450edef68
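For context on the ordering bug this commit fixes: pre-flex (3.x) term dictionaries sort terms by UTF-16 code unit, while the flex APIs sort by Unicode code point (equivalently, by UTF-8 byte order). The two orders disagree exactly where surrogate pairs are involved. A minimal standalone illustration, using only the JDK (not part of the commit):

public class SurrogateOrderDemo {
  public static void main(String[] args) {
    String nonBmp = new String(Character.toChars(0x10400)); // a surrogate pair in UTF-16
    String highBmp = "\uFB01";                              // a BMP char above the surrogate range

    // UTF-16 code unit order (the pre-flex/3.x term order):
    // lead surrogate 0xD801 < 0xFB01, so the non-BMP term sorts first.
    System.out.println(nonBmp.compareTo(highBmp) < 0);                  // true

    // Unicode code point order (the flex/4.0 term order):
    // U+10400 > U+FB01, so the non-BMP term sorts last.
    System.out.println(nonBmp.codePointAt(0) < highBmp.codePointAt(0)); // false
  }
}

The "surrogate dance" added to PreFlexFields below exists to bridge exactly this mismatch while enumerating an old index through the new APIs.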
@@ -56,6 +56,7 @@
   <property name="args" value=""/>
   <property name="threadsPerProcessor" value="1" />
   <property name="random.multiplier" value="1" />
+  <property name="tests.codec" value="random" />
 
   <property name="javac.deprecation" value="off"/>
   <property name="javac.debug" value="on"/>
@@ -434,6 +435,8 @@
 
       <!-- allow tests to control debug prints -->
       <sysproperty key="tests.verbose" value="${tests.verbose}"/>
+      <!-- set the codec tests should run with -->
+      <sysproperty key="tests.codec" value="${tests.codec}"/>
 
       <!-- TODO: create propertyset for test properties, so each project can have its own set -->
      <sysproperty key="random.multiplier" value="${random.multiplier}"/>
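With these two build hunks, the codec used by tests defaults to "random" but can presumably be pinned from the command line through the new sysproperty, e.g. "ant test -Dtests.codec=PreFlex" (the exact ant target names are an assumption; they are not shown in this diff).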
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopDocs;
@@ -107,8 +108,8 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
 
     RAMDirectory ramdir = new RAMDirectory();
     Analyzer analyzer = randomAnalyzer();
-    IndexWriter writer = new IndexWriter(ramdir, analyzer,
-                                         IndexWriter.MaxFieldLength.UNLIMITED);
+    IndexWriter writer = new IndexWriter(ramdir,
+        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
     Document doc = new Document();
     Field field1 = new Field("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
     Field field2 = new Field("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
@@ -18,13 +18,13 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
@@ -38,8 +38,7 @@ public class BooleanFilterTest extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
 
     //Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags
     addDoc(writer, "admin guest", "010", "20040101","Y");
@@ -21,11 +21,9 @@ import java.util.Calendar;
 import java.util.GregorianCalendar;
 import java.util.Random;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
@@ -63,9 +61,7 @@ public class ChainedFilterTest extends LuceneTestCase {
     super.setUp();
     random = newRandom();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
-
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
     Calendar cal = new GregorianCalendar();
     cal.clear();
     cal.setTimeInMillis(1041397200000L); // 2003 January 01
@@ -200,8 +196,7 @@ public class ChainedFilterTest extends LuceneTestCase {
 
   public void testWithCachingFilter() throws Exception {
     Directory dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     IndexReader reader = writer.getReader();
     writer.close();
 
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.HashSet;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocsEnum;
@@ -44,8 +42,7 @@ public class DuplicateFilterTest extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
 
     //Add series of docs with filterable fields : url, text and dates flags
     addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
@@ -41,8 +40,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
 
     //Add series of docs with misspelt names
     addDoc(writer, "jonathon smythe","1");
@@ -19,11 +19,9 @@ package org.apache.lucene.search;
 
 import java.util.HashSet;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
@@ -53,8 +51,7 @@ public class TermsFilterTest extends LuceneTestCase {
   public void testMissingTerms() throws Exception {
     String fieldName="field1";
     RAMDirectory rd=new RAMDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd);
     for (int i = 0; i < 100; i++) {
       Document doc=new Document();
       int term=i*10; //terms are units of 10;
@@ -20,10 +20,8 @@ package org.apache.lucene.search.regex;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.IndexSearcher;
@@ -44,8 +42,7 @@ public class TestRegexQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     Document doc = new Document();
     doc.add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
     writer.addDocument(doc);
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -46,8 +45,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
 
     // Add series of docs with specific information for MoreLikeThis
     addDoc(writer, "lucene");
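The test changes above all follow one pattern: the explicit IndexWriterConfig is dropped so that RandomIndexWriter supplies its own randomized configuration, including (per the new tests.codec property) a randomized codec. A codec-sensitive test can still pin one codec explicitly. A hedged sketch of the two setups — the _TestUtil.alwaysCodec call is taken verbatim from the MemoryIndexTest hunk earlier; the surrounding scaffolding is illustrative, not from this commit:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class CodecSetupSketch extends LuceneTestCase {
  public void testSketch() throws Exception {
    RAMDirectory dir = new RAMDirectory();

    // Default: let RandomIndexWriter randomize the config and codec.
    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
    writer.close();

    // Codec-sensitive test: pin one codec, as MemoryIndexTest does above.
    IndexWriter pinned = new IndexWriter(dir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
            .setCodecProvider(_TestUtil.alwaysCodec("Standard")));
    pinned.close();
  }
}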
@@ -116,7 +116,7 @@ public final class MultiTermsEnum extends TermsEnum {
         // different TermComps
         final Comparator<BytesRef> subTermComp = termsEnumIndex.termsEnum.getComparator();
         if (subTermComp != null && !subTermComp.equals(termComp)) {
-          throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
+          throw new IllegalStateException("sub-readers have different BytesRef.Comparators: " + subTermComp + " vs " + termComp + "; cannot merge");
         }
       }
 
@@ -47,13 +47,12 @@ public abstract class CodecProvider {
 
   private static String defaultCodec = "Standard";
 
-  public final static String[] CORE_CODECS = new String[] {"Standard", "Sep", "Pulsing", "IntBlock"};
+  public final static String[] CORE_CODECS = new String[] {"Standard", "Sep", "Pulsing", "IntBlock", "PreFlex"};
-
   public void register(Codec codec) {
     if (codec.name == null) {
       throw new IllegalArgumentException("code.name is null");
     }
 
     if (!codecs.containsKey(codec.name)) {
       codecs.put(codec.name, codec);
       codec.getExtensions(knownExtensions);
@@ -62,6 +61,21 @@ public abstract class CodecProvider {
     }
   }
 
+  /** @lucene.internal */
+  public void unregister(Codec codec) {
+    if (codec.name == null) {
+      throw new IllegalArgumentException("code.name is null");
+    }
+    if (codecs.containsKey(codec.name)) {
+      Codec c = codecs.get(codec.name);
+      if (codec == c) {
+        codecs.remove(codec.name);
+      } else {
+        throw new IllegalArgumentException("codec '" + codec.name + "' is being impersonated by a different codec instance!!!");
+      }
+    }
+  }
+
   public Collection<String> getAllExtensions() {
     return knownExtensions;
   }
@@ -111,8 +125,5 @@ class DefaultCodecProvider extends CodecProvider {
   @Override
   public Codec getWriter(SegmentWriteState state) {
     return lookup(CodecProvider.getDefaultCodec());
-    //return lookup("Pulsing");
-    //return lookup("Sep");
-    //return lookup("IntBlock");
   }
 }
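The new unregister method (marked @lucene.internal) exists so tests can temporarily install a codec and restore state afterwards; the identity check prevents removing a codec someone else registered under the same name. A hedged sketch of the intended pairing — PreFlexCodec and the register/unregister calls are from this commit, but how the active CodecProvider instance is obtained is not shown in the diff, so it is passed in here:

import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;

public class CodecSwapSketch {
  // Temporarily install a codec for the duration of a task, then restore.
  static void withPreFlex(CodecProvider provider, Runnable task) {
    Codec codec = new PreFlexCodec();
    provider.register(codec);
    try {
      task.run();
    } finally {
      // Must unregister the exact instance that was registered; a different
      // instance under the same name throws the "impersonated" exception.
      provider.unregister(codec);
    }
  }
}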
@@ -22,6 +22,7 @@ import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 
 import java.io.IOException;
+import java.io.Closeable;
 
 /** Abstract API that consumes terms, doc, freq, prox and
  *  payloads postings.  Concrete implementations of this
@@ -30,7 +31,7 @@ import java.io.IOException;
  *
  * @lucene.experimental
  */
-public abstract class FieldsConsumer {
+public abstract class FieldsConsumer implements Closeable {
 
   /** Add a new field */
   public abstract TermsConsumer addField(FieldInfo field) throws IOException;
@@ -40,16 +40,16 @@ import org.apache.lucene.index.codecs.FieldsProducer;
 public class PreFlexCodec extends Codec {
 
   /** Extension of terms file */
-  static final String TERMS_EXTENSION = "tis";
+  public static final String TERMS_EXTENSION = "tis";
 
   /** Extension of terms index file */
-  static final String TERMS_INDEX_EXTENSION = "tii";
+  public static final String TERMS_INDEX_EXTENSION = "tii";
 
   /** Extension of freq postings file */
-  static final String FREQ_EXTENSION = "frq";
+  public static final String FREQ_EXTENSION = "frq";
 
   /** Extension of prox postings file */
-  static final String PROX_EXTENSION = "prx";
+  public static final String PROX_EXTENSION = "prx";
 
   public PreFlexCodec() {
     name = "PreFlex";
@@ -40,7 +40,6 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.ArrayUtil;
 
 /** Exposes flex API on a pre-flex index, as a codec.
  * @lucene.experimental */
@@ -60,7 +59,7 @@ public class PreFlexFields extends FieldsProducer {
   private final int readBufferSize;
   private Directory cfsReader;
 
-  PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
+  public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
     throws IOException {
 
     si = info;
@@ -107,6 +106,15 @@ public class PreFlexFields extends FieldsProducer {
     this.dir = dir;
   }
 
+  // If this returns true, we do the surrogates dance so that the
+  // terms are sorted by unicode sort order.  This should be
+  // true when segments are used for "normal" searching;
+  // it's only false during testing, to create a pre-flex
+  // index, using the test-only PreFlexRW.
+  protected boolean sortTermsByUnicode() {
+    return true;
+  }
+
   static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
     files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
     files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@@ -182,6 +190,12 @@ public class PreFlexFields extends FieldsProducer {
     if (cfsReader != null) {
       cfsReader.close();
     }
+    if (freqStream != null) {
+      freqStream.close();
+    }
+    if (proxStream != null) {
+      proxStream.close();
+    }
   }
 
   private class PreFlexFieldsEnum extends FieldsEnum {
@@ -228,7 +242,11 @@ public class PreFlexFields extends FieldsProducer {
     public Comparator<BytesRef> getComparator() {
       // Pre-flex indexes always sorted in UTF16 order, but
      // we remap on-the-fly to unicode order
+      if (sortTermsByUnicode()) {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      } else {
+        return BytesRef.getUTF8SortedAsUTF16Comparator();
+      }
     }
   }
 
@@ -238,237 +256,475 @@ public class PreFlexFields extends FieldsProducer {
     private boolean skipNext;
     private BytesRef current;
 
-    private int[] surrogateSeekPending = new int[1];
-    private boolean[] surrogateDidSeekBack = new boolean[1];
-    private int surrogateSeekUpto;
-    private char[] pendingPrefix;
-
     private SegmentTermEnum seekTermEnum;
     private Term protoTerm;
 
+    private static final byte UTF8_NON_BMP_LEAD = (byte) 0xf0;
+    private static final byte UTF8_HIGH_BMP_LEAD = (byte) 0xee;
+
+    // Returns true if the unicode char is "after" the
+    // surrogates in UTF16, ie >= U+E000 and <= U+FFFF:
+    private final boolean isHighBMPChar(byte[] b, int idx) {
+      return (b[idx] & UTF8_HIGH_BMP_LEAD) == UTF8_HIGH_BMP_LEAD;
+    }
+
+    // Returns true if the unicode char in the UTF8 byte
+    // sequence starting at idx encodes a char outside of
+    // BMP (ie what would be a surrogate pair in UTF16):
+    private final boolean isNonBMPChar(byte[] b, int idx) {
+      return (b[idx] & UTF8_NON_BMP_LEAD) == UTF8_NON_BMP_LEAD;
+    }
+
+    private final byte[] scratch = new byte[4];
+    private final BytesRef prevTerm = new BytesRef();
+    private final BytesRef scratchTerm = new BytesRef();
+    private int newSuffixStart;
+
+    // Swap in S, in place of E:
+    private boolean seekToNonBMP(SegmentTermEnum te, BytesRef term, int pos) throws IOException {
+      final int savLength = term.length;
+
+      assert term.offset == 0;
+
+      // The 3 bytes starting at downTo make up 1
+      // unicode character:
+      assert isHighBMPChar(term.bytes, pos);
+
+      // NOTE: we cannot make this assert, because
+      // AutomatonQuery legitimately sends us malformed UTF8
+      // (eg the UTF8 bytes with just 0xee)
+      // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();
+
+      // Save the bytes && length, since we need to
+      // restore this if seek "back" finds no matching
+      // terms
+      if (term.bytes.length < 4+pos) {
+        term.grow(4+pos);
+      }
+
+      scratch[0] = term.bytes[pos];
+      scratch[1] = term.bytes[pos+1];
+      scratch[2] = term.bytes[pos+2];
+
+      term.bytes[pos] = (byte) 0xf0;
+      term.bytes[pos+1] = (byte) 0x90;
+      term.bytes[pos+2] = (byte) 0x80;
+      term.bytes[pos+3] = (byte) 0x80;
+      term.length = 4+pos;
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("      try seek term=" + UnicodeUtil.toHexString(term.utf8ToString()));
+      }
+
+      // Seek "back":
+      getTermsDict().seekEnum(te, protoTerm.createTerm(term));
+
+      // Test if the term we seek'd to in fact found a
+      // surrogate pair at the same position as the E:
+      Term t2 = te.term();
+
+      // Cannot be null (or move to next field) because at
+      // "worst" it'd seek to the same term we are on now,
+      // unless we are being called from seek
+      if (t2 == null || t2.field() != fieldInfo.name) {
+        return false;
+      }
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("      got term=" + UnicodeUtil.toHexString(t2.text()));
+      }
+
+      // Now test if prefix is identical and we found
+      // a non-BMP char at the same position:
+      BytesRef b2 = t2.bytes();
+      assert b2.offset == 0;
+
+      boolean matches;
+      if (b2.length >= term.length && isNonBMPChar(b2.bytes, pos)) {
+        matches = true;
+        for(int i=0;i<pos;i++) {
+          if (term.bytes[i] != b2.bytes[i]) {
+            matches = false;
+            break;
+          }
+        }
+      } else {
+        matches = false;
+      }
+
+      // Restore term:
+      term.length = savLength;
+      term.bytes[pos] = scratch[0];
+      term.bytes[pos+1] = scratch[1];
+      term.bytes[pos+2] = scratch[2];
+
+      return matches;
+    }
+
+    // Seek type 2 "continue" (back to the start of the
+    // surrogates): scan the stripped suffix from the
+    // prior term, backwards.  If there was an E in that
+    // part, then we try to seek back to S.  If that
+    // seek finds a matching term, we go there.
+    private boolean doContinue() throws IOException {
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("  try cont");
+      }
+
+      int downTo = prevTerm.length-1;
+
+      boolean didSeek = false;
+
+      final int limit = Math.min(newSuffixStart, scratchTerm.length-1);
+
+      while(downTo > limit) {
+
+        if (isHighBMPChar(prevTerm.bytes, downTo)) {
+
+          if (DEBUG_SURROGATES) {
+            System.out.println("    found E pos=" + downTo + " vs len=" + prevTerm.length);
+          }
+
+          if (seekToNonBMP(seekTermEnum, prevTerm, downTo)) {
+            // TODO: more efficient seek?
+            getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+            //newSuffixStart = downTo+4;
+            newSuffixStart = downTo;
+            scratchTerm.copy(termEnum.term().bytes());
+            didSeek = true;
+            if (DEBUG_SURROGATES) {
+              System.out.println("      seek!");
+            }
+            break;
+          } else {
+            if (DEBUG_SURROGATES) {
+              System.out.println("      no seek");
+            }
+          }
+        }
+
+        // Shorten prevTerm in place so that we don't redo
+        // this loop if we come back here:
+        if ((prevTerm.bytes[downTo] & 0xc0) == 0xc0 || (prevTerm.bytes[downTo] & 0x80) == 0) {
+          prevTerm.length = downTo;
+        }
+
+        downTo--;
+      }
+
+      return didSeek;
+    }
+
+    // Look for seek type 3 ("pop"): if the delta from
+    // prev -> current was replacing an S with an E,
+    // we must now seek to beyond that E.  This seek
+    // "finishes" the dance at this character
+    // position.
+    private boolean doPop() throws IOException {
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("  try pop");
+      }
+
+      assert newSuffixStart <= prevTerm.length;
+      assert newSuffixStart < scratchTerm.length || newSuffixStart == 0;
+
+      if (prevTerm.length > newSuffixStart &&
+          isNonBMPChar(prevTerm.bytes, newSuffixStart) &&
+          isHighBMPChar(scratchTerm.bytes, newSuffixStart)) {
+
+        // Seek type 2 -- put 0xFF at this position:
+        scratchTerm.bytes[newSuffixStart] = (byte) 0xff;
+        scratchTerm.length = newSuffixStart+1;
+
+        if (DEBUG_SURROGATES) {
+          System.out.println("    seek to term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString());
+        }
+
+        // TODO: more efficient seek?  can we simply swap
+        // the enums?
+        getTermsDict().seekEnum(termEnum, protoTerm.createTerm(scratchTerm));
+
+        final Term t2 = termEnum.term();
+
+        // We could hit EOF or different field since this
+        // was a seek "forward":
+        if (t2 != null && t2.field() == fieldInfo.name) {
+
+          if (DEBUG_SURROGATES) {
+            System.out.println("      got term=" + UnicodeUtil.toHexString(t2.text()) + " " + t2.bytes());
+          }
+
+          final BytesRef b2 = t2.bytes();
+          assert b2.offset == 0;
+
+          // Set newSuffixStart -- we can't use
+          // termEnum's since the above seek may have
+          // done no scanning (eg, term was precisely
+          // and index term, or, was in the term seek
+          // cache):
+          scratchTerm.copy(b2);
+          setNewSuffixStart(prevTerm, scratchTerm);
+
+          return true;
+        } else if (newSuffixStart != 0 || scratchTerm.length != 0) {
+          if (DEBUG_SURROGATES) {
+            System.out.println("      got term=null (or next field)");
+          }
+          newSuffixStart = 0;
+          scratchTerm.length = 0;
+          return true;
+        }
+      }
+
+      return false;
+    }
+
+    // Pre-flex indices store terms in UTF16 sort order, but
+    // certain queries require Unicode codepoint order; this
+    // method carefully seeks around surrogates to handle
+    // this impedance mismatch
+
+    private void surrogateDance() throws IOException {
+
+      if (!unicodeSortOrder) {
+        return;
+      }
+
+      // We are invoked after TIS.next() (by UTF16 order) to
+      // possibly seek to a different "next" (by unicode
+      // order) term.
+
+      // We scan only the "delta" from the last term to the
+      // current term, in UTF8 bytes.  We look at 1) the bytes
+      // stripped from the prior term, and then 2) the bytes
+      // appended to that prior term's prefix.
+
+      // We don't care about specific UTF8 sequences, just
+      // the "category" of the UTF16 character.  Category S
+      // is a high/low surrogate pair (it non-BMP).
+      // Category E is any BMP char > UNI_SUR_LOW_END (and <
+      // U+FFFF). Category A is the rest (any unicode char
+      // <= UNI_SUR_HIGH_START).
+
+      // The core issue is that pre-flex indices sort the
+      // characters as ASE, while flex must sort as AES.  So
+      // when scanning, when we hit S, we must 1) seek
+      // forward to E and enum the terms there, then 2) seek
+      // back to S and enum all terms there, then 3) seek to
+      // after E.  Three different seek points (1, 2, 3).
+
+      // We can easily detect S in UTF8: if a byte has
+      // prefix 11110 (0xf0), then that byte and the
+      // following 3 bytes encode a single unicode codepoint
+      // in S.  Similary,we can detect E: if a byte has
+      // prefix 1110111 (0xee), then that byte and the
+      // following 2 bytes encode a single unicode codepoint
+      // in E.
+
+      // Note that this is really a recursive process --
+      // maybe the char at pos 2 needs to dance, but any
+      // point in its dance, suddenly pos 4 needs to dance
+      // so you must finish pos 4 before returning to pos
+      // 2.  But then during pos 4's dance maybe pos 7 needs
+      // to dance, etc.  However, despite being recursive,
+      // we don't need to hold any state because the state
+      // can always be derived by looking at prior term &
+      // current term.
+
+      // TODO: can we avoid this copy?
+      if (termEnum.term() == null || termEnum.term().field() != fieldInfo.name) {
+        scratchTerm.length = 0;
+      } else {
+        scratchTerm.copy(termEnum.term().bytes());
+      }
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("  dance");
+        System.out.println("    prev=" + UnicodeUtil.toHexString(prevTerm.utf8ToString()));
+        System.out.println("         " + prevTerm.toString());
+        System.out.println("    term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()));
+        System.out.println("         " + scratchTerm.toString());
+      }
+
+      // This code assumes TermInfosReader/SegmentTermEnum
+      // always use BytesRef.offset == 0
+      assert prevTerm.offset == 0;
+      assert scratchTerm.offset == 0;
+
+      // Need to loop here because we may need to do multiple
+      // pops, and possibly a continue in the end, ie:
+      //
+      //  cont
+      //  pop, cont
+      //  pop, pop, cont
+      //  <nothing>
+      //
+
+      while(true) {
+        if (doContinue()) {
+          break;
+        } else {
+          if (!doPop()) {
+            break;
+          }
+        }
+      }
+
+      if (DEBUG_SURROGATES) {
+        System.out.println("  finish bmp ends");
+      }
+
+      doPushes();
+    }
+
+
+    // Look for seek type 1 ("push"): if the newly added
+    // suffix contains any S, we must try to seek to the
+    // corresponding E.  If we find a match, we go there;
+    // else we keep looking for additional S's in the new
+    // suffix.  This "starts" the dance, at this character
+    // position:
+    private void doPushes() throws IOException {
+
+      int upTo = newSuffixStart;
+      if (DEBUG_SURROGATES) {
+        System.out.println("  try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.length);
+      }
+
+      while(upTo < scratchTerm.length) {
+        if (isNonBMPChar(scratchTerm.bytes, upTo) &&
+            (upTo > newSuffixStart ||
+             (upTo >= prevTerm.length ||
+              (!isNonBMPChar(prevTerm.bytes, upTo) &&
+               !isHighBMPChar(prevTerm.bytes, upTo))))) {
+
+          // A non-BMP char (4 bytes UTF8) starts here:
+          assert scratchTerm.length >= upTo + 4;
+
+          final int savLength = scratchTerm.length;
+          scratch[0] = scratchTerm.bytes[upTo];
+          scratch[1] = scratchTerm.bytes[upTo+1];
+          scratch[2] = scratchTerm.bytes[upTo+2];
+
+          scratchTerm.bytes[upTo] = UTF8_HIGH_BMP_LEAD;
+          scratchTerm.bytes[upTo+1] = (byte) 0x80;
+          scratchTerm.bytes[upTo+2] = (byte) 0x80;
+          scratchTerm.length = upTo+3;
+
+          if (DEBUG_SURROGATES) {
+            System.out.println("    try seek 1 pos=" + upTo + " term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString() + " len=" + scratchTerm.length);
+          }
+
+          // Seek "forward":
+          // TODO: more efficient seek?
+          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(scratchTerm));
+
+          scratchTerm.bytes[upTo] = scratch[0];
+          scratchTerm.bytes[upTo+1] = scratch[1];
+          scratchTerm.bytes[upTo+2] = scratch[2];
+          scratchTerm.length = savLength;
+
+          // Did we find a match?
+          final Term t2 = seekTermEnum.term();
+
+          if (DEBUG_SURROGATES) {
+            if (t2 == null) {
+              System.out.println("      hit term=null");
+            } else {
+              System.out.println("      hit term=" + UnicodeUtil.toHexString(t2.text()) + " " + (t2==null? null:t2.bytes()));
+            }
+          }
+
+          // Since this was a seek "forward", we could hit
+          // EOF or a different field:
+          boolean matches;
+
+          if (t2 != null && t2.field() == fieldInfo.name) {
+            final BytesRef b2 = t2.bytes();
+            assert b2.offset == 0;
+            if (b2.length >= upTo+3 && isHighBMPChar(b2.bytes, upTo)) {
+              matches = true;
+              for(int i=0;i<upTo;i++) {
+                if (scratchTerm.bytes[i] != b2.bytes[i]) {
+                  matches = false;
+                  break;
+                }
+              }
+
+            } else {
+              matches = false;
+            }
+          } else {
+            matches = false;
+          }
+
+          if (matches) {
+
+            if (DEBUG_SURROGATES) {
+              System.out.println("      matches!");
+            }
+
+            // OK seek "back"
+            // TODO: more efficient seek?
+            getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+
+            scratchTerm.copy(seekTermEnum.term().bytes());
+
+            // +3 because we don't need to check the char
+            // at upTo: we know it's > BMP
+            upTo += 3;
+
+            // NOTE: we keep iterating, now, since this
+            // can easily "recurse".  Ie, after seeking
+            // forward at a certain char position, we may
+            // find another surrogate in our [new] suffix
+            // and must then do another seek (recurse)
+          } else {
+            upTo++;
+          }
+        } else {
+          upTo++;
+        }
+      }
+    }
+
+    private boolean unicodeSortOrder;
 
     void reset(FieldInfo fieldInfo) throws IOException {
       //System.out.println("pff.reset te=" + termEnum);
       this.fieldInfo = fieldInfo;
       protoTerm = new Term(fieldInfo.name);
       if (termEnum == null) {
         termEnum = getTermsDict().terms(protoTerm);
         seekTermEnum = getTermsDict().terms(protoTerm);
         //System.out.println("  term=" + termEnum.term());
       } else {
         getTermsDict().seekEnum(termEnum, protoTerm);
       }
       skipNext = true;
 
-      surrogateSeekUpto = 0;
+      unicodeSortOrder = sortTermsByUnicode();
 
       final Term t = termEnum.term();
       if (t != null && t.field() == fieldInfo.name) {
         newSuffixStart = 0;
-
-        surrogatesDance();
+        prevTerm.length = 0;
+        surrogateDance();
       }
     }
 
-    private void surrogatesDance() throws IOException {
-
-      // Tricky: prior to 4.0, Lucene index sorted terms in
-      // UTF16 order, but as of 4.0 we sort by Unicode code
-      // point order.  These orders differ because of the
-      // surrrogates; so we have to fixup our enum, here, by
-      // carefully first seeking past the surrogates and
-      // then back again at the end.  The process is
-      // recursive, since any given term could have multiple
-      // new occurrences of surrogate pairs, so we use a
-      // stack to record the pending seek-backs.
-      if (DEBUG_SURROGATES) {
-        System.out.println("  dance start term=" + (termEnum.term() == null ? null : UnicodeUtil.toHexString(termEnum.term().text())));
-      }
-
-      while(popPendingSeek());
-      while(pushNewSurrogate());
-    }
-
-    // only for debugging
-    private String getStack() {
-      if (surrogateSeekUpto == 0) {
-        return "null";
-      } else {
-        StringBuffer sb = new StringBuffer();
-        for(int i=0;i<surrogateSeekUpto;i++) {
-          if (i > 0) {
-            sb.append(' ');
-          }
-          sb.append(surrogateSeekPending[i]);
-        }
-        sb.append(" pendingSeekText=" + new String(pendingPrefix, 0, surrogateSeekPending[surrogateSeekUpto-1]));
-        return sb.toString();
-      }
-    }
-
-    private boolean popPendingSeek() throws IOException {
-      if (DEBUG_SURROGATES) {
-        System.out.println("  check pop newSuffix=" + newSuffixStart + " stack=" + getStack());
-      }
-      // if a .next() has advanced beyond the
-      // after-surrogates range we had last seeked to, we
-      // must seek back to the start and resume .next from
-      // there.  this pops the pending seek off the stack.
-      final Term t = termEnum.term();
-      if (surrogateSeekUpto > 0) {
-        final int seekPrefix = surrogateSeekPending[surrogateSeekUpto-1];
-        if (DEBUG_SURROGATES) {
-          System.out.println("    seekPrefix=" + seekPrefix);
-        }
-        if (newSuffixStart < seekPrefix) {
-          assert pendingPrefix != null;
-          assert pendingPrefix.length > seekPrefix;
-          pendingPrefix[seekPrefix] = UnicodeUtil.UNI_SUR_HIGH_START;
-          pendingPrefix[1+seekPrefix] = UnicodeUtil.UNI_SUR_LOW_START;
-          Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 2+seekPrefix));
-          if (DEBUG_SURROGATES) {
-            System.out.println("    do pop; seek back to " + UnicodeUtil.toHexString(t2.text()));
-          }
-          getTermsDict().seekEnum(termEnum, t2);
-          surrogateDidSeekBack[surrogateSeekUpto-1] = true;
-
-          // +2 because we don't want to re-check the
-          // surrogates we just seek'd back to
-          newSuffixStart = seekPrefix + 2;
-          return true;
-        } else if (newSuffixStart == seekPrefix && surrogateDidSeekBack[surrogateSeekUpto-1] && t != null && t.field() == fieldInfo.name && t.text().charAt(seekPrefix) > UnicodeUtil.UNI_SUR_LOW_END) {
-          assert pendingPrefix != null;
-          assert pendingPrefix.length > seekPrefix;
-          pendingPrefix[seekPrefix] = 0xffff;
-          Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 1+seekPrefix));
-          if (DEBUG_SURROGATES) {
-            System.out.println("    finish pop; seek fwd to " + UnicodeUtil.toHexString(t2.text()));
-          }
-          getTermsDict().seekEnum(termEnum, t2);
-          if (DEBUG_SURROGATES) {
-            System.out.println("    found term=" + (termEnum.term() == null ? null : UnicodeUtil.toHexString(termEnum.term().text())));
-          }
-          surrogateSeekUpto--;
-
-          if (termEnum.term() == null || termEnum.term().field() != fieldInfo.name) {
-            // force pop
-            newSuffixStart = -1;
-          } else {
-            newSuffixStart = termEnum.newSuffixStart;
-          }
-
-          return true;
-        }
-      }
-
-      return false;
-    }
-
-    private UnicodeUtil.UTF16Result termBuffer = new UnicodeUtil.UTF16Result();
-    private UnicodeUtil.UTF16Result seekBuffer = new UnicodeUtil.UTF16Result();
-
-    private boolean pushNewSurrogate() throws IOException {
-      if (DEBUG_SURROGATES) {
-        System.out.println("  check push newSuffix=" + newSuffixStart + " stack=" + getStack());
-      }
-      final Term t = termEnum.term();
-      if (t == null || t.field() != fieldInfo.name) {
-        return false;
-      }
-
-      final BytesRef bytes = t.bytes();
-      UnicodeUtil.UTF8toUTF16(bytes.bytes, bytes.offset, bytes.length, termBuffer);
-
-      for(int i=Math.max(0,newSuffixStart);i<termBuffer.length;i++) {
-        final char ch = termBuffer.result[i];
-        if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END && (surrogateSeekUpto == 0 || i > surrogateSeekPending[surrogateSeekUpto-1])) {
-
-          if (DEBUG_SURROGATES) {
-            System.out.println("    found high surr 0x" + Integer.toHexString(ch) + " at pos=" + i);
-          }
-
-          // the next() that we just did read in a new
-          // suffix, containing a surrogate pair
-
-          // seek forward to see if there are any terms with
-          // this same prefix, but with characters after the
-          // surrogate range; if so, we must first iterate
-          // them, then seek back to the surrogates
-
-          char[] testPrefix = new char[i+2];
-          for(int j=0;j<i;j++) {
-            testPrefix[j] = termBuffer.result[j];
-          }
-          testPrefix[i] = 1+UnicodeUtil.UNI_SUR_LOW_END;
-
-          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(new BytesRef(testPrefix, 0, i+1)));
-
-          Term t2 = seekTermEnum.term();
-          boolean isPrefix;
-          if (t2 != null && t2.field() == fieldInfo.name) {
-
-            final BytesRef seekBytes = t2.bytes();
-            UnicodeUtil.UTF8toUTF16(seekBytes.bytes, seekBytes.offset, seekBytes.length, seekBuffer);
-
-            isPrefix = true;
-            if (DEBUG_SURROGATES) {
-              System.out.println("      seek found " + UnicodeUtil.toHexString(t2.text()));
-            }
-            for(int j=0;j<i;j++) {
-              if (testPrefix[j] != seekBuffer.result[j]) {
-                isPrefix = false;
-                break;
-              }
-            }
-            if (DEBUG_SURROGATES && !isPrefix) {
-              System.out.println("      no end terms");
-            }
-          } else {
-            if (DEBUG_SURROGATES) {
-              System.out.println("      no end terms");
-            }
-            isPrefix = false;
-          }
-
-          if (isPrefix) {
-            // we found a term, sharing the same prefix,
-            // with characters after the surrogates, so we
-            // must first enum those, and then return the
-            // the surrogates afterwards.  push that pending
-            // seek on the surrogates stack now:
-            pendingPrefix = testPrefix;
-
-            getTermsDict().seekEnum(termEnum, t2);
-
-            if (surrogateSeekUpto == surrogateSeekPending.length) {
-              surrogateSeekPending = ArrayUtil.grow(surrogateSeekPending);
-            }
-            if (surrogateSeekUpto == surrogateDidSeekBack.length) {
-              surrogateDidSeekBack = ArrayUtil.grow(surrogateDidSeekBack);
-            }
-            surrogateSeekPending[surrogateSeekUpto] = i;
-            surrogateDidSeekBack[surrogateSeekUpto] = false;
-            surrogateSeekUpto++;
-
-            if (DEBUG_SURROGATES) {
-              System.out.println("    do push " + i + "; end term=" + UnicodeUtil.toHexString(t2.text()));
-            }
-
-            newSuffixStart = i+1;
-
-            return true;
-          } else {
-            // there are no terms after the surrogates, so
-            // we do nothing to the enum and just step
-            // through the surrogates like normal.  but we
-            // must keep iterating through the term, in case
-            // another surrogate pair appears later
-          }
-        }
-      }
-
-      return false;
-    }
-
     @Override
     public Comparator<BytesRef> getComparator() {
       // Pre-flex indexes always sorted in UTF16 order, but
       // we remap on-the-fly to unicode order
+      if (unicodeSortOrder) {
         return BytesRef.getUTF8SortedAsUnicodeComparator();
+      } else {
+        return BytesRef.getUTF8SortedAsUTF16Comparator();
+      }
     }
 
     @Override
@@ -484,7 +740,7 @@ public class PreFlexFields extends FieldsProducer {
     @Override
     public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
       if (DEBUG_SURROGATES) {
-        System.out.println("TE.seek() term=" + term.utf8ToString());
+        System.out.println("TE.seek target=" + UnicodeUtil.toHexString(term.utf8ToString()));
       }
       skipNext = false;
       final TermInfosReader tis = getTermsDict();
@@ -492,50 +748,142 @@ public class PreFlexFields extends FieldsProducer {
 
-      assert termEnum != null;
 
+      if (termEnum == null) {
+        termEnum = tis.terms(t0);
+      } else {
+        tis.seekEnum(termEnum, t0);
+      }
 
-      surrogateSeekUpto = 0;
-      surrogatesDance();
-
       final Term t = termEnum.term();
 
-      final BytesRef tr = t == null ? null : t.bytes();
-
-      if (t != null && t.field() == fieldInfo.name && term.bytesEquals(tr)) {
-        current = tr;
+      if (t != null && t.field() == fieldInfo.name && term.bytesEquals(t.bytes())) {
+        // If we found an exact match, no need to do the
+        // surrogate dance
+        if (DEBUG_SURROGATES) {
+          System.out.println("  seek exact match");
+        }
+        current = t.bytes();
         return SeekStatus.FOUND;
       } else if (t == null || t.field() != fieldInfo.name) {
 
+        // TODO: maybe we can handle this like the next()
+        // into null?  set term as prevTerm then dance?
+
+        if (DEBUG_SURROGATES) {
+          System.out.println("  seek hit EOF");
+        }
+
+        // We hit EOF; try end-case surrogate dance: if we
+        // find an E, try swapping in S, backwards:
+        scratchTerm.copy(term);
+
+        assert scratchTerm.offset == 0;
+
+        for(int i=scratchTerm.length-1;i>=0;i--) {
+          if (isHighBMPChar(scratchTerm.bytes, i)) {
+            if (DEBUG_SURROGATES) {
+              System.out.println("    found E pos=" + i + "; try seek");
+            }
+
+            if (seekToNonBMP(seekTermEnum, scratchTerm, i)) {
+
+              scratchTerm.copy(seekTermEnum.term().bytes());
+              getTermsDict().seekEnum(termEnum, seekTermEnum.term());
+
+              newSuffixStart = 1+i;
+
+              doPushes();
+
+              // Found a match
+              // TODO: faster seek?
+              current = termEnum.term().bytes();
+              return SeekStatus.NOT_FOUND;
+            }
+          }
+        }
+
+        if (DEBUG_SURROGATES) {
+          System.out.println("  seek END");
+        }
+
         current = null;
         return SeekStatus.END;
       } else {
-        current = tr;
+
+        // We found a non-exact but non-null term; this one
+        // is fun -- just treat it like next, by pretending
+        // requested term was prev:
+        prevTerm.copy(term);
+
+        if (DEBUG_SURROGATES) {
+          System.out.println("  seek hit non-exact term=" + UnicodeUtil.toHexString(t.text()));
+        }
+
+        final BytesRef br = t.bytes();
+        assert br.offset == 0;
+
+        setNewSuffixStart(term, br);
+
+        surrogateDance();
+
+        final Term t2 = termEnum.term();
+        if (t2 == null || t2.field() != fieldInfo.name) {
+          assert t2 == null || !t2.field().equals(fieldInfo.name); // make sure fields are in fact interned
+          current = null;
+          return SeekStatus.END;
+        } else {
+          current = t2.bytes();
+          assert !unicodeSortOrder || term.compareTo(current) < 0 : "term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " vs current=" + UnicodeUtil.toHexString(current.utf8ToString());
           return SeekStatus.NOT_FOUND;
+        }
       }
     }
 
+    private void setNewSuffixStart(BytesRef br1, BytesRef br2) {
+      final int limit = Math.min(br1.length, br2.length);
+      int lastStart = 0;
+      for(int i=0;i<limit;i++) {
+        if ((br1.bytes[br1.offset+i] & 0xc0) == 0xc0 || (br1.bytes[br1.offset+i] & 0x80) == 0) {
+          lastStart = i;
+        }
+        if (br1.bytes[br1.offset+i] != br2.bytes[br2.offset+i]) {
+          newSuffixStart = lastStart;
+          if (DEBUG_SURROGATES) {
+            System.out.println("  set newSuffixStart=" + newSuffixStart);
+          }
+          return;
+        }
+      }
+      newSuffixStart = limit;
+      if (DEBUG_SURROGATES) {
+        System.out.println("  set newSuffixStart=" + newSuffixStart);
+      }
+    }
+
     @Override
     public BytesRef next() throws IOException {
       if (DEBUG_SURROGATES) {
-        System.out.println("TE.next() skipNext=" + skipNext);
+        System.out.println("TE.next()");
       }
       if (skipNext) {
+        if (DEBUG_SURROGATES) {
+          System.out.println("  skipNext=true");
+        }
         skipNext = false;
         if (termEnum.term() == null) {
           return null;
+        } else if (termEnum.term().field() != fieldInfo.name) {
+          return null;
         } else {
           return current = termEnum.term().bytes();
         }
       }
 
+      // TODO: can we use STE's prevBuffer here?
+      prevTerm.copy(termEnum.term().bytes());
+
      if (termEnum.next() && termEnum.term().field() == fieldInfo.name) {
        newSuffixStart = termEnum.newSuffixStart;
         if (DEBUG_SURROGATES) {
-          System.out.println("  set newSuffixStart=" + newSuffixStart);
+          System.out.println("  newSuffixStart=" + newSuffixStart);
         }
-        surrogatesDance();
+        surrogateDance();
         final Term t = termEnum.term();
         if (t == null || t.field() != fieldInfo.name) {
           assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
@@ -545,12 +893,15 @@ public class PreFlexFields extends FieldsProducer {
         }
         return current;
       } else {
+        // This field is exhausted, but we have to give
+        // surrogateDance a chance to seek back:
        if (DEBUG_SURROGATES) {
-          System.out.println("  force pop");
+          System.out.println("  force cont");
         }
-        // force pop
-        newSuffixStart = -1;
-        surrogatesDance();
+        //newSuffixStart = prevTerm.length;
+        newSuffixStart = 0;
+        surrogateDance();
 
         final Term t = termEnum.term();
         if (t == null || t.field() != fieldInfo.name) {
           assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
@@ -574,21 +925,33 @@ public class PreFlexFields extends FieldsProducer {
 
     @Override
     public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
-      if (reuse != null) {
-        return ((PreDocsEnum) reuse).reset(termEnum, skipDocs);
+      PreDocsEnum docsEnum;
+      if (reuse == null || !(reuse instanceof PreDocsEnum)) {
+        docsEnum = new PreDocsEnum();
       } else {
-        return (new PreDocsEnum()).reset(termEnum, skipDocs);
+        docsEnum = (PreDocsEnum) reuse;
+        if (docsEnum.getFreqStream() != freqStream) {
+          docsEnum = new PreDocsEnum();
+        }
       }
+      return docsEnum.reset(termEnum, skipDocs);
     }
 
     @Override
     public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
-      if (reuse != null) {
-        return ((PreDocsAndPositionsEnum) reuse).reset(termEnum, skipDocs);
+      PreDocsAndPositionsEnum docsPosEnum;
+      if (fieldInfo.omitTermFreqAndPositions) {
+        return null;
+      } else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
+        docsPosEnum = new PreDocsAndPositionsEnum();
       } else {
-        return (new PreDocsAndPositionsEnum()).reset(termEnum, skipDocs);
+        docsPosEnum = (PreDocsAndPositionsEnum) reuse;
+        if (docsPosEnum.getFreqStream() != freqStream) {
+          docsPosEnum = new PreDocsAndPositionsEnum();
+        }
+      }
+      return docsPosEnum.reset(termEnum, skipDocs);
     }
   }
 
   private final class PreDocsEnum extends DocsEnum {
@@ -598,6 +961,10 @@ public class PreFlexFields extends FieldsProducer {
       docs = new SegmentTermDocs(freqStream, getTermsDict(), fieldInfos);
     }
 
+    IndexInput getFreqStream() {
+      return freqStream;
+    }
+
     public PreDocsEnum reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException {
       docs.setSkipDocs(skipDocs);
       docs.seek(termEnum);
@@ -650,6 +1017,10 @@ public class PreFlexFields extends FieldsProducer {
       pos = new SegmentTermPositions(freqStream, proxStream, getTermsDict(), fieldInfos);
     }
 
+    IndexInput getFreqStream() {
+      return freqStream;
+    }
+
    public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException {
       pos.setSkipDocs(skipDocs);
       pos.seek(termEnum);
|
|||
|
||||
size = input.readLong(); // read the size
|
||||
|
||||
if(format == -1){
|
||||
if (!isIndex) {
|
||||
indexInterval = input.readInt();
|
||||
formatM1SkipInterval = input.readInt();
|
||||
}
|
||||
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
|
||||
// skipTo implementation of these versions
|
||||
skipInterval = Integer.MAX_VALUE;
|
||||
} else {
|
||||
indexInterval = input.readInt();
|
||||
skipInterval = input.readInt();
|
||||
maxSkipLevels = input.readInt();
|
||||
}
|
||||
assert indexInterval > 0: "indexInterval=" + indexInterval + " is negative; must be > 0";
|
||||
assert skipInterval > 0: "skipInterval=" + skipInterval + " is negative; must be > 0";
|
||||
}
|
||||
|
@ -132,18 +122,21 @@ public final class SegmentTermEnum implements Cloneable {
|
|||
position = p;
|
||||
termBuffer.set(t);
|
||||
prevBuffer.reset();
|
||||
//System.out.println(" ste doSeek prev=" + prevBuffer.toTerm() + " this=" + this);
|
||||
termInfo.set(ti);
|
||||
}
|
||||
|
||||
/** Increments the enumeration to the next element. True if one exists.*/
|
||||
public final boolean next() throws IOException {
|
||||
if (position++ >= size - 1) {
|
||||
prevBuffer.set(termBuffer);
|
||||
//System.out.println(" ste setPrev=" + prev() + " this=" + this);
|
||||
|
||||
if (position++ >= size - 1) {
|
||||
termBuffer.reset();
|
||||
//System.out.println(" EOF");
|
||||
return false;
|
||||
}
|
||||
|
||||
prevBuffer.set(termBuffer);
|
||||
termBuffer.read(input, fieldInfos);
|
||||
newSuffixStart = termBuffer.newSuffixStart;
|
||||
|
||||
|
@ -168,6 +161,7 @@ public final class SegmentTermEnum implements Cloneable {
|
|||
if (isIndex)
|
||||
indexPointer += input.readVLong(); // read index pointer
|
||||
|
||||
//System.out.println(" ste ret term=" + term());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@@ -18,9 +18,10 @@ package org.apache.lucene.index.codecs.preflex;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.FieldInfos;
 
@@ -28,102 +29,65 @@ final class TermBuffer implements Cloneable {
 
   private String field;
   private Term term;                            // cached
-  private boolean dirty;                        // true if text was set externally (ie not read via UTF8 bytes)
 
-  private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result();
   private BytesRef bytes = new BytesRef(10);
 
-  int newSuffixStart;
+  private static final Comparator<BytesRef> utf8AsUTF16Comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
 
-  public final int compareTo(TermBuffer other) {
+  int newSuffixStart;                           // only valid right after .read is called
+
+  public int compareTo(TermBuffer other) {
     if (field == other.field)                   // fields are interned
-      return compareChars(text.result, text.length, other.text.result, other.text.length);
+      return utf8AsUTF16Comparator.compare(bytes, other.bytes);
     else
       return field.compareTo(other.field);
   }
 
-  private static int compareChars(char[] chars1, int len1,
-                                  char[] chars2, int len2) {
-    final int end = len1 < len2 ? len1:len2;
-    for (int k = 0; k < end; k++) {
-      char c1 = chars1[k];
-      char c2 = chars2[k];
-      if (c1 != c2) {
-        return c1 - c2;
-      }
-    }
-    return len1 - len2;
-  }
-
-  public final void read(IndexInput input, FieldInfos fieldInfos)
+  public void read(IndexInput input, FieldInfos fieldInfos)
     throws IOException {
     this.term = null;                           // invalidate cache
-    int start = input.readVInt();
+    newSuffixStart = input.readVInt();
     int length = input.readVInt();
-    int totalLength = start + length;
+    int totalLength = newSuffixStart + length;
     if (bytes.bytes.length < totalLength) {
       bytes.grow(totalLength);
     }
-    if (dirty) {
-      // Fully convert all bytes since bytes is dirty
-      UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
-      bytes.length = totalLength;
-      input.readBytes(bytes.bytes, start, length);
-      UnicodeUtil.UTF8toUTF16(bytes.bytes, 0, totalLength, text);
-      dirty = false;
-    } else {
-      // Incrementally convert only the UTF8 bytes that are new:
     bytes.length = totalLength;
-      input.readBytes(bytes.bytes, start, length);
-      UnicodeUtil.UTF8toUTF16(bytes.bytes, start, length, text);
-    }
-
-    while(true) {
-      newSuffixStart = text.offsets[start];
-      if (newSuffixStart != -1) {
-        break;
-      }
-      if (--start == 0) {
-        newSuffixStart = 0;
-        break;
-      }
-    }
+    input.readBytes(bytes.bytes, newSuffixStart, length);
     this.field = fieldInfos.fieldName(input.readVInt());
   }
 
-  public final void set(Term term) {
+  public void set(Term term) {
    if (term == null) {
      reset();
       return;
     }
 
-    final BytesRef termBytes = term.bytes();
-    UnicodeUtil.UTF8toUTF16(termBytes.bytes, termBytes.offset, termBytes.length, text);
-    dirty = true;
+    bytes.copy(term.bytes());
     field = term.field();
     this.term = term;
   }
 
-  public final void set(TermBuffer other) {
-    text.copyText(other.text);
-    dirty = true;
+  public void set(TermBuffer other) {
     field = other.field;
-    term = other.term;
+
+    // dangerous to copy Term over, since the underlying
+    // BytesRef could subsequently be modified:
+    term = null;
+    bytes.copy(other.bytes);
   }
 
   public void reset() {
     field = null;
-    text.setLength(0);
     term = null;
-    dirty = true;
   }
 
   public Term toTerm() {
     if (field == null) // unset
       return null;
 
-    if (term == null)
-      term = new Term(field, new BytesRef(text.result, 0, text.length), false);
+    if (term == null) {
+      term = new Term(field, new BytesRef(bytes), false);
+      //term = new Term(field, bytes, false);
+    }
 
     return term;
   }
@@ -134,12 +98,7 @@ final class TermBuffer implements Cloneable {
     try {
       clone = (TermBuffer)super.clone();
     } catch (CloneNotSupportedException e) {}
-    clone.dirty = true;
-    clone.bytes = new BytesRef(10);
-    clone.text = new UnicodeUtil.UTF16Result();
-    clone.text.offsets = new int[text.offsets.length];
-    System.arraycopy(text.offsets, 0, clone.text.offsets, 0, text.offsets.length);
-    clone.text.copyText(text);
+    clone.bytes = new BytesRef(bytes);
     return clone;
   }
 }
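Note the subtlety in the TermBuffer change above: it now compares raw UTF-8 bytes, yet must preserve the legacy UTF-16 code unit order of the 3.x terms dictionary, which is why it goes through BytesRef.getUTF8SortedAsUTF16Comparator rather than plain byte order. A hedged sketch of checking that equivalence — the comparator is the one referenced in the hunk above; the BytesRef(String) constructor is an assumption about the utility API of this era:

import java.util.Comparator;
import org.apache.lucene.util.BytesRef;

public class Utf16OrderCheck {
  public static void main(String[] args) {
    Comparator<BytesRef> cmp = BytesRef.getUTF8SortedAsUTF16Comparator();

    String a = new String(Character.toChars(0x10400)); // non-BMP (surrogate pair)
    String b = "\uFB01";                               // high BMP

    int byChars = a.compareTo(b);                                // UTF-16 order on chars
    int byBytes = cmp.compare(new BytesRef(a), new BytesRef(b)); // UTF-16 order on UTF-8 bytes

    // The two orderings agree in sign:
    System.out.println(Integer.signum(byChars) == Integer.signum(byBytes)); // true
  }
}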
@@ -23,30 +23,30 @@ package org.apache.lucene.index.codecs.preflex;
  *  indexing.  */
 
 @Deprecated
-class TermInfo {
+public class TermInfo {
   /** The number of documents which contain the term. */
-  int docFreq = 0;
+  public int docFreq = 0;
 
-  long freqPointer = 0;
-  long proxPointer = 0;
-  int skipOffset;
+  public long freqPointer = 0;
+  public long proxPointer = 0;
+  public int skipOffset;
 
-  TermInfo() {}
+  public TermInfo() {}
 
-  TermInfo(int df, long fp, long pp) {
+  public TermInfo(int df, long fp, long pp) {
     docFreq = df;
     freqPointer = fp;
     proxPointer = pp;
   }
 
-  TermInfo(TermInfo ti) {
+  public TermInfo(TermInfo ti) {
     docFreq = ti.docFreq;
     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
     skipOffset = ti.skipOffset;
   }
 
-  final void set(int docFreq,
+  public final void set(int docFreq,
                  long freqPointer, long proxPointer, int skipOffset) {
     this.docFreq = docFreq;
     this.freqPointer = freqPointer;
@@ -54,7 +54,7 @@ class TermInfo {
     this.skipOffset = skipOffset;
   }
 
-  final void set(TermInfo ti) {
+  public final void set(TermInfo ti) {
     docFreq = ti.docFreq;
     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
@@ -120,8 +120,11 @@ public final class TermInfosReader {
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];

for (int i = 0; indexEnum.next(); i++) {
for (int i=0;indexEnum.next(); i++) {
indexTerms[i] = indexEnum.term();
assert indexTerms[i] != null;
assert indexTerms[i].text() != null;
assert indexTerms[i].field() != null;
indexInfos[i] = indexEnum.termInfo();
indexPointers[i] = indexEnum.indexPointer;

@@ -160,14 +163,14 @@ public final class TermInfosReader {
return origEnum.maxSkipLevels;
}

final void close() throws IOException {
void close() throws IOException {
if (origEnum != null)
origEnum.close();
threadResources.close();
}

/** Returns the number of term/value pairs in the set. */
final long size() {
long size() {
return size;
}

@@ -183,12 +186,13 @@ public final class TermInfosReader {

/** Returns the offset of the greatest index entry which is less than or equal to term.*/
private final int getIndexOffset(Term term) {
private int getIndexOffset(Term term) {
int lo = 0; // binary search indexTerms[]
int hi = indexTerms.length - 1;

while (hi >= lo) {
int mid = (lo + hi) >>> 1;
assert indexTerms[mid] != null : "indexTerms = " + indexTerms.length + " mid=" + mid;
int delta = term.compareToUTF16(indexTerms[mid]);
if (delta < 0)
hi = mid - 1;

@@ -200,7 +204,7 @@ public final class TermInfosReader {
return hi;
}

private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
private void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset],
((long) indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);

@@ -231,6 +235,9 @@ public final class TermInfosReader {
}

TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd) throws IOException {
if (size == 0) {
return null;
}

// optimize sequential access: first try scanning cached enum w/o seeking
if (enumerator.term() != null // term is at or past current

@@ -242,7 +249,6 @@ public final class TermInfosReader {
// no need to seek

final TermInfo ti;

int numScans = enumerator.scanTo(term);
if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
ti = enumerator.termInfo();

@@ -279,6 +285,7 @@ public final class TermInfosReader {
seekEnum(enumerator, indexPos);
enumerator.scanTo(term);
final TermInfo ti;

if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
ti = enumerator.termInfo();
if (tiOrd == null) {

@@ -294,7 +301,7 @@ public final class TermInfosReader {
}

// called only from asserts
private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
private boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
if (ti1.docFreq != ti2.docFreq) {
return false;
}

@@ -319,7 +326,7 @@ public final class TermInfosReader {
}

/** Returns the position of a Term in the set or -1. */
final long getPosition(Term term) throws IOException {
long getPosition(Term term) throws IOException {
if (size == 0) return -1;

ensureIndexIsRead();
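For orientation, the lookup path these hunks modify, in condensed form (a sketch of the existing control flow using only calls shown above, not a new API):

  int indexPos = getIndexOffset(term);        // binary search over the in-memory index terms
  seekEnum(enumerator, indexPos);             // position the on-disk enum at that block
  enumerator.scanTo(term);                    // linear scan within the block
  if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
    ti = enumerator.termInfo();               // exact hit
  }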
@@ -331,12 +331,17 @@ public final class BytesRef implements Comparable<BytesRef>, Externalizable {
// We know the terms are not equal, but, we may
// have to carefully fixup the bytes at the
// difference to match UTF16's sort order:

// NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff,
// we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences]
// this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such
// that 6-byte sequences are needed we have much bigger problems anyway.
if (aByte >= 0xee && bByte >= 0xee) {
if ((aByte & 0xfe) == 0xee) {
aByte += 0x10;
aByte += 0xe;
}
if ((bByte&0xfe) == 0xee) {
bByte += 0x10;
bByte += 0xe;
}
}
return aByte - bByte;

@@ -346,10 +351,6 @@ public final class BytesRef implements Comparable<BytesRef>, Externalizable {
// One is a prefix of the other, or, they are equal:
return a.length - b.length;
}

public boolean equals(Object other) {
return this == other;
}
}

public void writeExternal(ObjectOutput out)
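For illustration, a minimal standalone sketch of the fixup above (the method and parameter names are assumptions, not part of the patch). The two unsigned byte values are the first pair that differs between two UTF-8 terms; lead bytes 0xee/0xef (BMP characters at U+E000 and above) are shifted to 0xfc/0xfd so they compare above the 4-byte lead bytes 0xf0-0xf4 used by supplementary characters, which is exactly UTF-16 code-unit order:

  // aByte/bByte are unsigned values (0-255) already known to differ
  static int compareBytesAsUTF16(int aByte, int bByte) {
    if (aByte >= 0xee && bByte >= 0xee) {
      if ((aByte & 0xfe) == 0xee) {
        aByte += 0xe;   // 0xee -> 0xfc, 0xef -> 0xfd
      }
      if ((bByte & 0xfe) == 0xee) {
        bByte += 0xe;
      }
    }
    return aByte - bByte;
  }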
@@ -2,9 +2,7 @@ package org.apache.lucene.document;

import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.MockRAMDirectory;

@@ -58,8 +56,7 @@ public class TestBinaryDocument extends LuceneTestCase {

/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);

/** open a reader and fetch the document */

@@ -98,8 +95,7 @@ public class TestBinaryDocument extends LuceneTestCase {

/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);

/** open a reader and fetch the document */
@@ -1,8 +1,6 @@
package org.apache.lucene.document;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;

@@ -155,8 +153,7 @@ public class TestDocument extends LuceneTestCase {
*/
public void testGetValuesForIndexedDocument() throws Exception {
RAMDirectory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(makeDocumentWithFields());
IndexReader reader = writer.getReader();

@@ -234,8 +231,7 @@ public class TestDocument extends LuceneTestCase {
Field.Index.NOT_ANALYZED));

RAMDirectory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);
field.setValue("id2");
writer.addDocument(doc);
@@ -17,20 +17,18 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import java.util.Random;
import java.io.Closeable;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.util._TestUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.intblock.IntBlockCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
import org.apache.lucene.index.codecs.sep.SepCodec;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCaseJ4;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;

/** Silly class that randomizes the indexing experience. EG
* it may swap in a different merge policy/scheduler; may

@@ -45,32 +43,48 @@ public class RandomIndexWriter implements Closeable {
int docCount;
int flushAt;

// Randomly calls Thread.yield so we mixup thread scheduling
private static final class MockIndexWriter extends IndexWriter {

private final Random r;

public MockIndexWriter(Random r,Directory dir, IndexWriterConfig conf) throws IOException {
super(dir, conf);
this.r = r;
}

@Override
boolean testPoint(String name) {
if (r.nextInt(4) == 2)
Thread.yield();
return true;
}
}

/** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */
public RandomIndexWriter(Random r, Directory dir) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, LuceneTestCaseJ4.TEST_VERSION_CURRENT, new MockAnalyzer()));
}

/** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */
public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, LuceneTestCaseJ4.TEST_VERSION_CURRENT, a));
}

/** create a RandomIndexWriter with a random config */
public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, v, a));
}

/** create a RandomIndexWriter with the provided config */
public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
this.r = r;
if (r.nextBoolean()) {
c.setMergePolicy(new LogDocMergePolicy());
}
if (r.nextBoolean()) {
c.setMergeScheduler(new SerialMergeScheduler());
}
if (r.nextBoolean()) {
c.setMaxBufferedDocs(_TestUtil.nextInt(r, 2, 1000));
}
if (r.nextBoolean()) {
c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
}

if (c.getMergePolicy() instanceof LogMergePolicy) {
LogMergePolicy logmp = (LogMergePolicy) c.getMergePolicy();
logmp.setUseCompoundDocStore(r.nextBoolean());
logmp.setUseCompoundFile(r.nextBoolean());
logmp.setCalibrateSizeByDeletes(r.nextBoolean());
}

c.setReaderPooling(r.nextBoolean());
c.setCodecProvider(new RandomCodecProvider(r));
w = new IndexWriter(dir, c);
w = new MockIndexWriter(r, dir, c);
flushAt = _TestUtil.nextInt(r, 10, 1000);
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW config=" + w.getConfig());
System.out.println("codec default=" + CodecProvider.getDefaultCodec());
}
}

public void addDocument(Document doc) throws IOException {

@@ -89,14 +103,27 @@ public class RandomIndexWriter implements Closeable {
w.deleteDocuments(term);
}

public void commit() throws CorruptIndexException, IOException {
w.commit();
}

public int maxDoc() {
return w.maxDoc();
}

public IndexReader getReader() throws IOException {
if (r.nextBoolean()) {
// If we are writing with PreFlexRW, force a full
// IndexReader.open so terms are sorted in codepoint
// order during searching:
if (!w.codecs.getWriter(null).name.equals("PreFlex") && r.nextBoolean()) {
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW.getReader: use NRT reader");
}
return w.getReader();
} else {
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW.getReader: open new reader");
}
w.commit();
return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10));
}

@@ -112,22 +139,4 @@ public class RandomIndexWriter implements Closeable {
public void optimize() throws IOException {
w.optimize();
}

class RandomCodecProvider extends CodecProvider {
final String codec;

RandomCodecProvider(Random random) {
register(new StandardCodec());
register(new IntBlockCodec());
register(new PreFlexCodec());
register(new PulsingCodec());
register(new SepCodec());
codec = CodecProvider.CORE_CODECS[random.nextInt(CodecProvider.CORE_CODECS.length)];
}

@Override
public Codec getWriter(SegmentWriteState state) {
return lookup(codec);
}
}
}
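The new convenience constructors above are what let the test diffs below drop their explicit IndexWriterConfig. A minimal usage sketch, assuming a LuceneTestCase subclass (the field name and values are illustrative only):

  Directory dir = new MockRAMDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);  // random config, random codec
  Document doc = new Document();
  doc.add(new Field("f", "a b c", Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  reader.close();
  writer.close();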
@@ -19,7 +19,6 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;

@@ -139,7 +138,6 @@ public class TestAddIndexes extends LuceneTestCase {

setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));

writer.addIndexes(new Directory[] {aux});

// Adds 10 docs, then replaces them with another 10
@@ -493,7 +493,13 @@ public class TestCodecs extends MultiCodecTestCase {
// Test random seek by ord:
final int idx = TestCodecs.this.nextInt(field.terms.length);
term = field.terms[idx];
try {
status = termsEnum.seek(idx);
} catch (UnsupportedOperationException uoe) {
// ok -- skip it
status = null;
}
if (status != null) {
assertEquals(status, TermsEnum.SeekStatus.FOUND);
assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
assertEquals(term.docs.length, termsEnum.docFreq());

@@ -502,6 +508,7 @@ public class TestCodecs extends MultiCodecTestCase {
} else {
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
}
}

// Test seek to non-existent terms:
for(int i=0;i<100;i++) {

@@ -520,9 +527,12 @@ public class TestCodecs extends MultiCodecTestCase {

// Seek to each term by ord, backwards
for(int i=field.terms.length-1;i>=0;i--) {
try {
assertEquals(Thread.currentThread().getName() + ": field=" + field.fieldInfo.name + " term=" + field.terms[i].text2, TermsEnum.SeekStatus.FOUND, termsEnum.seek(i));
assertEquals(field.terms[i].docs.length, termsEnum.docFreq());
assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[i].text2)));
} catch (UnsupportedOperationException uoe) {
}
}

// Seek to non-existent empty-string term
@@ -20,6 +20,8 @@ package org.apache.lucene.index;
import java.io.*;
import java.util.*;
import org.apache.lucene.store.*;
import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.standard.*;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;

@@ -64,7 +66,8 @@ public class TestFlex extends LuceneTestCase {

public void testTermOrd() throws Exception {
Directory d = new MockRAMDirectory();
IndexWriter w = new IndexWriter(d, new MockAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("f", "a b c", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
@@ -1675,7 +1675,7 @@ public class TestIndexReader extends LuceneTestCase
// LUCENE-1586: getUniqueTermCount
public void testUniqueTermCount() throws Exception {
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));

@@ -1708,7 +1708,7 @@ public class TestIndexReader extends LuceneTestCase
// LUCENE-1609: don't load terms index
public void testNoTermsIndex() throws Throwable {
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));

@@ -1725,7 +1725,7 @@ public class TestIndexReader extends LuceneTestCase
}

assertEquals(-1, ((SegmentReader) r.getSequentialSubReaders()[0]).getTermInfosIndexDivisor());
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
writer.addDocument(doc);
writer.close();
@@ -4670,16 +4670,16 @@ public class TestIndexWriter extends LuceneTestCase {
}

// Make sure terms, including ones with surrogate pairs,
// sort in UTF16 sort order by default
// sort in codepoint sort order by default
public void testTermUTF16SortOrder() throws Throwable {
Random rnd = newRandom();
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
RandomIndexWriter writer = new RandomIndexWriter(rnd, dir);
Document d = new Document();
// Single segment
Field f = new Field("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
d.add(f);
char[] chars = new char[2];
Random rnd = newRandom();
final Set<String> allTerms = new HashSet<String>();

for(int i=0;i<200*_TestUtil.getRandomMultiplier();i++) {

@@ -4705,7 +4705,6 @@ public class TestIndexWriter extends LuceneTestCase {
allTerms.add(s);
f.setValue(s);

//System.out.println("add " + termDesc(s));
writer.addDocument(d);

if ((1+i) % 42 == 0) {
@@ -394,18 +394,18 @@ public class TestIndexWriterDelete extends LuceneTestCase {
}

public void testDeletesOnDiskFull() throws IOException {
testOperationsOnDiskFull(false);
doTestOperationsOnDiskFull(false);
}

public void testUpdatesOnDiskFull() throws IOException {
testOperationsOnDiskFull(true);
doTestOperationsOnDiskFull(true);
}

/**
* Make sure if modifier tries to commit but hits disk full that modifier
* remains consistent and usable. Similar to TestIndexReader.testDiskFull().
*/
private void testOperationsOnDiskFull(boolean updates) throws IOException {
private void doTestOperationsOnDiskFull(boolean updates) throws IOException {

Term searchTerm = new Term("content", "aaa");
int START_COUNT = 157;

@@ -700,6 +700,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
try {
modifier.commit();
} catch (IOException ioe) {
// expected
failed = true;
}
@@ -27,11 +27,12 @@ public class TestMultiFields extends LuceneTestCase {

public void testRandom() throws Exception {

Random r = newRandom();

for(int iter=0;iter<2*_TestUtil.getRandomMultiplier();iter++) {
Directory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES));

Random r = new Random();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES));

Map<BytesRef,List<Integer>> docs = new HashMap<BytesRef,List<Integer>>();
Set<Integer> deleted = new HashSet<Integer>();

@@ -45,7 +46,7 @@ public class TestMultiFields extends LuceneTestCase {
doc.add(id);

boolean onlyUniqueTerms = r.nextBoolean();

Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
for(int i=0;i<numDocs;i++) {

if (!onlyUniqueTerms && r.nextBoolean() && terms.size() > 0) {

@@ -61,6 +62,7 @@ public class TestMultiFields extends LuceneTestCase {
}
docs.get(term).add(i);
terms.add(term);
uniqueTerms.add(term);
f.setValue(s);
}
id.setValue(""+i);

@@ -75,8 +77,18 @@ public class TestMultiFields extends LuceneTestCase {
}
}

if (VERBOSE) {
List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
System.out.println("UTF16 order:");
for(BytesRef b : termsList) {
System.out.println(" " + UnicodeUtil.toHexString(b.utf8ToString()));
}
}

IndexReader reader = w.getReader();
w.close();
//System.out.println("TEST reader=" + reader);

Bits delDocs = MultiFields.getDeletedDocs(reader);
for(int delDoc : deleted) {
@@ -31,7 +31,7 @@ public class TestRollback extends LuceneTestCase {
// LUCENE-2536
public void testRollbackIntegrityWithBufferFlush() throws Exception {
Directory dir = new MockRAMDirectory();
RandomIndexWriter rw = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter rw = new RandomIndexWriter(newRandom(), dir);
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(new Field("pk", Integer.toString(i), Store.YES, Index.ANALYZED_NO_NORMS));
@@ -21,6 +21,7 @@ import java.io.IOException;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

@@ -64,7 +65,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
public void testPrevTermAtEnd() throws IOException
{
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
addDoc(writer, "aaa bbb");
writer.close();
SegmentReader reader = SegmentReader.getOnlySegmentReader(dir);
@@ -68,10 +68,10 @@ public class TestStressIndexing2 extends MultiCodecTestCase {

// TODO: verify equals using IW.getReader
DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
IndexReader r = dw.writer.getReader();
IndexReader reader = dw.writer.getReader();
dw.writer.commit();
verifyEquals(r, dir, "id");
r.close();
verifyEquals(r, reader, dir, "id");
reader.close();
dw.writer.close();
dir.close();
}

@@ -261,8 +261,8 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
w.close();
}

public static void verifyEquals(IndexReader r1, Directory dir2, String idField) throws Throwable {
IndexReader r2 = IndexReader.open(dir2, true);
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
IndexReader r2 = IndexReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}
@@ -18,8 +18,10 @@ package org.apache.lucene.index.codecs.preflex;
*/

import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.apache.lucene.util.*;

import java.util.*;

@@ -30,8 +32,6 @@ import org.junit.Test;

public class TestSurrogates extends LuceneTestCaseJ4 {

// chooses from a very limited alphabet to exacerbate the
// surrogate seeking required
private static String makeDifficultRandomUnicodeString(Random r) {
final int end = r.nextInt(20);
if (end == 0) {

@@ -44,136 +44,66 @@ public class TestSurrogates extends LuceneTestCaseJ4 {

if (0 == t && i < end - 1) {
// hi
buffer[i++] = (char) 0xd800;
buffer[i++] = (char) (0xd800 + r.nextInt(2));
// lo
buffer[i] = (char) 0xdc00;
buffer[i] = (char) (0xdc00 + r.nextInt(2));
} else if (t <= 3) {
buffer[i] = 'a';
buffer[i] = (char) ('a' + r.nextInt(2));
} else if (4 == t) {
buffer[i] = 0xe000;
buffer[i] = (char) (0xe000 + r.nextInt(2));
}
}

return new String(buffer, 0, end);
}

private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<Term> fieldTerms) throws IOException {

final int numField = _TestUtil.nextInt(r, 2, 5);

List<Term> terms = new ArrayList<Term>();

int tc = 0;

for(int f=0;f<numField;f++) {
String field = "f" + f;
Term protoTerm = new Term(field);

fieldInfos.add(field, true, false, false, false, false, false, false);
final int numTerms = 10000*_TestUtil.getRandomMultiplier();
for(int i=0;i<numTerms;i++) {
String s;
if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
} else {
s = _TestUtil.randomUnicodeString(r);

// The surrogate dance uses 0xffff to seek-to-end
// of blocks. Also, pre-4.0 indices are already
// guaranteed to not contain the char 0xffff since
// it's mapped during indexing:
s = s.replace((char) 0xffff, (char) 0xfffe);
}
terms.add(protoTerm.createTerm(s + "_" + (tc++)));
}
}

fieldInfos.write(dir, segName);

// sorts in UTF16 order, just like preflex:
Collections.sort(terms, new Comparator<Term>() {
public int compare(Term o1, Term o2) {
return o1.compareToUTF16(o2);
}
});

TermInfosWriter w = new TermInfosWriter(dir, segName, fieldInfos, 128);
TermInfo ti = new TermInfo();
String lastText = null;
int uniqueTermCount = 0;
if (VERBOSE) {
System.out.println("TEST: utf16 order:");
}
for(Term t : terms) {
FieldInfo fi = fieldInfos.fieldInfo(t.field());

String text = t.text();
if (lastText != null && lastText.equals(text)) {
continue;
}
fieldTerms.add(t);
uniqueTermCount++;
lastText = text;

if (VERBOSE) {
System.out.println(" " + toHexString(t));
}
w.add(fi.number, t.bytes().bytes, t.bytes().length, ti);
}
w.close();

Collections.sort(fieldTerms);
if (VERBOSE) {
System.out.println("\nTEST: codepoint order");
for(Term t: fieldTerms) {
System.out.println(" " + t.field() + ":" + toHexString(t));
}
}

dir.createOutput(segName + ".prx").close();
dir.createOutput(segName + ".frq").close();

// !!hack alert!! stuffing uniqueTermCount in as docCount
return new SegmentInfo(segName, uniqueTermCount, dir, false, -1, null, false, true, codec);
}

private String toHexString(Term t) {
return t.field() + ":" + UnicodeUtil.toHexString(t.text());
}

@Test
public void testSurrogatesOrder() throws Exception {
Directory dir = new MockRAMDirectory();
private String getRandomString(Random r) {
String s;
if (r.nextInt(5) == 1) {
if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
} else {
s = _TestUtil.randomUnicodeString(r);
}
} else {
s = _TestUtil.randomRealisticUnicodeString(r);
}
return s;
}

Codec codec = new PreFlexCodec();
private static class SortTermAsUTF16Comparator implements Comparator<Term> {
public int compare(Term o1, Term o2) {
return o1.compareToUTF16(o2);
}
}

Random r = newRandom();
FieldInfos fieldInfos = new FieldInfos();
List<Term> fieldTerms = new ArrayList<Term>();
SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);
private static final SortTermAsUTF16Comparator termAsUTF16Comparator = new SortTermAsUTF16Comparator();

// hack alert!!
int uniqueTermCount = si.docCount;

FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
assertNotNull(fields);
// single straight enum
private void doTestStraightEnum(List<Term> fieldTerms, IndexReader reader, int uniqueTermCount) throws IOException {

if (VERBOSE) {
System.out.println("\nTEST: now enum");
System.out.println("\nTEST: top now enum reader=" + reader);
}
FieldsEnum fieldsEnum = fields.iterator();
String field;
UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator();

{
// Test straight enum:
String field;
int termCount = 0;
while((field = fieldsEnum.next()) != null) {
TermsEnum termsEnum = fieldsEnum.terms();
BytesRef text;
BytesRef lastText = null;
while((text = termsEnum.next()) != null) {
Term exp = fieldTerms.get(termCount);
if (VERBOSE) {
UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
System.out.println("got term=" + field + ":" + UnicodeUtil.toHexString(new String(utf16.result, 0, utf16.length)));
System.out.println(" got term=" + field + ":" + UnicodeUtil.toHexString(text.utf8ToString()));
System.out.println(" exp=" + exp.field() + ":" + UnicodeUtil.toHexString(exp.text().toString()));
System.out.println();
}
if (lastText == null) {

@@ -182,8 +112,8 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
assertTrue(lastText.compareTo(text) < 0);
lastText.copy(text);
}
assertEquals(fieldTerms.get(termCount).field(), field);
assertEquals(fieldTerms.get(termCount).bytes(), text);
assertEquals(exp.field(), field);
assertEquals(exp.bytes(), text);
termCount++;
}
if (VERBOSE) {

@@ -191,7 +121,220 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
}
}
assertEquals(uniqueTermCount, termCount);
}
}

fields.close();
// randomly seeks to term that we know exists, then next's
// from there
private void doTestSeekExists(Random r, List<Term> fieldTerms, IndexReader reader) throws IOException {

final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>();

// Test random seek to existing term, then enum:
if (VERBOSE) {
System.out.println("\nTEST: top now seek");
}

for(int iter=0;iter<100*_TestUtil.getRandomMultiplier();iter++) {

// pick random field+term
int spot = r.nextInt(fieldTerms.size());
Term term = fieldTerms.get(spot);
String field = term.field();

if (VERBOSE) {
System.out.println("TEST: exist seek field=" + field + " term=" + UnicodeUtil.toHexString(term.text()));
}

// seek to it
TermsEnum te = tes.get(field);
if (te == null) {
te = MultiFields.getTerms(reader, field).iterator();
tes.put(field, te);
}

if (VERBOSE) {
System.out.println(" done get enum");
}

// seek should find the term
assertEquals(TermsEnum.SeekStatus.FOUND,
te.seek(term.bytes()));

// now .next() this many times:
int ct = _TestUtil.nextInt(r, 5, 100);
for(int i=0;i<ct;i++) {
if (VERBOSE) {
System.out.println("TEST: now next()");
}
if (1+spot+i >= fieldTerms.size()) {
break;
}
term = fieldTerms.get(1+spot+i);
if (term.field() != field) {
assertNull(te.next());
break;
} else {
BytesRef t = te.next();

if (VERBOSE) {
System.out.println(" got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString())));
System.out.println(" exp=" + UnicodeUtil.toHexString(term.text().toString()));
}

assertEquals(term.bytes(), t);
}
}
}
}

private void doTestSeekDoesNotExist(Random r, int numField, List<Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader) throws IOException {

final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>();

if (VERBOSE) {
System.out.println("TEST: top random seeks");
}

{
for(int iter=0;iter<100*_TestUtil.getRandomMultiplier();iter++) {

// seek to random spot
String field = ("f" + r.nextInt(numField)).intern();
Term tx = new Term(field, getRandomString(r));

int spot = Arrays.binarySearch(fieldTermsArray, tx);

if (spot < 0) {
if (VERBOSE) {
System.out.println("TEST: non-exist seek to " + field + ":" + UnicodeUtil.toHexString(tx.text()));
}

// term does not exist:
TermsEnum te = tes.get(field);
if (te == null) {
te = MultiFields.getTerms(reader, field).iterator();
tes.put(field, te);
}

if (VERBOSE) {
System.out.println(" got enum");
}

spot = -spot - 1;

if (spot == fieldTerms.size() || fieldTerms.get(spot).field() != field) {
assertEquals(TermsEnum.SeekStatus.END, te.seek(tx.bytes()));
} else {
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(tx.bytes()));

if (VERBOSE) {
System.out.println(" got term=" + UnicodeUtil.toHexString(te.term().utf8ToString()));
System.out.println(" exp term=" + UnicodeUtil.toHexString(fieldTerms.get(spot).text()));
}

assertEquals(fieldTerms.get(spot).bytes(),
te.term());

// now .next() this many times:
int ct = _TestUtil.nextInt(r, 5, 100);
for(int i=0;i<ct;i++) {
if (VERBOSE) {
System.out.println("TEST: now next()");
}
if (1+spot+i >= fieldTerms.size()) {
break;
}
Term term = fieldTerms.get(1+spot+i);
if (term.field() != field) {
assertNull(te.next());
break;
} else {
BytesRef t = te.next();

if (VERBOSE) {
System.out.println(" got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString())));
System.out.println(" exp=" + UnicodeUtil.toHexString(term.text().toString()));
}

assertEquals(term.bytes(), t);
}
}

}
}
}
}
}

@Test
public void testSurrogatesOrder() throws Exception {
Random r = newRandom();

Directory dir = new MockRAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(r,
dir,
newIndexWriterConfig(r, TEST_VERSION_CURRENT,
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));

final int numField = _TestUtil.nextInt(r, 2, 5);

int uniqueTermCount = 0;

int tc = 0;

List<Term> fieldTerms = new ArrayList<Term>();

for(int f=0;f<numField;f++) {
String field = "f" + f;
final int numTerms = 10000*_TestUtil.getRandomMultiplier();

final Set<String> uniqueTerms = new HashSet<String>();

for(int i=0;i<numTerms;i++) {
String term = getRandomString(r) + "_ " + (tc++);
uniqueTerms.add(term);
fieldTerms.add(new Term(field, term));
Document doc = new Document();
doc.add(new Field(field, term, Field.Store.NO, Field.Index.NOT_ANALYZED));
w.addDocument(doc);
}
uniqueTermCount += uniqueTerms.size();
}

IndexReader reader = w.getReader();

if (VERBOSE) {
Collections.sort(fieldTerms, termAsUTF16Comparator);

System.out.println("\nTEST: UTF16 order");
for(Term t: fieldTerms) {
System.out.println(" " + toHexString(t));
}
}

// sorts in code point order:
Collections.sort(fieldTerms);

if (VERBOSE) {
System.out.println("\nTEST: codepoint order");
for(Term t: fieldTerms) {
System.out.println(" " + toHexString(t));
}
}

Term[] fieldTermsArray = fieldTerms.toArray(new Term[fieldTerms.size()]);

//SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);

//FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
//assertNotNull(fields);

doTestStraightEnum(fieldTerms, reader, uniqueTermCount);
doTestSeekExists(r, fieldTerms, reader);
doTestSeekDoesNotExist(r, numField, fieldTerms, fieldTermsArray, reader);

reader.close();
}
}
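The crux this test exercises: UTF-16 code-unit order and codepoint (UTF-8 byte) order disagree exactly when surrogate pairs meet BMP characters at U+E000 or above. A self-contained illustration, not part of the patch (run with assertions enabled):

  String bmp  = "\ue000";        // U+E000: a single UTF-16 code unit
  String supp = "\ud800\udc00";  // U+10000: a surrogate pair
  // Pre-flex on-disk order is UTF-16 code-unit order: supp sorts first (0xd800 < 0xe000)
  assert supp.compareTo(bmp) < 0;
  // The flex API exposes codepoint (UTF-8 byte) order: bmp sorts first (U+E000 < U+10000)
  assert new BytesRef(bmp).compareTo(new BytesRef(supp)) < 0;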
@@ -0,0 +1,212 @@
package org.apache.lucene.index.codecs.preflexrw;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.standard.DefaultSkipListWriter;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.preflex.TermInfo;
import org.apache.lucene.store.IndexOutput;

import java.io.IOException;
import java.util.Comparator;

class PreFlexFieldsWriter extends FieldsConsumer {

private final TermInfosWriter termsOut;
private final IndexOutput freqOut;
private final IndexOutput proxOut;
private final DefaultSkipListWriter skipListWriter;
private final int totalNumDocs;

public PreFlexFieldsWriter(SegmentWriteState state) throws IOException {
termsOut = new TermInfosWriter(state.directory,
state.segmentName,
state.fieldInfos,
state.termIndexInterval);
state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_EXTENSION));
state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_INDEX_EXTENSION));

final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(freqFile);
state.flushedFiles.add(freqFile);
totalNumDocs = state.numDocs;

if (state.fieldInfos.hasProx()) {
final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
proxOut = state.directory.createOutput(proxFile);
state.flushedFiles.add(proxFile);
} else {
proxOut = null;
}

skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
termsOut.maxSkipLevels,
totalNumDocs,
freqOut,
proxOut);
//System.out.println("\nw start seg=" + segment);
}

@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
assert field.number != -1;
//System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
return new PreFlexTermsWriter(field);
}

@Override
public void close() throws IOException {
termsOut.close();
freqOut.close();
if (proxOut != null) {
proxOut.close();
}
}

private class PreFlexTermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
private final boolean omitTF;
private final boolean storePayloads;

private final TermInfo termInfo = new TermInfo();
private final PostingsWriter postingsWriter = new PostingsWriter();

public PreFlexTermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
}

private class PostingsWriter extends PostingsConsumer {
private int lastDocID;
private int lastPayloadLength = -1;
private int lastPosition;
private int df;

public PostingsWriter reset() {
df = 0;
lastDocID = 0;
lastPayloadLength = -1;
return this;
}

@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
//System.out.println(" w doc=" + docID);

final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
}

if ((++df % termsOut.skipInterval) == 0) {
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}

lastDocID = docID;

assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;

if (omitTF) {
freqOut.writeVInt(delta);
} else {
final int code = delta << 1;
if (termDocFreq == 1) {
freqOut.writeVInt(code|1);
} else {
freqOut.writeVInt(code);
freqOut.writeVInt(termDocFreq);
}
}
lastPosition = 0;
}

@Override
public void addPosition(int position, BytesRef payload) throws IOException {
assert proxOut != null;

//System.out.println(" w pos=" + position + " payl=" + payload);
final int delta = position - lastPosition;
lastPosition = position;

if (storePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
//System.out.println(" write payload len=" + payloadLength);
lastPayloadLength = payloadLength;
proxOut.writeVInt((delta<<1)|1);
proxOut.writeVInt(payloadLength);
} else {
proxOut.writeVInt(delta << 1);
}
if (payloadLength > 0) {
proxOut.writeBytes(payload.bytes, payload.offset, payload.length);
}
} else {
proxOut.writeVInt(delta);
}
}

@Override
public void finishDoc() throws IOException {
}
}

@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
//System.out.println(" w term=" + text.utf8ToString());
skipListWriter.resetSkip();
termInfo.freqPointer = freqOut.getFilePointer();
if (proxOut != null) {
termInfo.proxPointer = proxOut.getFilePointer();
}
return postingsWriter.reset();
}

@Override
public void finishTerm(BytesRef text, int numDocs) throws IOException {
if (numDocs > 0) {
long skipPointer = skipListWriter.writeSkip(freqOut);
termInfo.docFreq = numDocs;
termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);
//System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number);
termsOut.add(fieldInfo.number,
text,
termInfo);
}
}

@Override
public void finish() throws IOException {
}

@Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUTF16Comparator();
}
}
}
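For reference, the read side of the doc/freq encoding written by startDoc above follows the classic pre-flex .frq layout; a sketch assuming an IndexInput named in and a running doc counter (both names are assumptions):

  int code = in.readVInt();
  doc += code >>> 1;            // upper bits carry the doc delta
  int freq;
  if ((code & 1) != 0) {
    freq = 1;                   // low bit set: freq == 1 was not written
  } else {
    freq = in.readVInt();       // otherwise the freq follows explicitly
  }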
@@ -0,0 +1,77 @@
package org.apache.lucene.index.codecs.preflexrw;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.util.LuceneTestCaseJ4;

/** Codec, only for testing, that can write and read the
* pre-flex index format.
*
* @lucene.experimental
*/
public class PreFlexRWCodec extends PreFlexCodec {

public PreFlexRWCodec() {
// NOTE: we impersonate the PreFlex codec so that it can
// read the segments we write!
super();
}

@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new PreFlexFieldsWriter(state);
}

@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {

// Whenever IW opens readers, eg for merging, we have to
// keep terms order in UTF16:

return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
@Override
protected boolean sortTermsByUnicode() {
// We carefully peek into stack track above us: if
// we are part of a "merge", we must sort by UTF16:
boolean unicodeSortOrder = true;

StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
//System.out.println(trace[i].getClassName());
if ("merge".equals(trace[i].getMethodName())) {
unicodeSortOrder = false;
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
}
break;
}
}

return unicodeSortOrder;
}
};
}
}
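Tests opt into this codec explicitly, as TestSurrogates does above. A sketch of the hookup, assuming the _TestUtil.alwaysCodec helper used throughout this patch:

  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
          .setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));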
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.index.codecs.preflex;
|
||||
package org.apache.lucene.index.codecs.preflexrw;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -19,9 +19,12 @@ package org.apache.lucene.index.codecs.preflex;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.index.codecs.preflex.TermInfo;
|
||||
|
||||
|
||||
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
|
||||
|
@ -71,8 +74,7 @@ final class TermInfosWriter {
|
|||
|
||||
private long lastIndexPointer;
|
||||
private boolean isIndex;
|
||||
private byte[] lastTermBytes = new byte[10];
|
||||
private int lastTermBytesLength = 0;
|
||||
private final BytesRef lastTerm = new BytesRef();
|
||||
private int lastFieldNumber = -1;
|
||||
|
||||
private TermInfosWriter other;
|
||||
|
@ -104,13 +106,10 @@ final class TermInfosWriter {
|
|||
assert initUTF16Results();
|
||||
}
|
||||
|
||||
void add(Term term, TermInfo ti) throws IOException {
|
||||
add(fieldInfos.fieldNumber(term.field()), term.bytes().bytes, term.bytes().length, ti);
|
||||
}
|
||||
|
||||
// Currently used only by assert statements
|
||||
UnicodeUtil.UTF16Result utf16Result1;
|
||||
UnicodeUtil.UTF16Result utf16Result2;
|
||||
private final BytesRef scratchBytes = new BytesRef();
|
||||
|
||||
// Currently used only by assert statements
|
||||
private boolean initUTF16Results() {
|
||||
|
@ -120,7 +119,7 @@ final class TermInfosWriter {
|
|||
}
|
||||
|
||||
// Currently used only by assert statement
|
||||
private int compareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) {
|
||||
private int compareToLastTerm(int fieldNumber, BytesRef term) {
|
||||
|
||||
if (lastFieldNumber != fieldNumber) {
|
||||
final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber));
|
||||
|
@ -132,8 +131,13 @@ final class TermInfosWriter {
|
|||
return cmp;
|
||||
}
|
||||
|
||||
UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1);
|
||||
UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2);
|
||||
scratchBytes.copy(term);
|
||||
assert lastTerm.offset == 0;
|
||||
UnicodeUtil.UTF8toUTF16(lastTerm.bytes, 0, lastTerm.length, utf16Result1);
|
||||
|
||||
assert scratchBytes.offset == 0;
|
||||
UnicodeUtil.UTF8toUTF16(scratchBytes.bytes, 0, scratchBytes.length, utf16Result2);
|
||||
|
||||
final int len;
|
||||
if (utf16Result1.length < utf16Result2.length)
|
||||
len = utf16Result1.length;
|
||||
|
@@ -152,22 +156,22 @@ final class TermInfosWriter {
   /** Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set.
     Term must be lexicographically greater than all previous Terms added.
     TermInfo pointers must be positive and greater than all previous.*/
-  void add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
+  public void add(int fieldNumber, BytesRef term, TermInfo ti)
     throws IOException {
 
-    assert compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
-      (isIndex && termBytesLength == 0 && lastTermBytesLength == 0) :
+    assert compareToLastTerm(fieldNumber, term) < 0 ||
+      (isIndex && term.length == 0 && lastTerm.length == 0) :
       "Terms are out of order: field=" + fieldInfos.fieldName(fieldNumber) + " (number " + fieldNumber + ")" +
       " lastField=" + fieldInfos.fieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
-      " text=" + new String(termBytes, 0, termBytesLength, "UTF-8") + " lastText=" + new String(lastTermBytes, 0, lastTermBytesLength, "UTF-8");
+      " text=" + term.utf8ToString() + " lastText=" + lastTerm.utf8ToString();
 
     assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")";
     assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")";
 
     if (!isIndex && size % indexInterval == 0)
-      other.add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi);  // add an index term
+      other.add(lastFieldNumber, lastTerm, lastTi);                           // add an index term
 
-    writeTerm(fieldNumber, termBytes, termBytesLength);  // write term
+    writeTerm(fieldNumber, term);                        // write term
 
     output.writeVInt(ti.docFreq);                        // write doc freq
     output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers

@@ -187,29 +191,27 @@ final class TermInfosWriter {
     size++;
   }
 
-  private void writeTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
+  private void writeTerm(int fieldNumber, BytesRef term)
        throws IOException {
 
+    //System.out.println("  tiw.write field=" + fieldNumber + " term=" + term.utf8ToString());
 
+    // TODO: UTF16toUTF8 could tell us this prefix
     // Compute prefix in common with last term:
     int start = 0;
-    final int limit = termBytesLength < lastTermBytesLength ? termBytesLength : lastTermBytesLength;
+    final int limit = term.length < lastTerm.length ? term.length : lastTerm.length;
     while(start < limit) {
-      if (termBytes[start] != lastTermBytes[start])
+      if (term.bytes[start+term.offset] != lastTerm.bytes[start+lastTerm.offset])
         break;
       start++;
     }
 
-    final int length = termBytesLength - start;
+    final int length = term.length - start;
     output.writeVInt(start);                       // write shared prefix length
     output.writeVInt(length);                      // write delta length
-    output.writeBytes(termBytes, start, length);   // write delta bytes
+    output.writeBytes(term.bytes, start+term.offset, length);  // write delta bytes
     output.writeVInt(fieldNumber);                 // write field num
-    if (lastTermBytes.length < termBytesLength) {
-      lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
-    }
-    System.arraycopy(termBytes, start, lastTermBytes, start, length);
-    lastTermBytesLength = termBytesLength;
+    lastTerm.copy(term);
   }
 
   /** Called to complete TermInfos creation. */
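writeTerm above front-codes each term against the previous one: only the length of the shared prefix and the differing suffix bytes hit the output, which is why lastTerm.copy(term) must remember the full previous term. A toy sketch of the same scheme, with single bytes standing in for the vInts the real format writes; all names here are hypothetical:

import java.io.ByteArrayOutputStream;

public class FrontCodingSketch {
  public static void main(String[] args) throws Exception {
    String[] terms = {"apple", "applet", "apply", "banana"};
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    byte[] last = new byte[0];
    for (String t : terms) {
      byte[] cur = t.getBytes("UTF-8");
      int start = 0;
      int limit = Math.min(cur.length, last.length);
      while (start < limit && cur[start] == last[start]) start++;  // shared prefix
      int length = cur.length - start;
      out.write(start);                 // shared prefix length
      out.write(length);                // delta length
      out.write(cur, start, length);    // delta bytes only
      last = cur;                       // keep the full term, like lastTerm.copy(term)
    }
    // "applet" costs 2 header bytes plus 1 suffix byte instead of 6 raw bytes.
    System.out.println("encoded size: " + out.size() + " bytes");
  }
}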
@@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.RAMDirectory;

@@ -104,7 +102,7 @@ public class BaseTestRangeFilter extends LuceneTestCase {
   private IndexReader build(Random random, TestIndex index) throws IOException {
     /* build an index */
     RandomIndexWriter writer = new RandomIndexWriter(random, index.index,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
         .setOpenMode(OpenMode.CREATE));
 
     for (int d = minId; d <= maxId; d++) {
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Random;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;

@@ -46,8 +44,7 @@ public class TestAutomatonQuery extends LuceneTestCase {
     super.setUp();
     Random random = newRandom();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
     Document doc = new Document();
     Field titleField = new Field("title", "some title", Field.Store.NO,
         Field.Index.ANALYZED);
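From this point on the patch applies one mechanical refactor across the search tests: drop the hand-built IndexWriterConfig and let RandomIndexWriter randomize the writer settings, so the codec chosen through the new tests.codec property gets exercised everywhere. The three constructor shapes the patch standardizes on, collected into one hedged example; the test class itself is hypothetical, but the signatures are the ones used throughout the patch:

import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

public class TestWriterConstruction extends LuceneTestCase {  // hypothetical class
  public void testShapes() throws Exception {
    Random random = newRandom();
    Directory dir = new RAMDirectory();

    // 1. Fully randomized: analyzer, config, and codec are picked for you.
    RandomIndexWriter w1 = new RandomIndexWriter(random, dir);
    w1.close();

    // 2. Pin the analyzer, randomize everything else.
    RandomIndexWriter w2 = new RandomIndexWriter(random, dir,
        new MockAnalyzer(MockTokenizer.WHITESPACE, false));
    w2.close();

    // 3. Explicit config, seeded from the randomized defaults.
    RandomIndexWriter w3 = new RandomIndexWriter(random, dir,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
            .setOpenMode(OpenMode.CREATE));
    w3.close();
  }
}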
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Random;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -49,8 +47,7 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
     super.setUp();
     Random random = newRandom();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
     Document doc = new Document();
     Field titleField = new Field("title", "some title", Field.Store.NO,
         Field.Index.ANALYZED);
@@ -54,7 +54,7 @@ public class TestBoolean2 extends LuceneTestCase {
     super.setUp();
     rnd = newRandom();
     RAMDirectory directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(rnd, directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(rnd, directory);
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
       doc.add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));

@@ -71,14 +71,14 @@ public class TestBoolean2 extends LuceneTestCase {
     int docCount = 0;
     do {
       final Directory copy = new RAMDirectory(dir2);
-      RandomIndexWriter w = new RandomIndexWriter(rnd, dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+      RandomIndexWriter w = new RandomIndexWriter(rnd, dir2);
       w.addIndexes(new Directory[] {copy});
       docCount = w.maxDoc();
       w.close();
       mulFactor *= 2;
     } while(docCount < 3000);
 
-    RandomIndexWriter w = new RandomIndexWriter(rnd, dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter w = new RandomIndexWriter(rnd, dir2);
     Document doc = new Document();
     doc.add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
     for(int i=0;i<NUM_EXTRA_DOCS/2;i++) {
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -60,8 +58,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
     };
 
     index = new RAMDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(rnd, index, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter w = new RandomIndexWriter(rnd, index);
 
     for (int i = 0; i < data.length; i++) {
       Document doc = new Document();
@@ -20,11 +20,9 @@ import java.util.Random;
 
 import org.apache.lucene.util.LuceneTestCase;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;

@@ -143,8 +141,7 @@ public class TestBooleanOr extends LuceneTestCase {
 
     Random random = newRandom();
     //
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
 
     //
     Document d = new Document();
@@ -22,11 +22,9 @@ import junit.framework.Test;
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.PrefixQuery;

@@ -77,8 +75,7 @@ public class TestBooleanPrefixQuery extends LuceneTestCase {
     Query rw1 = null;
     Query rw2 = null;
     IndexReader reader = null;
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < categories.length; i++) {
       Document doc = new Document();
       doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -18,9 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.store.MockRAMDirectory;

@@ -62,8 +60,7 @@ public class TestBooleanQuery extends LuceneTestCase {
   // LUCENE-1630
   public void testNullOrSubScorer() throws Throwable {
     Directory dir = new MockRAMDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter w = new RandomIndexWriter(newRandom(), dir);
     Document doc = new Document();
     doc.add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
     w.addDocument(doc);
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -45,8 +43,7 @@ public class TestBooleanScorer extends LuceneTestCase
 
     String[] values = new String[] { "1", "2", "3", "4" };
 
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < values.length; i++) {
       Document doc = new Document();
       doc.add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -24,12 +24,10 @@ import java.util.Map;
 import java.util.Random;
 import java.util.TreeMap;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -59,8 +57,7 @@ public class TestCustomSearcherSort extends LuceneTestCase implements
     super.setUp();
     Random rand = newRandom();
     index = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, index,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, index);
     RandomGen random = new RandomGen(rand);
     for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if to low the
       // problem doesn't show up
@@ -18,12 +18,10 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -47,8 +45,7 @@ public class TestDateFilter extends LuceneTestCase {
   public void testBefore() throws IOException {
     // create an index
     RAMDirectory indexStore = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
 
     long now = System.currentTimeMillis();
 

@@ -114,8 +111,7 @@ public class TestDateFilter extends LuceneTestCase {
   public void testAfter() throws IOException {
     // create an index
     RAMDirectory indexStore = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
 
     long now = System.currentTimeMillis();
 
@@ -26,7 +26,6 @@ import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;

@@ -53,8 +52,7 @@ public class TestDateSort extends LuceneTestCase {
     super.setUp();
     // Create an index writer.
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
 
     // oldest doc:
     // Add the first document.  text = "Document 1"  dateTime = Oct 10 03:25:22 EDT 2007
@@ -22,13 +22,13 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 
 import java.text.DecimalFormat;
+import java.util.Random;
 import java.io.IOException;
 
 /**

@@ -80,8 +80,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
     super.setUp();
 
     index = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), index,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+    Random random = newRandom();
+    RandomIndexWriter writer = new RandomIndexWriter(random, index,
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
         .setSimilarity(sim));
 
     // hed is the most important field, dek is secondary
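Where a test needs one specific writer setting, here the custom similarity, it keeps an explicit config but obtains it from newIndexWriterConfig, so the remaining settings are still randomized and the caller just chains its override. A hedged sketch of what such a helper can look like; this illustrates the pattern only, it is not the actual LuceneTestCase implementation, and the randomized knobs below are assumptions:

import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

final class RandomConfigSketch {
  static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
    IndexWriterConfig c = new IndexWriterConfig(v, a);
    // randomize a couple of knobs so each run exercises a different setup
    c.setTermIndexInterval(16 + r.nextInt(240));
    c.setMaxBufferedDocs(2 + r.nextInt(98));
    return c;  // caller chains overrides, e.g. .setSimilarity(sim)
  }
}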
@@ -20,10 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.*;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -40,8 +38,7 @@ public class TestDocBoost extends LuceneTestCase {
 
   public void testDocBoost() throws Exception {
     RAMDirectory store = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store);
 
     Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
     Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
@@ -24,13 +24,11 @@ import java.util.Iterator;
 
 import junit.framework.Assert;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;

@@ -104,8 +102,7 @@ public class TestDocIdSet extends LuceneTestCase {
     // Tests that if a Filter produces a null DocIdSet, which is given to
     // IndexSearcher, everything works fine. This came up in LUCENE-1754.
     Directory dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
     Document doc = new Document();
     doc.add(new Field("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
     writer.addDocument(doc);
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spans.SpanFirstQuery;

@@ -70,8 +69,7 @@ public class TestExplanations extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
       doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
@@ -50,8 +50,7 @@ public class TestFieldCache extends LuceneTestCase {
     Random r = newRandom();
     NUM_DOCS = 1000 * _TestUtil.getRandomMultiplier();
     RAMDirectory directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(r, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(r, directory);
     long theLong = Long.MAX_VALUE;
     double theDouble = Double.MAX_VALUE;
     byte theByte = Byte.MAX_VALUE;
@@ -19,11 +19,9 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.util.LuceneTestCase;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.MockRAMDirectory;
 

@@ -39,8 +37,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
   public void testMissingTerms() throws Exception {
     String fieldName = "field1";
     MockRAMDirectory rd = new MockRAMDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd);
     for (int i = 0; i < 100; i++) {
       Document doc = new Document();
       int term = i * 10; //terms are units of 10;
@@ -17,11 +17,9 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;

@@ -50,8 +48,7 @@ public class TestFilteredQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter (newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter (newRandom(), directory);
 
     Document doc = new Document();
     doc.add (new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));

@@ -73,6 +70,11 @@ public class TestFilteredQuery extends LuceneTestCase {
     doc.add (new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED));
     writer.addDocument (doc);
 
+    // tests here require single segment (eg try seed
+    // 8239472272678419952L), because SingleDocTestFilter(x)
+    // blindly accepts that docID in any sub-segment
+    writer.optimize();
+
     reader = writer.getReader();
     writer.close ();
 
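The writer.optimize() call added above matters because Filter.getDocIdSet is invoked once per segment, with docIDs local to that segment. A filter that treats its docID as global, as the comment says SingleDocTestFilter does, only behaves correctly when the index has a single segment. A hedged sketch of that trap; this class is illustrative, not the real SingleDocTestFilter:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;

class SingleDocFilterSketch extends Filter {
  private final int doc;  // meant as a global docID

  SingleDocFilterSketch(int doc) { this.doc = doc; }

  @Override
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    // Called per segment: without remapping the global docID into this
    // segment's docID space, the same local id matches in every segment.
    OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    if (doc < reader.maxDoc()) {
      bits.set(doc);
    }
    return bits;
  }
}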
@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
|
@ -42,8 +41,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
|
||||
public void testFuzziness() throws Exception {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
|
||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
|
||||
addDoc("aaaaa", writer);
|
||||
addDoc("aaaab", writer);
|
||||
addDoc("aaabb", writer);
|
||||
|
@ -196,8 +194,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
|
||||
public void testFuzzinessLong() throws Exception {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
|
||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
|
||||
addDoc("aaaaaaa", writer);
|
||||
addDoc("segment", writer);
|
||||
|
||||
|
@ -287,8 +284,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
|
||||
public void testTokenLengthOpt() throws IOException {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
|
||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
|
||||
addDoc("12345678911", writer);
|
||||
addDoc("segment", writer);
|
||||
|
||||
|
@ -325,8 +321,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
|
||||
public void testBoostOnlyRewrite() throws Exception {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
|
||||
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
|
||||
addDoc("Lucene", writer);
|
||||
addDoc("Lucene", writer);
|
||||
addDoc("Lucenne", writer);
|
||||
|
@ -353,8 +348,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
MockAnalyzer analyzer = new MockAnalyzer();
|
||||
|
||||
Directory index = new MockRAMDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(newRandom(), index,
|
||||
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
|
||||
RandomIndexWriter w = new RandomIndexWriter(newRandom(), index);
|
||||
|
||||
addDoc("Lucene in Action", w);
|
||||
addDoc("Lucene for Dummies", w);
|
||||
|
|
|
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -88,8 +87,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase {
     int terms = (int) Math.pow(2, bits);
 
     RAMDirectory dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
 
     Document doc = new Document();
     Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
@@ -17,7 +17,6 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;

@@ -25,7 +24,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.store.MockRAMDirectory;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 

@@ -46,8 +44,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
 
   public void testPhrasePrefix() throws IOException {
     MockRAMDirectory indexStore = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
     add("blueberry pie", writer);
     add("blueberry strudel", writer);
     add("blueberry pizza", writer);

@@ -152,8 +149,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
     // The contained PhraseMultiQuery must contain exactly one term array.
 
     MockRAMDirectory indexStore = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
     add("blueberry pie", writer);
     add("blueberry chewing gum", writer);
     add("blue raspberry pie", writer);

@@ -185,8 +181,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
 
   public void testPhrasePrefixWithBooleanQuery() throws IOException {
     MockRAMDirectory indexStore = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
     add("This is a test", "object", writer);
     add("a note", "note", writer);
 

@@ -214,8 +209,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
 
   public void testNoDocs() throws Exception {
     MockRAMDirectory indexStore = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
     add("a note", "note", writer);
 
     IndexReader reader = writer.getReader();
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -57,8 +56,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
         "X 4 5 6" };
 
     small = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, small,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, small, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
 
     for (int i = 0; i < data.length; i++) {
       Document doc = new Document();

@@ -612,8 +610,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
 
     /* build an index */
     RAMDirectory farsiIndex = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
     Document doc = new Document();
     doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
         Field.Index.NOT_ANALYZED));

@@ -653,8 +650,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
 
     /* build an index */
     RAMDirectory danishIndex = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
 
     // Danish collation orders the words below in the given order
     // (example taken from TestSort.testInternationalSort() ).
@@ -22,12 +22,10 @@ import java.util.Locale;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -45,8 +43,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
     final Random rnd = newRandom();
 
     RAMDirectory directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rnd, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(rnd, directory);
 
     DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US));
 
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
 import org.apache.lucene.util.LuceneTestCase;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.RAMDirectory;

@@ -40,8 +39,7 @@ public class TestNot extends LuceneTestCase {
 
   public void testNot() throws Exception {
     RAMDirectory store = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store);
 
     Document d1 = new Document();
     d1.add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));
@@ -55,8 +55,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCaseJ4 {
   public static void beforeClass() throws Exception {
     directory = new RAMDirectory();
     Random random = newStaticRandom(TestNumericRangeQuery32.class);
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
 
     NumericField
       field8 = new NumericField("field8", 8, Field.Store.YES, true),

@@ -54,8 +54,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCaseJ4 {
   public static void beforeClass() throws Exception {
     directory = new RAMDirectory();
     Random random = newStaticRandom(TestNumericRangeQuery64.class);
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
 
     NumericField
      field8 = new NumericField("field8", 8, Field.Store.YES, true),
@@ -18,13 +18,11 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.BytesRef;

@@ -46,8 +44,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase {
    */
   public void testPhrasePrefix() throws IOException {
     RAMDirectory indexStore = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
     Document doc1 = new Document();
     Document doc2 = new Document();
     Document doc3 = new Document();
@@ -67,8 +67,7 @@ public class TestPhraseQuery extends LuceneTestCase {
         return 100;
       }
     };
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer);
 
     Document doc = new Document();
     doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));

@@ -217,7 +216,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     RAMDirectory directory = new RAMDirectory();
     Analyzer stopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(Version.LUCENE_24, stopAnalyzer));
+        newIndexWriterConfig(random, Version.LUCENE_24, stopAnalyzer));
     Document doc = new Document();
     doc.add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
     writer.addDocument(doc);

@@ -251,8 +250,7 @@ public class TestPhraseQuery extends LuceneTestCase {
 
   public void testPhraseQueryInConjunctionScorer() throws Exception {
     RAMDirectory directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
 
     Document doc = new Document();
     doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));

@@ -289,7 +287,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     reader.close();
 
     writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
     doc = new Document();
     doc.add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
     writer.addDocument(doc);

@@ -339,8 +337,7 @@ public class TestPhraseQuery extends LuceneTestCase {
 
   public void testSlopScoring() throws IOException {
     Directory directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
 
     Document doc = new Document();
     doc.add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));

@@ -603,8 +600,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     Directory dir = new MockRAMDirectory();
     Analyzer analyzer = new MockAnalyzer();
 
-    RandomIndexWriter w = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, analyzer);
     List<List<String>> docs = new ArrayList<List<String>>();
     Document d = new Document();
     Field f = new Field("f", "", Field.Store.NO, Field.Index.ANALYZED);
@@ -35,7 +35,6 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;

@@ -91,8 +90,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     }
   };
     Directory store = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store, analyzer);
     Document d = new Document();
     d.add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
     writer.addDocument(d);

@@ -242,8 +240,7 @@ public class TestPositionIncrement extends LuceneTestCase {
 
   public void testPayloadsPos0() throws Exception {
     Directory dir = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir, new TestPayloadAnalyzer());
     Document doc = new Document();
     doc.add(new Field("content", new StringReader(
         "a a b c d e a f g h i j a b k k")));
@@ -20,10 +20,8 @@ package org.apache.lucene.search;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 

@@ -39,8 +37,7 @@ public class TestPrefixFilter extends LuceneTestCase {
         "/Computers/Mac/One",
         "/Computers/Mac/Two",
         "/Computers/Windows"};
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < categories.length; i++) {
       Document doc = new Document();
       doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -18,11 +18,9 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -48,8 +46,7 @@ public class TestPrefixInBooleanQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
 
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
 
     for (int i = 0; i < 5137; ++i) {
       Document doc = new Document();
@@ -20,10 +20,8 @@ package org.apache.lucene.search;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 

@@ -38,8 +36,7 @@ public class TestPrefixQuery extends LuceneTestCase {
     String[] categories = new String[] {"/Computers",
         "/Computers/Mac",
         "/Computers/Windows"};
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < categories.length; i++) {
       Document doc = new Document();
       doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.RandomIndexWriter;

@@ -51,8 +50,7 @@ public class TestPrefixRandom extends LuceneTestCase {
     random = newRandom();
     dir = new MockRAMDirectory();
     // TODO: fix mocktokenizer to not extend chartokenizer, so you can have an 'empty' keyword.
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
 
     Document doc = new Document();
     Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
@@ -17,13 +17,11 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;

@@ -35,8 +33,7 @@ public class TestQueryWrapperFilter extends LuceneTestCase {
 
   public void testBasic() throws Exception {
     Directory dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
     Document doc = new Document();
     doc.add(new Field("field", "value", Store.NO, Index.ANALYZED));
     writer.addDocument(doc);
@@ -20,11 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -48,8 +46,7 @@ public class TestRegexpQuery extends LuceneTestCase {
   public void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     Document doc = new Document();
     doc.add(new Field(FN,
         "the quick brown fox jumps over the lazy ??? dog 493432 49344",
@@ -51,8 +51,7 @@ public class TestRegexpRandom extends LuceneTestCase {
     super.setUp();
     random = newRandom();
     dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
 
     Document doc = new Document();
     Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
@@ -19,13 +19,15 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Random;
+import java.util.Collections;
+import java.util.List;
+import java.util.ArrayList;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.RandomIndexWriter;

@@ -58,17 +60,28 @@ public class TestRegexpRandom2 extends LuceneTestCase {
     // TODO: fix mocktokenizer to not extend chartokenizer, so you can have an 'empty' keyword.
     // currently, this means 'empty tokens' arent created/tested in the enumeration:
     // <mikemccand> it's like having a big hairy scary monster in the basement but being upset that it doesn't have fangs
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer(MockTokenizer.KEYWORD, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
 
     Document doc = new Document();
     Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
     doc.add(field);
 
+    List<String> terms = new ArrayList<String>();
     for (int i = 0; i < 2000*_TestUtil.getRandomMultiplier(); i++) {
-      field.setValue(_TestUtil.randomUnicodeString(random));
+      String s = _TestUtil.randomUnicodeString(random);
+      field.setValue(s);
+      terms.add(s);
       writer.addDocument(doc);
     }
 
+    if (VERBOSE) {
+      // utf16 order
+      Collections.sort(terms);
+      System.out.println("UTF16 order:");
+      for(String s : terms) {
+        System.out.println("  " + UnicodeUtil.toHexString(s));
+      }
+    }
+
     reader = writer.getReader();
     searcher = new IndexSearcher(reader);
     writer.close();

@@ -122,8 +135,11 @@ public class TestRegexpRandom2 extends LuceneTestCase {
 
   /** test a bunch of random regular expressions */
   public void testRegexps() throws Exception {
-    for (int i = 0; i < 1000*_TestUtil.getRandomMultiplier(); i++)
-      assertSame(AutomatonTestUtil.randomRegexp(random).toString());
+
+    for (int i = 0; i < 1000*_TestUtil.getRandomMultiplier(); i++) {
+      String reg = AutomatonTestUtil.randomRegexp(random).toString();
+      assertSame(reg);
+    }
   }
 
   /** check that the # of hits is the same as from a very
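The Collections.sort call in the VERBOSE block above orders terms with String.compareTo, i.e. in UTF-16 code unit order, which is exactly the order the legacy 3.x term dictionary uses and why TermInfosWriter's assertions convert back to UTF-16 before comparing. For contrast, a hypothetical comparator producing trunk's UTF-8 byte order; this is an illustrative helper, not part of the patch:

import java.io.UnsupportedEncodingException;
import java.util.Comparator;

final class Utf8ByteOrder implements Comparator<String> {
  public int compare(String a, String b) {
    try {
      byte[] ba = a.getBytes("UTF-8");
      byte[] bb = b.getBytes("UTF-8");
      int n = Math.min(ba.length, bb.length);
      for (int i = 0; i < n; i++) {
        int d = (ba[i] & 0xff) - (bb[i] & 0xff);  // compare unsigned bytes
        if (d != 0) return d;
      }
      return ba.length - bb.length;               // shorter prefix sorts first
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);              // UTF-8 is always available
    }
  }
}

Sorting the same list with Collections.sort(terms, new Utf8ByteOrder()) would show the two orders diverging as soon as the random Unicode strings contain supplementary characters.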
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
 import org.apache.lucene.util.LuceneTestCase;
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Random;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;

@@ -65,8 +66,9 @@ public class TestSimilarity extends LuceneTestCase {
 
   public void testSimilarity() throws Exception {
     RAMDirectory store = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+    Random random = newRandom();
+    RandomIndexWriter writer = new RandomIndexWriter(random, store,
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
         .setSimilarity(new SimpleSimilarity()));
 
     Document d1 = new Document();
@@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;

@@ -128,8 +126,7 @@ public class TestSloppyPhraseQuery extends LuceneTestCase {
     query.setSlop(slop);
 
     RAMDirectory ramDir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, ramDir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
     writer.addDocument(doc);
 
     IndexReader reader = writer.getReader();
@@ -112,8 +112,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
   private Searcher getIndex (boolean even, boolean odd)
       throws IOException {
     RAMDirectory indexStore = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, indexStore,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 
     for (int i=0; i<data.length; ++i) {
       if (((i%2)==0 && even) || ((i%2)==1 && odd)) {
@@ -18,11 +18,9 @@ package org.apache.lucene.search;
 
 import java.util.List;
 
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spans.SpanTermQuery;

@@ -40,8 +38,7 @@ public class TestSpanQueryFilter extends LuceneTestCase {
 
   public void testFilterWorks() throws Exception {
     Directory dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
     for (int i = 0; i < 500; i++) {
       Document document = new Document();
       document.add(new Field("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
@@ -22,10 +22,8 @@ import java.text.Collator;
 import java.util.Locale;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.store.RAMDirectory;

@@ -401,8 +399,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {
 
     /* build an index */
     RAMDirectory farsiIndex = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex);
     Document doc = new Document();
     doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
         Field.Index.NOT_ANALYZED));

@@ -442,8 +439,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {
 
     /* build an index */
     RAMDirectory danishIndex = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex);
     // Danish collation orders the words below in the given order
     // (example taken from TestSort.testInternationalSort() ).
     String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
@@ -22,11 +22,9 @@ import java.util.ArrayList;
 import java.util.List;

 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

@@ -49,8 +47,7 @@ public class TestTermScorer extends LuceneTestCase {
     super.setUp();
     directory = new RAMDirectory();

-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < values.length; i++) {
       Document doc = new Document();
       doc

@@ -50,8 +50,7 @@ public class TestTermVectors extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     random = newRandom();
-    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(MockTokenizer.SIMPLE, true));
     //writer.setUseCompoundFile(true);
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {

@@ -117,8 +116,7 @@ public class TestTermVectors extends LuceneTestCase {

   public void testTermVectorsFieldOrder() throws IOException {
     Directory dir = new MockRAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.SIMPLE, true));
     Document doc = new Document();
     doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

@@ -248,7 +246,7 @@ public class TestTermVectors extends LuceneTestCase {
     Directory dir = new MockRAMDirectory();

     RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
         .setOpenMode(OpenMode.CREATE));
     writer.addDocument(testDoc1);
     writer.addDocument(testDoc2);

@@ -361,7 +359,7 @@ public class TestTermVectors extends LuceneTestCase {
   // Test only a few docs having vectors
   public void testRareVectors() throws IOException {
     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
         .setOpenMode(OpenMode.CREATE));
     for (int i = 0; i < 100; i++) {
       Document doc = new Document();

@@ -396,7 +394,7 @@ public class TestTermVectors extends LuceneTestCase {
   // vectors up
   public void testMixedVectrosVectors() throws IOException {
     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT,
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT,
         new MockAnalyzer(MockTokenizer.SIMPLE, true)).setOpenMode(OpenMode.CREATE));
     Document doc = new Document();
     doc.add(new Field("field", "one",

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;

@@ -78,8 +77,7 @@ public class TestTimeLimitingCollector extends LuceneTestCase {
         "blueberry pizza",
     };
     directory = new RAMDirectory();
-    RandomIndexWriter iw = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter iw = new RandomIndexWriter(newRandom(), directory);

     for (int i=0; i<N_DOCS; i++) {
       add(docText[i%docText.length], iw);

@@ -19,10 +19,8 @@ package org.apache.lucene.search;

 import java.io.IOException;

-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;

@@ -110,7 +108,7 @@ public class TestTopDocsCollector extends LuceneTestCase {

     // populate an index with 30 documents, this should be enough for the test.
     // The documents have no content - the test uses MatchAllDocsQuery().
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
     for (int i = 0; i < 30; i++) {
       writer.addDocument(new Document());
     }

@@ -19,10 +19,8 @@ package org.apache.lucene.search;

 import java.util.Random;

-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;

@@ -42,8 +40,7 @@ public class TestTopScoreDocCollector extends LuceneTestCase {

     Directory dir = new RAMDirectory();
     Random random = newRandom();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     for (int i = 0; i < 10; i++) {
       writer.addDocument(new Document());
     }

@@ -23,7 +23,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;

@@ -202,8 +201,7 @@ public class TestWildcard
   private RAMDirectory getIndexStore(String field, String[] contents)
   throws IOException {
     RAMDirectory indexStore = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
     for (int i = 0; i < contents.length; ++i) {
       Document doc = new Document();
       doc.add(new Field(field, contents[i], Field.Store.YES, Field.Index.ANALYZED));

@@ -259,8 +257,7 @@ public class TestWildcard

     // prepare the index
     RAMDirectory dir = new RAMDirectory();
-    RandomIndexWriter iw = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
     for (int i = 0; i < docs.length; i++) {
       Document doc = new Document();
       doc.add(new Field(field,docs[i],Store.NO,Index.ANALYZED));

@@ -23,11 +23,9 @@ import java.text.NumberFormat;
 import java.util.Locale;
 import java.util.Random;

-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;

@@ -50,8 +48,7 @@ public class TestWildcardRandom extends LuceneTestCase {
     super.setUp();
     random = newRandom();
     dir = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);

     Document doc = new Document();
     Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);

@@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Collection;
+import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;

@@ -106,8 +107,9 @@ public class TestPayloadNearQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
+    Random random = newRandom();
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new PayloadAnalyzer())
         .setSimilarity(similarity));
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {

@@ -45,6 +45,7 @@ import org.apache.lucene.document.Field;

 import java.io.Reader;
 import java.io.IOException;
+import java.util.Random;


 /**

@@ -112,8 +113,9 @@ public class TestPayloadTermQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
+    Random random = newRandom();
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new PayloadAnalyzer())
         .setSimilarity(similarity));
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;

@@ -62,7 +61,7 @@ public class TestBasics extends LuceneTestCase {
     super.setUp();
     directory = new RAMDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true));
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {
       Document doc = new Document();

@@ -20,11 +20,9 @@ package org.apache.lucene.search.spans;
 import java.util.HashSet;
 import java.util.Set;

-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.CheckHits;

@@ -57,8 +55,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);

     writer.addDocument(doc(new Field[] { field("id", "0")
                                          ,

@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;

@@ -55,8 +54,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
       doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));

@@ -51,8 +51,7 @@ public class TestSpans extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
-    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+    RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
       doc.add(new Field(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED));

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;

@@ -62,9 +61,8 @@ public class TestSpansAdvanced extends LuceneTestCase {
     // create test index
     mDirectory = new RAMDirectory();
     final RandomIndexWriter writer = new RandomIndexWriter(random,
-        mDirectory, new IndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer(MockTokenizer.SIMPLE, true,
-        MockTokenFilter.ENGLISH_STOPSET, true)));
+        mDirectory, new MockAnalyzer(MockTokenizer.SIMPLE, true,
+        MockTokenFilter.ENGLISH_STOPSET, true));
     addDocument(writer, "1", "I think it should work.");
     addDocument(writer, "2", "I think it should work.");
     addDocument(writer, "3", "I think it should work.");

@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;

@@ -47,7 +46,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {

     // create test index
     final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
+        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(
         MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
         .setOpenMode(OpenMode.APPEND));
     addDocument(writer, "A", "Should we, could we, would we?");

@@ -29,11 +29,15 @@ import java.util.Collections;

 import junit.framework.TestCase;

+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.FieldCache.CacheEntry;
 import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.Codec;

 /**
  * Base class for all Lucene unit tests.

@@ -72,6 +76,10 @@ public abstract class LuceneTestCase extends TestCase {

   private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;

+  private String savedDefaultCodec;
+  private String codec;
+  private Codec preFlexSav;
+
   /** Used to track if setUp and tearDown are called correctly from subclasses */
   private boolean setup;

@@ -110,6 +118,19 @@ public abstract class LuceneTestCase extends TestCase {

     ConcurrentMergeScheduler.setTestMode();
     savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
+    savedDefaultCodec = CodecProvider.getDefaultCodec();
+
+    codec = _TestUtil.getTestCodec();
+    if (codec.equals("random"))
+      codec = CodecProvider.CORE_CODECS[seedRnd.nextInt(CodecProvider.CORE_CODECS.length)];
+
+    // If we're running w/ PreFlex codec we must swap in the
+    // test-only PreFlexRW codec (since core PreFlex can
+    // only read segments):
+    if (codec.equals("PreFlex")) {
+      preFlexSav = LuceneTestCaseJ4.installPreFlexRW();
+    }
+    CodecProvider.setDefaultCodec(codec);
   }

   /**

@@ -135,6 +156,11 @@ public abstract class LuceneTestCase extends TestCase {
     assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
     setup = false;
     BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
+    // Restore read-only PreFlex codec:
+    if (codec.equals("PreFlex")) {
+      LuceneTestCaseJ4.restorePreFlex(preFlexSav);
+    }
+    CodecProvider.setDefaultCodec(savedDefaultCodec);

     try {
       Thread.setDefaultUncaughtExceptionHandler(savedUncaughtExceptionHandler);

@@ -268,6 +294,11 @@ public abstract class LuceneTestCase extends TestCase {
     return new Random(seed);
   }

+  /** create a new index writer config with random defaults */
+  public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
+    return LuceneTestCaseJ4.newIndexWriterConfig(r, v, a);
+  }
+
   /** Gets a resource from the classpath as {@link File}. This method should only be used,
    * if a real file is needed. To get a stream, code should prefer
    * {@link Class#getResourceAsStream} using {@code this.getClass()}.

@@ -287,6 +318,9 @@ public abstract class LuceneTestCase extends TestCase {
       seed = null;
       super.runBare();
     } catch (Throwable e) {
+      if (_TestUtil.getTestCodec().equals("random")) {
+        System.out.println("NOTE: random codec of testcase '" + getName() + "' was: " + codec);
+      }
       if (seed != null) {
         System.out.println("NOTE: random seed of testcase '" + getName() + "' was: " + seed);
       }

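setUp() and tearDown() now bracket every JUnit3-style test with a codec swap, restoring the saved default even after a random pick. A standalone sketch of that save/swap/restore pattern, assuming "Pulsing" is among the registered core codecs; the class name is hypothetical:

    import org.apache.lucene.index.codecs.CodecProvider;

    public class CodecSwapSketch {
      public static void main(String[] args) {
        String saved = CodecProvider.getDefaultCodec();
        try {
          // Swap in a specific default, as setUp() does with its random pick.
          CodecProvider.setDefaultCodec("Pulsing");
          // ... build and query indexes under the swapped-in default ...
        } finally {
          // Mirror tearDown(): always restore the saved default.
          CodecProvider.setDefaultCodec(saved);
        }
      }
    }
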
@@ -17,14 +17,25 @@ package org.apache.lucene.util;
  * limitations under the License.
  */

+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.FieldCache.CacheEntry;
 import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;

 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestWatchman;

@@ -34,7 +45,6 @@ import java.io.File;
 import java.io.PrintStream;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.Random;
 import java.util.ArrayList;

@@ -127,6 +137,56 @@ public class LuceneTestCaseJ4 {
   private static final Map<Class<? extends LuceneTestCaseJ4>,Object> checkedClasses =
     Collections.synchronizedMap(new WeakHashMap<Class<? extends LuceneTestCaseJ4>,Object>());

+  // saves default codec: we do this statically as many build indexes in @beforeClass
+  private static String savedDefaultCodec;
+  private static String codec;
+  private static Codec preFlexSav;
+
+  // returns current PreFlex codec
+  public static Codec installPreFlexRW() {
+    final Codec preFlex = CodecProvider.getDefault().lookup("PreFlex");
+    if (preFlex != null) {
+      CodecProvider.getDefault().unregister(preFlex);
+    }
+    CodecProvider.getDefault().register(new PreFlexRWCodec());
+    return preFlex;
+  }
+
+  // restores the previously saved PreFlex codec
+  public static void restorePreFlex(Codec preFlex) {
+    Codec preFlexRW = CodecProvider.getDefault().lookup("PreFlex");
+    if (preFlexRW != null) {
+      CodecProvider.getDefault().unregister(preFlexRW);
+    }
+    CodecProvider.getDefault().register(preFlex);
+  }
+
+  @BeforeClass
+  public static void beforeClassLuceneTestCaseJ4() {
+    savedDefaultCodec = CodecProvider.getDefaultCodec();
+    codec = _TestUtil.getTestCodec();
+    if (codec.equals("random"))
+      codec = CodecProvider.CORE_CODECS[seedRnd.nextInt(CodecProvider.CORE_CODECS.length)];
+
+    // If we're running w/ PreFlex codec we must swap in the
+    // test-only PreFlexRW codec (since core PreFlex can
+    // only read segments):
+    if (codec.equals("PreFlex")) {
+      preFlexSav = installPreFlexRW();
+    }
+
+    CodecProvider.setDefaultCodec(codec);
+  }
+
+  @AfterClass
+  public static void afterClassLuceneTestCaseJ4() {
+    // Restore read-only PreFlex codec:
+    if (codec.equals("PreFlex")) {
+      restorePreFlex(preFlexSav);
+    }
+    CodecProvider.setDefaultCodec(savedDefaultCodec);
+  }
+
   // This is how we get control when errors occur.
   // Think of this as start/end/success/failed
   // events.

@@ -372,6 +432,34 @@ public class LuceneTestCaseJ4 {
     return new Random(seed);
   }

+  /** create a new index writer config with random defaults */
+  public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
+    IndexWriterConfig c = new IndexWriterConfig(v, a);
+    if (r.nextBoolean()) {
+      c.setMergePolicy(new LogDocMergePolicy());
+    }
+    if (r.nextBoolean()) {
+      c.setMergeScheduler(new SerialMergeScheduler());
+    }
+    if (r.nextBoolean()) {
+      c.setMaxBufferedDocs(_TestUtil.nextInt(r, 2, 1000));
+    }
+    if (r.nextBoolean()) {
+      c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
+    }
+
+    if (c.getMergePolicy() instanceof LogMergePolicy) {
+      LogMergePolicy logmp = (LogMergePolicy) c.getMergePolicy();
+      logmp.setUseCompoundDocStore(r.nextBoolean());
+      logmp.setUseCompoundFile(r.nextBoolean());
+      logmp.setCalibrateSizeByDeletes(r.nextBoolean());
+      logmp.setMergeFactor(_TestUtil.nextInt(r, 2, 20));
+    }
+
+    c.setReaderPooling(r.nextBoolean());
+    return c;
+  }
+
   public String getName() {
     return this.name;
   }

@@ -395,6 +483,10 @@ public class LuceneTestCaseJ4 {
       System.out.println("NOTE: random static seed of testclass '" + getName() + "' was: " + staticSeed);
     }

+    if (_TestUtil.getTestCodec().equals("random")) {
+      System.out.println("NOTE: random codec of testcase '" + getName() + "' was: " + codec);
+    }
+
     if (seed != null) {
       System.out.println("NOTE: random seed of testcase '" + getName() + "' was: " + seed);
     }

@@ -407,5 +499,4 @@ public class LuceneTestCaseJ4 {
   private static final Random seedRnd = new Random();

   private String name = "<unknown>";
-
 }

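newIndexWriterConfig(Random, Version, Analyzer) is the core of the randomization: merge policy, merge scheduler, buffered-doc count, term index interval, compound-file settings and reader pooling all vary with the seed. A hedged sketch of a test consuming it; the class name is hypothetical:

    import java.util.Random;

    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.RandomIndexWriter;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.LuceneTestCase;

    public class TestRandomizedConfig extends LuceneTestCase {
      public void testSeedDrivenConfig() throws Exception {
        Random random = newRandom();
        RAMDirectory dir = new RAMDirectory();
        // Each seed yields a different combination of writer settings,
        // so one test exercises many configurations over many runs.
        IndexWriterConfig conf =
            newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer());
        RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf);
        writer.addDocument(new Document());
        writer.close();
      }
    }
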
@@ -23,6 +23,9 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.Directory;
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;

@@ -130,7 +133,7 @@ public class _TestUtil {
     final char[] buffer = new char[end];
     for (int i = 0; i < end; i++) {
       int t = r.nextInt(5);
-      //buffer[i] = (char) (97 + r.nextInt(26));
+
       if (0 == t && i < end - 1) {
         // Make a surrogate pair
         // High surrogate

@@ -218,4 +221,39 @@ public class _TestUtil {
   public static int getRandomMultiplier() {
     return Integer.parseInt(System.getProperty("random.multiplier", "1"));
   }

+  /** gets the codec to run tests with */
+  public static String getTestCodec() {
+    // by default we randomly pick a different codec for
+    // each test case (non-J4 tests) and each test class (J4
+    // tests)
+    return System.getProperty("tests.codec", "random");
+  }
+
+  public static CodecProvider alwaysCodec(final Codec c) {
+    return new CodecProvider() {
+      @Override
+      public Codec getWriter(SegmentWriteState state) {
+        return c;
+      }
+
+      @Override
+      public Codec lookup(String name) {
+        // can't do this until we fix PreFlexRW to not
+        // impersonate PreFlex:
+        if (name.equals(c.name)) {
+          return c;
+        } else {
+          return CodecProvider.getDefault().lookup(name);
+        }
+      }
+    };
+  }
+
+  /** Return a CodecProvider that can read any of the
+   * default codecs, but always writes in the specified
+   * codec. */
+  public static CodecProvider alwaysCodec(final String codec) {
+    return alwaysCodec(CodecProvider.getDefault().lookup(codec));
+  }
 }

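alwaysCodec(...) gives a test that must write one specific format a way to opt out of the random default while still being able to read segments other codecs wrote. A sketch under those assumptions; the class name is hypothetical, and "Standard" is one of the core codec names this commit uses elsewhere:

    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.LuceneTestCase;
    import org.apache.lucene.util._TestUtil;

    public class TestPinnedCodec extends LuceneTestCase {
      public void testAlwaysWritesStandard() throws Exception {
        RAMDirectory dir = new RAMDirectory();
        // The provider always hands back the Standard codec for writing,
        // but delegates other names to the default provider for reading.
        IndexWriter writer = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
                .setCodecProvider(_TestUtil.alwaysCodec("Standard")));
        writer.addDocument(new Document());
        writer.close();
      }
    }
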
@@ -435,6 +435,7 @@
         >
         <sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
         <sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
+        <sysproperty key="tests.codec" value="${tests.codec}"/>
         <jvmarg line="${dir.prop}"/>
         <jvmarg line="${args}"/>

@@ -44,6 +44,8 @@
   <!-- TODO: measure toning this down by default to 1 -->
   <property name="threadsPerProcessor" value="2"/>

+  <property name="tests.codec" value="random" />
+
   <!-- Example directory -->
   <property name="example" value="${common-solr.dir}/example" />
   <!--
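With the property defined here and forwarded as a sysproperty above, a run should be able to pin the codec from the command line, e.g. ant test -Dtests.codec=Standard (or -Dtests.codec=PreFlex, which swaps in the test-only PreFlexRW writer), while the default value random keeps the per-test rotation for the JUnit3 base class and per-class rotation for the JUnit4 one.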