LUCENE-2554: preflex codec doesn't order terms correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@979453 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-07-26 21:35:43 +00:00
commit f8fba74632
99 changed files with 1724 additions and 881 deletions

View File

@ -56,6 +56,7 @@
<property name="args" value=""/>
<property name="threadsPerProcessor" value="1" />
<property name="random.multiplier" value="1" />
<property name="tests.codec" value="random" />
<property name="javac.deprecation" value="off"/>
<property name="javac.debug" value="on"/>
@ -434,6 +435,8 @@
<!-- allow tests to control debug prints -->
<sysproperty key="tests.verbose" value="${tests.verbose}"/>
<!-- set the codec tests should run with -->
<sysproperty key="tests.codec" value="${tests.codec}"/>
<!-- TODO: create propertyset for test properties, so each project can have its own set -->
<sysproperty key="random.multiplier" value="${random.multiplier}"/>

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
@ -107,8 +108,8 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
RAMDirectory ramdir = new RAMDirectory();
Analyzer analyzer = randomAnalyzer();
IndexWriter writer = new IndexWriter(ramdir, analyzer,
IndexWriter.MaxFieldLength.UNLIMITED);
IndexWriter writer = new IndexWriter(ramdir,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
Field field1 = new Field("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
Field field2 = new Field("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);

View File

@ -18,13 +18,13 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -38,8 +38,7 @@ public class BooleanFilterTest extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
//Add series of docs with filterable fields : access rights, prices, dates and "in-stock" flags
addDoc(writer, "admin guest", "010", "20040101","Y");

View File

@ -21,11 +21,9 @@ import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
@ -63,9 +61,7 @@ public class ChainedFilterTest extends LuceneTestCase {
super.setUp();
random = newRandom();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Calendar cal = new GregorianCalendar();
cal.clear();
cal.setTimeInMillis(1041397200000L); // 2003 January 01
@ -200,8 +196,7 @@ public class ChainedFilterTest extends LuceneTestCase {
public void testWithCachingFilter() throws Exception {
Directory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
IndexReader reader = writer.getReader();
writer.close();

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.HashSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsEnum;
@ -44,8 +42,7 @@ public class DuplicateFilterTest extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
//Add series of docs with filterable fields : url, text and dates flags
addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -41,8 +40,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
//Add series of docs with misspelt names
addDoc(writer, "jonathon smythe","1");

View File

@ -19,11 +19,9 @@ package org.apache.lucene.search;
import java.util.HashSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -53,8 +51,7 @@ public class TermsFilterTest extends LuceneTestCase {
public void testMissingTerms() throws Exception {
String fieldName="field1";
RAMDirectory rd=new RAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd);
for (int i = 0; i < 100; i++) {
Document doc=new Document();
int term=i*10; //terms are units of 10;

View File

@ -20,10 +20,8 @@ package org.apache.lucene.search.regex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
@ -44,8 +42,7 @@ public class TestRegexQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
Document doc = new Document();
doc.add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);

View File

@ -28,7 +28,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -46,8 +45,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
// Add series of docs with specific information for MoreLikeThis
addDoc(writer, "lucene");

View File

@ -116,7 +116,7 @@ public final class MultiTermsEnum extends TermsEnum {
// different TermComps
final Comparator<BytesRef> subTermComp = termsEnumIndex.termsEnum.getComparator();
if (subTermComp != null && !subTermComp.equals(termComp)) {
throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
throw new IllegalStateException("sub-readers have different BytesRef.Comparators: " + subTermComp + " vs " + termComp + "; cannot merge");
}
}

View File

@ -47,13 +47,12 @@ public abstract class CodecProvider {
private static String defaultCodec = "Standard";
public final static String[] CORE_CODECS = new String[] {"Standard", "Sep", "Pulsing", "IntBlock"};
public final static String[] CORE_CODECS = new String[] {"Standard", "Sep", "Pulsing", "IntBlock", "PreFlex"};
public void register(Codec codec) {
if (codec.name == null) {
throw new IllegalArgumentException("code.name is null");
}
if (!codecs.containsKey(codec.name)) {
codecs.put(codec.name, codec);
codec.getExtensions(knownExtensions);
@ -61,6 +60,21 @@ public abstract class CodecProvider {
throw new IllegalArgumentException("codec '" + codec.name + "' is already registered as a different codec instance");
}
}
/**
 * Removes a previously registered codec from this provider.
 * Does nothing when no codec is registered under the given
 * codec's name.
 *
 * @throws IllegalArgumentException if the codec has no name, or
 *         if a different instance is registered under its name
 * @lucene.internal
 */
public void unregister(Codec codec) {
  final String codecName = codec.name;
  if (codecName == null) {
    throw new IllegalArgumentException("code.name is null");
  }
  if (!codecs.containsKey(codecName)) {
    // Nothing registered under this name: silently ignore
    return;
  }
  // Only the exact instance that was registered may be removed
  if (codecs.get(codecName) != codec) {
    throw new IllegalArgumentException("codec '" + codecName + "' is being impersonated by a different codec instance!!!");
  }
  codecs.remove(codecName);
}
public Collection<String> getAllExtensions() {
return knownExtensions;
@ -111,8 +125,5 @@ class DefaultCodecProvider extends CodecProvider {
// Always writes with whatever codec is currently the
// default (CodecProvider.getDefaultCodec()); the segment
// write state is not consulted.
@Override
public Codec getWriter(SegmentWriteState state) {
return lookup(CodecProvider.getDefaultCodec());
}
}
}

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import java.io.IOException;
import java.io.Closeable;
/** Abstract API that consumes terms, doc, freq, prox and
* payloads postings. Concrete implementations of this
@ -30,7 +31,7 @@ import java.io.IOException;
*
* @lucene.experimental
*/
public abstract class FieldsConsumer {
public abstract class FieldsConsumer implements Closeable {
/** Add a new field */
public abstract TermsConsumer addField(FieldInfo field) throws IOException;

View File

@ -40,16 +40,16 @@ import org.apache.lucene.index.codecs.FieldsProducer;
public class PreFlexCodec extends Codec {
/** Extension of terms file */
static final String TERMS_EXTENSION = "tis";
public static final String TERMS_EXTENSION = "tis";
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tii";
public static final String TERMS_INDEX_EXTENSION = "tii";
/** Extension of freq postings file */
static final String FREQ_EXTENSION = "frq";
public static final String FREQ_EXTENSION = "frq";
/** Extension of prox postings file */
static final String PROX_EXTENSION = "prx";
public static final String PROX_EXTENSION = "prx";
public PreFlexCodec() {
name = "PreFlex";

View File

@ -40,12 +40,11 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.ArrayUtil;
/** Exposes flex API on a pre-flex index, as a codec.
* @lucene.experimental */
public class PreFlexFields extends FieldsProducer {
private static final boolean DEBUG_SURROGATES = false;
public TermInfosReader tis;
@ -60,7 +59,7 @@ public class PreFlexFields extends FieldsProducer {
private final int readBufferSize;
private Directory cfsReader;
PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
public PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
throws IOException {
si = info;
@ -107,6 +106,15 @@ public class PreFlexFields extends FieldsProducer {
this.dir = dir;
}
// If this returns true, we do the surrogates dance so that
// the terms are sorted by unicode sort order. This should be
// true when segments are used for "normal" searching;
// it's only false during testing, to create a pre-flex
// index, using the test-only PreFlexRW.
protected boolean sortTermsByUnicode() {
return true;
}
static void files(Directory dir, SegmentInfo info, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
@ -182,6 +190,12 @@ public class PreFlexFields extends FieldsProducer {
if (cfsReader != null) {
cfsReader.close();
}
if (freqStream != null) {
freqStream.close();
}
if (proxStream != null) {
proxStream.close();
}
}
private class PreFlexFieldsEnum extends FieldsEnum {
@ -228,7 +242,11 @@ public class PreFlexFields extends FieldsProducer {
public Comparator<BytesRef> getComparator() {
// Pre-flex indexes always sorted in UTF16 order, but
// we remap on-the-fly to unicode order
return BytesRef.getUTF8SortedAsUnicodeComparator();
if (sortTermsByUnicode()) {
return BytesRef.getUTF8SortedAsUnicodeComparator();
} else {
return BytesRef.getUTF8SortedAsUTF16Comparator();
}
}
}
@ -238,237 +256,475 @@ public class PreFlexFields extends FieldsProducer {
private boolean skipNext;
private BytesRef current;
private int[] surrogateSeekPending = new int[1];
private boolean[] surrogateDidSeekBack = new boolean[1];
private int surrogateSeekUpto;
private char[] pendingPrefix;
private SegmentTermEnum seekTermEnum;
private Term protoTerm;
private static final byte UTF8_NON_BMP_LEAD = (byte) 0xf0;
private static final byte UTF8_HIGH_BMP_LEAD = (byte) 0xee;
// Returns true if the UTF8 lead byte at idx starts a
// unicode char that sorts "after" the surrogates in
// UTF16, ie >= U+E000 and <= U+FFFF (lead byte 0xee or
// 0xef).  The test is a bit mask against 0xee, so the
// bytes 0xfe and 0xff also pass.
private final boolean isHighBMPChar(byte[] b, int idx) {
return (b[idx] & UTF8_HIGH_BMP_LEAD) == UTF8_HIGH_BMP_LEAD;
}
// Returns true if the unicode char in the UTF8 byte
// sequence starting at idx encodes a char outside of
// BMP (ie what would be a surrogate pair in UTF16).
// Only the high nibble of the byte at idx is tested, so
// any byte in 0xf0..0xff passes.
private final boolean isNonBMPChar(byte[] b, int idx) {
return (b[idx] & UTF8_NON_BMP_LEAD) == UTF8_NON_BMP_LEAD;
}
private final byte[] scratch = new byte[4];
private final BytesRef prevTerm = new BytesRef();
private final BytesRef scratchTerm = new BytesRef();
private int newSuffixStart;
// Swap in S, in place of E: temporarily overwrites the
// E char that starts at pos in term with the smallest
// non-BMP char (U+10000, UTF8 bytes f0 90 80 80), seeks
// te to that probe term, and reports whether the term
// found shares term's first pos bytes and has a non-BMP
// char at pos.  term's bytes and length are restored
// before returning; te is left wherever the seek landed.
private boolean seekToNonBMP(SegmentTermEnum te, BytesRef term, int pos) throws IOException {
  final int savLength = term.length;

  assert term.offset == 0;

  // The 3 bytes starting at pos make up 1
  // unicode character:
  assert isHighBMPChar(term.bytes, pos);

  // NOTE: we cannot make this assert, because
  // AutomatonQuery legitimately sends us malformed UTF8
  // (eg the UTF8 bytes with just 0xee)
  // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();

  // Make room for the 4-byte probe char:
  if (term.bytes.length < 4+pos) {
    term.grow(4+pos);
  }

  // Save the bytes && length, since we need to
  // restore this if seek "back" finds no matching
  // terms.  The probe overwrites 4 bytes, so all 4 are
  // saved; otherwise a term that continues past the E
  // char would come back with the byte at pos+3 clobbered.
  scratch[0] = term.bytes[pos];
  scratch[1] = term.bytes[pos+1];
  scratch[2] = term.bytes[pos+2];
  scratch[3] = term.bytes[pos+3];

  term.bytes[pos] = (byte) 0xf0;
  term.bytes[pos+1] = (byte) 0x90;
  term.bytes[pos+2] = (byte) 0x80;
  term.bytes[pos+3] = (byte) 0x80;
  term.length = 4+pos;

  if (DEBUG_SURROGATES) {
    System.out.println(" try seek term=" + UnicodeUtil.toHexString(term.utf8ToString()));
  }

  // Seek "back":
  getTermsDict().seekEnum(te, protoTerm.createTerm(term));

  // Restore term right away, so that every exit path
  // below hands the caller an unmodified term:
  term.length = savLength;
  term.bytes[pos] = scratch[0];
  term.bytes[pos+1] = scratch[1];
  term.bytes[pos+2] = scratch[2];
  term.bytes[pos+3] = scratch[3];

  // Test if the term we seek'd to in fact found a
  // surrogate pair at the same position as the E:
  Term t2 = te.term();

  // Cannot be null (or move to next field) because at
  // "worst" it'd seek to the same term we are on now,
  // unless we are being called from seek
  if (t2 == null || t2.field() != fieldInfo.name) {
    return false;
  }

  if (DEBUG_SURROGATES) {
    System.out.println(" got term=" + UnicodeUtil.toHexString(t2.text()));
  }

  // Now test if prefix is identical and we found
  // a non-BMP char at the same position:
  BytesRef b2 = t2.bytes();
  assert b2.offset == 0;

  boolean matches;
  // The probe term was pos+4 bytes long:
  if (b2.length >= 4+pos && isNonBMPChar(b2.bytes, pos)) {
    matches = true;
    for(int i=0;i<pos;i++) {
      if (term.bytes[i] != b2.bytes[i]) {
        matches = false;
        break;
      }
    }
  } else {
    matches = false;
  }

  return matches;
}
// Seek type 2 "continue" (back to the start of the
// surrogates): scan the stripped suffix from the
// prior term, backwards. If there was an E in that
// part, then we try to seek back to S. If that
// seek finds a matching term, we go there.
//
// Returns true if termEnum was re-positioned (in which
// case newSuffixStart and scratchTerm are updated to
// match the new term), false if no seek happened.  As a
// side effect, prevTerm is shortened while scanning.
private boolean doContinue() throws IOException {
if (DEBUG_SURROGATES) {
System.out.println(" try cont");
}
int downTo = prevTerm.length-1;
boolean didSeek = false;
// Only bytes of prevTerm at positions > limit are examined:
final int limit = Math.min(newSuffixStart, scratchTerm.length-1);
while(downTo > limit) {
if (isHighBMPChar(prevTerm.bytes, downTo)) {
if (DEBUG_SURROGATES) {
System.out.println(" found E pos=" + downTo + " vs len=" + prevTerm.length);
}
// Probe with seekTermEnum first, so termEnum is only
// moved once we know a matching S term exists:
if (seekToNonBMP(seekTermEnum, prevTerm, downTo)) {
// TODO: more efficient seek?
getTermsDict().seekEnum(termEnum, seekTermEnum.term());
//newSuffixStart = downTo+4;
newSuffixStart = downTo;
scratchTerm.copy(termEnum.term().bytes());
didSeek = true;
if (DEBUG_SURROGATES) {
System.out.println(" seek!");
}
break;
} else {
if (DEBUG_SURROGATES) {
System.out.println(" no seek");
}
}
}
// Shorten prevTerm in place so that we don't redo
// this loop if we come back here.  Only cut where a
// char starts: a byte with top bits 11 (multi-byte
// lead) or top bit 0 (single-byte char):
if ((prevTerm.bytes[downTo] & 0xc0) == 0xc0 || (prevTerm.bytes[downTo] & 0x80) == 0) {
prevTerm.length = downTo;
}
downTo--;
}
return didSeek;
}
// Look for seek type 3 ("pop"): if the delta from
// prev -> current was replacing an S with an E,
// we must now seek to beyond that E. This seek
// "finishes" the dance at this character
// position.
//
// Returns true if a pop seek was done (termEnum was
// moved and scratchTerm / newSuffixStart were updated,
// or reset to empty / 0 when the seek ran off the end
// of this field); false if no pop applies here.
private boolean doPop() throws IOException {
if (DEBUG_SURROGATES) {
System.out.println(" try pop");
}
assert newSuffixStart <= prevTerm.length;
assert newSuffixStart < scratchTerm.length || newSuffixStart == 0;
// Pop only when, at the first differing position, the
// previous term had an S and the current term has an E:
if (prevTerm.length > newSuffixStart &&
isNonBMPChar(prevTerm.bytes, newSuffixStart) &&
isHighBMPChar(scratchTerm.bytes, newSuffixStart)) {
// Seek type 3 -- put 0xFF at this position, so the
// seek target is the shared prefix followed by 0xff:
scratchTerm.bytes[newSuffixStart] = (byte) 0xff;
scratchTerm.length = newSuffixStart+1;
if (DEBUG_SURROGATES) {
System.out.println(" seek to term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString());
}
// TODO: more efficient seek? can we simply swap
// the enums?
getTermsDict().seekEnum(termEnum, protoTerm.createTerm(scratchTerm));
final Term t2 = termEnum.term();
// We could hit EOF or different field since this
// was a seek "forward":
if (t2 != null && t2.field() == fieldInfo.name) {
if (DEBUG_SURROGATES) {
System.out.println(" got term=" + UnicodeUtil.toHexString(t2.text()) + " " + t2.bytes());
}
final BytesRef b2 = t2.bytes();
assert b2.offset == 0;
// Set newSuffixStart -- we can't use
// termEnum's since the above seek may have
// done no scanning (eg, term was precisely
// an index term, or, was in the term seek
// cache):
scratchTerm.copy(b2);
setNewSuffixStart(prevTerm, scratchTerm);
return true;
} else if (newSuffixStart != 0 || scratchTerm.length != 0) {
if (DEBUG_SURROGATES) {
System.out.println(" got term=null (or next field)");
}
// No more terms in this field after the E range:
// reset to "empty current term" and report the pop
newSuffixStart = 0;
scratchTerm.length = 0;
return true;
}
}
return false;
}
// Pre-flex indices keep their terms in UTF16 sort order,
// whereas certain queries need Unicode codepoint order.
// This method is invoked after TIS.next() (which steps in
// UTF16 order) and seeks around the surrogates as needed
// so that termEnum ends up on the term that is "next" in
// codepoint order.
private void surrogateDance() throws IOException {

  // UTF16 order was requested: leave the enum alone
  if (!unicodeSortOrder) {
    return;
  }

  // Only the "delta" between the last term and the
  // current term is inspected, in UTF8 bytes: 1) the
  // bytes stripped from the prior term, then 2) the
  // bytes appended to the prior term's prefix.
  //
  // Specific UTF8 sequences do not matter, only the
  // "category" of the UTF16 character:
  //
  //   S: a high/low surrogate pair (ie non-BMP)
  //   E: any BMP char > UNI_SUR_LOW_END (and <= U+FFFF)
  //   A: the rest (any unicode char <= UNI_SUR_HIGH_START)
  //
  // Pre-flex indices sort these as ASE, while flex must
  // sort as AES.  So on hitting S while scanning we
  // must 1) seek forward to E and enum the terms there,
  // then 2) seek back to S and enum all terms there,
  // then 3) seek to after E: three different seek
  // points (1, 2, 3).
  //
  // Both categories are easy to spot in UTF8: a byte
  // with prefix 11110 (0xf0) plus the following 3 bytes
  // encode one codepoint in S; a byte with prefix
  // 1110111 (0xee) plus the following 2 bytes encode
  // one codepoint in E.
  //
  // The process is really recursive -- while the char
  // at pos 2 is dancing, pos 4 may suddenly need to
  // dance and must finish before we return to pos 2,
  // and during pos 4's dance pos 7 may need to dance,
  // etc.  Still, no explicit state is kept, because it
  // can always be derived from the prior term and the
  // current term.

  // TODO: can we avoid this copy?
  if (termEnum.term() != null && termEnum.term().field() == fieldInfo.name) {
    scratchTerm.copy(termEnum.term().bytes());
  } else {
    // Exhausted, or moved on to another field
    scratchTerm.length = 0;
  }

  if (DEBUG_SURROGATES) {
    System.out.println(" dance");
    System.out.println(" prev=" + UnicodeUtil.toHexString(prevTerm.utf8ToString()));
    System.out.println(" " + prevTerm.toString());
    System.out.println(" term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()));
    System.out.println(" " + scratchTerm.toString());
  }

  // Everything below relies on TermInfosReader and
  // SegmentTermEnum always using BytesRef.offset == 0
  assert prevTerm.offset == 0;
  assert scratchTerm.offset == 0;

  // Several pops may be needed, possibly followed by a
  // single continue that ends the sequence, ie:
  //
  //   cont
  //   pop, cont
  //   pop, pop, cont
  //   <nothing>
  //
  // so: stop as soon as a continue succeeds, or as soon
  // as there is nothing left to pop.
  while (!doContinue() && doPop()) {
    // keep popping
  }

  if (DEBUG_SURROGATES) {
    System.out.println(" finish bmp ends");
  }

  doPushes();
}
// Look for seek type 1 ("push"): if the newly added
// suffix contains any S, we must try to seek to the
// corresponding E. If we find a match, we go there;
// else we keep looking for additional S's in the new
// suffix. This "starts" the dance, at this character
// position:
//
// Scans scratchTerm from newSuffixStart to its end; on
// each successful push termEnum is moved and scratchTerm
// is replaced by the term that was found.
private void doPushes() throws IOException {
int upTo = newSuffixStart;
if (DEBUG_SURROGATES) {
System.out.println(" try push newSuffixStart=" + newSuffixStart + " scratchLen=" + scratchTerm.length);
}
while(upTo < scratchTerm.length) {
// An S at upTo qualifies when it lies past
// newSuffixStart, or when prevTerm had neither an S
// nor an E at that same position:
if (isNonBMPChar(scratchTerm.bytes, upTo) &&
(upTo > newSuffixStart ||
(upTo >= prevTerm.length ||
(!isNonBMPChar(prevTerm.bytes, upTo) &&
!isHighBMPChar(prevTerm.bytes, upTo))))) {
// A non-BMP char (4 bytes UTF8) starts here:
assert scratchTerm.length >= upTo + 4;
final int savLength = scratchTerm.length;
// Save the 3 bytes we are about to overwrite with
// the probe char:
scratch[0] = scratchTerm.bytes[upTo];
scratch[1] = scratchTerm.bytes[upTo+1];
scratch[2] = scratchTerm.bytes[upTo+2];
// Probe is prefix + ee 80 80, ie U+E000:
scratchTerm.bytes[upTo] = UTF8_HIGH_BMP_LEAD;
scratchTerm.bytes[upTo+1] = (byte) 0x80;
scratchTerm.bytes[upTo+2] = (byte) 0x80;
scratchTerm.length = upTo+3;
if (DEBUG_SURROGATES) {
System.out.println(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.toHexString(scratchTerm.utf8ToString()) + " " + scratchTerm.toString() + " len=" + scratchTerm.length);
}
// Seek "forward":
// TODO: more efficient seek?
getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(scratchTerm));
// Put scratchTerm back the way it was:
scratchTerm.bytes[upTo] = scratch[0];
scratchTerm.bytes[upTo+1] = scratch[1];
scratchTerm.bytes[upTo+2] = scratch[2];
scratchTerm.length = savLength;
// Did we find a match?
final Term t2 = seekTermEnum.term();
if (DEBUG_SURROGATES) {
if (t2 == null) {
System.out.println(" hit term=null");
} else {
System.out.println(" hit term=" + UnicodeUtil.toHexString(t2.text()) + " " + (t2==null? null:t2.bytes()));
}
}
// Since this was a seek "forward", we could hit
// EOF or a different field:
boolean matches;
if (t2 != null && t2.field() == fieldInfo.name) {
final BytesRef b2 = t2.bytes();
assert b2.offset == 0;
// Match = same first upTo bytes, and an E char at upTo:
if (b2.length >= upTo+3 && isHighBMPChar(b2.bytes, upTo)) {
matches = true;
for(int i=0;i<upTo;i++) {
if (scratchTerm.bytes[i] != b2.bytes[i]) {
matches = false;
break;
}
}
} else {
matches = false;
}
} else {
matches = false;
}
if (matches) {
if (DEBUG_SURROGATES) {
System.out.println(" matches!");
}
// OK seek "back"
// TODO: more efficient seek?
getTermsDict().seekEnum(termEnum, seekTermEnum.term());
scratchTerm.copy(seekTermEnum.term().bytes());
// +3 because we don't need to check the char
// at upTo: we know it's > BMP
upTo += 3;
// NOTE: we keep iterating, now, since this
// can easily "recurse". Ie, after seeking
// forward at a certain char position, we may
// find another surrogate in our [new] suffix
// and must then do another seek (recurse)
} else {
upTo++;
}
} else {
upTo++;
}
}
}
private boolean unicodeSortOrder;
void reset(FieldInfo fieldInfo) throws IOException {
//System.out.println("pff.reset te=" + termEnum);
this.fieldInfo = fieldInfo;
protoTerm = new Term(fieldInfo.name);
if (termEnum == null) {
termEnum = getTermsDict().terms(protoTerm);
seekTermEnum = getTermsDict().terms(protoTerm);
//System.out.println(" term=" + termEnum.term());
} else {
getTermsDict().seekEnum(termEnum, protoTerm);
}
skipNext = true;
surrogateSeekUpto = 0;
newSuffixStart = 0;
surrogatesDance();
}
unicodeSortOrder = sortTermsByUnicode();
private void surrogatesDance() throws IOException {

  // Tricky: before 4.0 a Lucene index sorted its terms in
  // UTF16 order, whereas from 4.0 on terms are sorted by
  // Unicode code point.  The two orders disagree only
  // around the surrogates, so the enum is fixed up here:
  // first seek past the surrogates, then come back to
  // them at the end.  Because a single term can contain
  // several new surrogate pairs the process is recursive,
  // and the pending seek-backs are recorded on a stack.

  if (DEBUG_SURROGATES) {
    System.out.println(" dance start term=" + (termEnum.term() == null ? null : UnicodeUtil.toHexString(termEnum.term().text())));
  }

  // Unwind every pending seek that is now due ...
  while (popPendingSeek()) {
    // all work happens in popPendingSeek
  }

  // ... then start new ones for surrogates just read
  while (pushNewSurrogate()) {
    // all work happens in pushNewSurrogate
  }
}
// only for debugging: renders the pending-seek stack as
// its space-separated positions, followed by the pending
// prefix text up to the top-most position.  Returns the
// string "null" when the stack is empty.
private String getStack() {
  if (surrogateSeekUpto == 0) {
    return "null";
  }
  // Purely local buffer, so the unsynchronized
  // StringBuilder is the right tool here
  final StringBuilder sb = new StringBuilder();
  for(int i=0;i<surrogateSeekUpto;i++) {
    if (i > 0) {
      sb.append(' ');
    }
    sb.append(surrogateSeekPending[i]);
  }
  sb.append(" pendingSeekText=")
    .append(new String(pendingPrefix, 0, surrogateSeekPending[surrogateSeekUpto-1]));
  return sb.toString();
}
private boolean popPendingSeek() throws IOException {
if (DEBUG_SURROGATES) {
System.out.println(" check pop newSuffix=" + newSuffixStart + " stack=" + getStack());
}
// if a .next() has advanced beyond the
// after-surrogates range we had last seeked to, we
// must seek back to the start and resume .next from
// there. this pops the pending seek off the stack.
final Term t = termEnum.term();
if (surrogateSeekUpto > 0) {
final int seekPrefix = surrogateSeekPending[surrogateSeekUpto-1];
if (DEBUG_SURROGATES) {
System.out.println(" seekPrefix=" + seekPrefix);
}
if (newSuffixStart < seekPrefix) {
assert pendingPrefix != null;
assert pendingPrefix.length > seekPrefix;
pendingPrefix[seekPrefix] = UnicodeUtil.UNI_SUR_HIGH_START;
pendingPrefix[1+seekPrefix] = UnicodeUtil.UNI_SUR_LOW_START;
Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 2+seekPrefix));
if (DEBUG_SURROGATES) {
System.out.println(" do pop; seek back to " + UnicodeUtil.toHexString(t2.text()));
}
getTermsDict().seekEnum(termEnum, t2);
surrogateDidSeekBack[surrogateSeekUpto-1] = true;
// +2 because we don't want to re-check the
// surrogates we just seek'd back to
newSuffixStart = seekPrefix + 2;
return true;
} else if (newSuffixStart == seekPrefix && surrogateDidSeekBack[surrogateSeekUpto-1] && t != null && t.field() == fieldInfo.name && t.text().charAt(seekPrefix) > UnicodeUtil.UNI_SUR_LOW_END) {
assert pendingPrefix != null;
assert pendingPrefix.length > seekPrefix;
pendingPrefix[seekPrefix] = 0xffff;
Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 1+seekPrefix));
if (DEBUG_SURROGATES) {
System.out.println(" finish pop; seek fwd to " + UnicodeUtil.toHexString(t2.text()));
}
getTermsDict().seekEnum(termEnum, t2);
if (DEBUG_SURROGATES) {
System.out.println(" found term=" + (termEnum.term() == null ? null : UnicodeUtil.toHexString(termEnum.term().text())));
}
surrogateSeekUpto--;
if (termEnum.term() == null || termEnum.term().field() != fieldInfo.name) {
// force pop
newSuffixStart = -1;
} else {
newSuffixStart = termEnum.newSuffixStart;
}
return true;
}
if (t != null && t.field() == fieldInfo.name) {
newSuffixStart = 0;
prevTerm.length = 0;
surrogateDance();
}
return false;
}
private UnicodeUtil.UTF16Result termBuffer = new UnicodeUtil.UTF16Result();
private UnicodeUtil.UTF16Result seekBuffer = new UnicodeUtil.UTF16Result();
// Scans the UTF16 form of the current term, starting at
// newSuffixStart, for a high surrogate that lies beyond
// the position on top of the pending-seek stack.  For
// the first such surrogate where the index also holds a
// term with the same prefix but a char after the
// surrogate range, termEnum is moved to that term, the
// position is pushed on the stack, and true is returned.
// Returns false when the enum is exhausted / on another
// field, or when no push was needed.
private boolean pushNewSurrogate() throws IOException {
if (DEBUG_SURROGATES) {
System.out.println(" check push newSuffix=" + newSuffixStart + " stack=" + getStack());
}
final Term t = termEnum.term();
if (t == null || t.field() != fieldInfo.name) {
return false;
}
final BytesRef bytes = t.bytes();
UnicodeUtil.UTF8toUTF16(bytes.bytes, bytes.offset, bytes.length, termBuffer);
// Math.max guards against a negative newSuffixStart:
for(int i=Math.max(0,newSuffixStart);i<termBuffer.length;i++) {
final char ch = termBuffer.result[i];
if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END && (surrogateSeekUpto == 0 || i > surrogateSeekPending[surrogateSeekUpto-1])) {
if (DEBUG_SURROGATES) {
System.out.println(" found high surr 0x" + Integer.toHexString(ch) + " at pos=" + i);
}
// the next() that we just did read in a new
// suffix, containing a surrogate pair
// seek forward to see if there are any terms with
// this same prefix, but with characters after the
// surrogate range; if so, we must first iterate
// them, then seek back to the surrogates
// Sized i+2, although only the first i+1 chars are
// used for the seek below:
char[] testPrefix = new char[i+2];
for(int j=0;j<i;j++) {
testPrefix[j] = termBuffer.result[j];
}
// First char after the surrogate range:
testPrefix[i] = 1+UnicodeUtil.UNI_SUR_LOW_END;
getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(new BytesRef(testPrefix, 0, i+1)));
Term t2 = seekTermEnum.term();
boolean isPrefix;
if (t2 != null && t2.field() == fieldInfo.name) {
final BytesRef seekBytes = t2.bytes();
UnicodeUtil.UTF8toUTF16(seekBytes.bytes, seekBytes.offset, seekBytes.length, seekBuffer);
isPrefix = true;
if (DEBUG_SURROGATES) {
System.out.println(" seek found " + UnicodeUtil.toHexString(t2.text()));
}
// The term found must share the first i chars:
for(int j=0;j<i;j++) {
if (testPrefix[j] != seekBuffer.result[j]) {
isPrefix = false;
break;
}
}
if (DEBUG_SURROGATES && !isPrefix) {
System.out.println(" no end terms");
}
} else {
if (DEBUG_SURROGATES) {
System.out.println(" no end terms");
}
isPrefix = false;
}
if (isPrefix) {
// we found a term, sharing the same prefix,
// with characters after the surrogates, so we
// must first enum those, and then return the
// surrogates afterwards. push that pending
// seek on the surrogates stack now:
pendingPrefix = testPrefix;
getTermsDict().seekEnum(termEnum, t2);
// Grow both parallel stacks as needed:
if (surrogateSeekUpto == surrogateSeekPending.length) {
surrogateSeekPending = ArrayUtil.grow(surrogateSeekPending);
}
if (surrogateSeekUpto == surrogateDidSeekBack.length) {
surrogateDidSeekBack = ArrayUtil.grow(surrogateDidSeekBack);
}
surrogateSeekPending[surrogateSeekUpto] = i;
surrogateDidSeekBack[surrogateSeekUpto] = false;
surrogateSeekUpto++;
if (DEBUG_SURROGATES) {
System.out.println(" do push " + i + "; end term=" + UnicodeUtil.toHexString(t2.text()));
}
newSuffixStart = i+1;
return true;
} else {
// there are no terms after the surrogates, so
// we do nothing to the enum and just step
// through the surrogates like normal. but we
// must keep iterating through the term, in case
// another surrogate pair appears later
}
}
}
return false;
}
/**
 * Returns the comparator matching the order in which this enum hands out terms.
 *
 * @return the unicode-order comparator when {@code unicodeSortOrder} is set,
 *         otherwise the UTF16-order comparator
 */
@Override
public Comparator<BytesRef> getComparator() {
  // Pre-flex indexes always sorted in UTF16 order, but
  // we remap on-the-fly to unicode order
  // (a stale unconditional return used to sit here, making the branch
  // below unreachable)
  if (unicodeSortOrder) {
    return BytesRef.getUTF8SortedAsUnicodeComparator();
  } else {
    return BytesRef.getUTF8SortedAsUTF16Comparator();
  }
}
@Override
@ -484,7 +740,7 @@ public class PreFlexFields extends FieldsProducer {
@Override
public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
if (DEBUG_SURROGATES) {
System.out.println("TE.seek() term=" + term.utf8ToString());
System.out.println("TE.seek target=" + UnicodeUtil.toHexString(term.utf8ToString()));
}
skipNext = false;
final TermInfosReader tis = getTermsDict();
@ -492,50 +748,142 @@ public class PreFlexFields extends FieldsProducer {
assert termEnum != null;
if (termEnum == null) {
termEnum = tis.terms(t0);
} else {
tis.seekEnum(termEnum, t0);
}
surrogateSeekUpto = 0;
surrogatesDance();
tis.seekEnum(termEnum, t0);
final Term t = termEnum.term();
final BytesRef tr = t == null ? null : t.bytes();
if (t != null && t.field() == fieldInfo.name && term.bytesEquals(tr)) {
current = tr;
if (t != null && t.field() == fieldInfo.name && term.bytesEquals(t.bytes())) {
// If we found an exact match, no need to do the
// surrogate dance
if (DEBUG_SURROGATES) {
System.out.println(" seek exact match");
}
current = t.bytes();
return SeekStatus.FOUND;
} else if (t == null || t.field() != fieldInfo.name) {
// TODO: maybe we can handle this like the next()
// into null? set term as prevTerm then dance?
if (DEBUG_SURROGATES) {
System.out.println(" seek hit EOF");
}
// We hit EOF; try end-case surrogate dance: if we
// find an E, try swapping in S, backwards:
scratchTerm.copy(term);
assert scratchTerm.offset == 0;
for(int i=scratchTerm.length-1;i>=0;i--) {
if (isHighBMPChar(scratchTerm.bytes, i)) {
if (DEBUG_SURROGATES) {
System.out.println(" found E pos=" + i + "; try seek");
}
if (seekToNonBMP(seekTermEnum, scratchTerm, i)) {
scratchTerm.copy(seekTermEnum.term().bytes());
getTermsDict().seekEnum(termEnum, seekTermEnum.term());
newSuffixStart = 1+i;
doPushes();
// Found a match
// TODO: faster seek?
current = termEnum.term().bytes();
return SeekStatus.NOT_FOUND;
}
}
}
if (DEBUG_SURROGATES) {
System.out.println(" seek END");
}
current = null;
return SeekStatus.END;
} else {
current = tr;
return SeekStatus.NOT_FOUND;
// We found a non-exact but non-null term; this one
// is fun -- just treat it like next, by pretending
// requested term was prev:
prevTerm.copy(term);
if (DEBUG_SURROGATES) {
System.out.println(" seek hit non-exact term=" + UnicodeUtil.toHexString(t.text()));
}
final BytesRef br = t.bytes();
assert br.offset == 0;
setNewSuffixStart(term, br);
surrogateDance();
final Term t2 = termEnum.term();
if (t2 == null || t2.field() != fieldInfo.name) {
assert t2 == null || !t2.field().equals(fieldInfo.name); // make sure fields are in fact interned
current = null;
return SeekStatus.END;
} else {
current = t2.bytes();
assert !unicodeSortOrder || term.compareTo(current) < 0 : "term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " vs current=" + UnicodeUtil.toHexString(current.utf8ToString());
return SeekStatus.NOT_FOUND;
}
}
}
/**
 * Sets {@code newSuffixStart} from a byte-wise comparison of two terms.
 * If the terms differ within their common length, the result is the offset
 * of the most recent byte in {@code br1} that is not a UTF-8 continuation
 * byte (top bits {@code 10}), at or before the first differing byte.
 * Otherwise it is the length of the shorter term.
 *
 * @param br1 first term; its bytes decide where a character starts
 * @param br2 second term
 */
private void setNewSuffixStart(BytesRef br1, BytesRef br2) {
  final int limit = Math.min(br1.length, br2.length);

  // default: no difference found within the common length
  int suffixStart = limit;
  int charStart = 0;

  for (int idx = 0; idx < limit; idx++) {
    final byte b1 = br1.bytes[br1.offset+idx];
    if ((b1 & 0xc0) != 0x80) {
      // single-byte char or lead byte of a multi-byte sequence
      charStart = idx;
    }
    if (b1 != br2.bytes[br2.offset+idx]) {
      suffixStart = charStart;
      break;
    }
  }

  newSuffixStart = suffixStart;
  if (DEBUG_SURROGATES) {
    System.out.println(" set newSuffixStart=" + newSuffixStart);
  }
}
@Override
public BytesRef next() throws IOException {
if (DEBUG_SURROGATES) {
System.out.println("TE.next() skipNext=" + skipNext);
System.out.println("TE.next()");
}
if (skipNext) {
if (DEBUG_SURROGATES) {
System.out.println(" skipNext=true");
}
skipNext = false;
if (termEnum.term() == null) {
return null;
} else if (termEnum.term().field() != fieldInfo.name) {
return null;
} else {
return current = termEnum.term().bytes();
}
}
// TODO: can we use STE's prevBuffer here?
prevTerm.copy(termEnum.term().bytes());
if (termEnum.next() && termEnum.term().field() == fieldInfo.name) {
newSuffixStart = termEnum.newSuffixStart;
if (DEBUG_SURROGATES) {
System.out.println(" set newSuffixStart=" + newSuffixStart);
System.out.println(" newSuffixStart=" + newSuffixStart);
}
surrogatesDance();
surrogateDance();
final Term t = termEnum.term();
if (t == null || t.field() != fieldInfo.name) {
assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
@ -545,12 +893,15 @@ public class PreFlexFields extends FieldsProducer {
}
return current;
} else {
// This field is exhausted, but we have to give
// surrogateDance a chance to seek back:
if (DEBUG_SURROGATES) {
System.out.println(" force pop");
System.out.println(" force cont");
}
// force pop
newSuffixStart = -1;
surrogatesDance();
//newSuffixStart = prevTerm.length;
newSuffixStart = 0;
surrogateDance();
final Term t = termEnum.term();
if (t == null || t.field() != fieldInfo.name) {
assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
@ -574,20 +925,32 @@ public class PreFlexFields extends FieldsProducer {
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
if (reuse != null) {
return ((PreDocsEnum) reuse).reset(termEnum, skipDocs);
PreDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof PreDocsEnum)) {
docsEnum = new PreDocsEnum();
} else {
return (new PreDocsEnum()).reset(termEnum, skipDocs);
docsEnum = (PreDocsEnum) reuse;
if (docsEnum.getFreqStream() != freqStream) {
docsEnum = new PreDocsEnum();
}
}
return docsEnum.reset(termEnum, skipDocs);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (reuse != null) {
return ((PreDocsAndPositionsEnum) reuse).reset(termEnum, skipDocs);
PreDocsAndPositionsEnum docsPosEnum;
if (fieldInfo.omitTermFreqAndPositions) {
return null;
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
docsPosEnum = new PreDocsAndPositionsEnum();
} else {
return (new PreDocsAndPositionsEnum()).reset(termEnum, skipDocs);
docsPosEnum = (PreDocsAndPositionsEnum) reuse;
if (docsPosEnum.getFreqStream() != freqStream) {
docsPosEnum = new PreDocsAndPositionsEnum();
}
}
return docsPosEnum.reset(termEnum, skipDocs);
}
}
@ -598,6 +961,10 @@ public class PreFlexFields extends FieldsProducer {
docs = new SegmentTermDocs(freqStream, getTermsDict(), fieldInfos);
}
/**
 * Returns the {@code freqStream} seen by this enum.  The terms enum compares
 * it by identity with its own stream to decide whether an existing instance
 * can be reused.
 */
IndexInput getFreqStream() {
return freqStream;
}
public PreDocsEnum reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException {
docs.setSkipDocs(skipDocs);
docs.seek(termEnum);
@ -650,6 +1017,10 @@ public class PreFlexFields extends FieldsProducer {
pos = new SegmentTermPositions(freqStream, proxStream, getTermsDict(), fieldInfos);
}
/**
 * Returns the {@code freqStream} seen by this enum.  The terms enum compares
 * it by identity with its own stream to decide whether an existing instance
 * can be reused.
 */
IndexInput getFreqStream() {
return freqStream;
}
public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException {
pos.setSkipDocs(skipDocs);
pos.seek(termEnum);

View File

@ -84,26 +84,16 @@ public final class SegmentTermEnum implements Cloneable {
format = firstInt;
// check that it is a format we can understand
if (format > FORMAT_MINIMUM)
throw new IndexFormatTooOldException(null, format, FORMAT_MINIMUM, FORMAT_CURRENT);
if (format < FORMAT_CURRENT)
throw new IndexFormatTooNewException(null, format, FORMAT_MINIMUM, FORMAT_CURRENT);
if (format > FORMAT_MINIMUM)
throw new IndexFormatTooOldException(null, format, FORMAT_MINIMUM, FORMAT_CURRENT);
if (format < FORMAT_CURRENT)
throw new IndexFormatTooNewException(null, format, FORMAT_MINIMUM, FORMAT_CURRENT);
size = input.readLong(); // read the size
if(format == -1){
if (!isIndex) {
indexInterval = input.readInt();
formatM1SkipInterval = input.readInt();
}
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
// skipTo implementation of these versions
skipInterval = Integer.MAX_VALUE;
} else {
indexInterval = input.readInt();
skipInterval = input.readInt();
maxSkipLevels = input.readInt();
}
indexInterval = input.readInt();
skipInterval = input.readInt();
maxSkipLevels = input.readInt();
assert indexInterval > 0: "indexInterval=" + indexInterval + " is negative; must be > 0";
assert skipInterval > 0: "skipInterval=" + skipInterval + " is negative; must be > 0";
}
@ -132,18 +122,21 @@ public final class SegmentTermEnum implements Cloneable {
position = p;
termBuffer.set(t);
prevBuffer.reset();
//System.out.println(" ste doSeek prev=" + prevBuffer.toTerm() + " this=" + this);
termInfo.set(ti);
}
/** Increments the enumeration to the next element. True if one exists.*/
public final boolean next() throws IOException {
prevBuffer.set(termBuffer);
//System.out.println(" ste setPrev=" + prev() + " this=" + this);
if (position++ >= size - 1) {
prevBuffer.set(termBuffer);
termBuffer.reset();
//System.out.println(" EOF");
return false;
}
prevBuffer.set(termBuffer);
termBuffer.read(input, fieldInfos);
newSuffixStart = termBuffer.newSuffixStart;
@ -168,6 +161,7 @@ public final class SegmentTermEnum implements Cloneable {
if (isIndex)
indexPointer += input.readVLong(); // read index pointer
//System.out.println(" ste ret term=" + term());
return true;
}

View File

@ -18,9 +18,10 @@ package org.apache.lucene.index.codecs.preflex;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.FieldInfos;
@ -28,102 +29,65 @@ final class TermBuffer implements Cloneable {
private String field;
private Term term; // cached
private boolean dirty; // true if text was set externally (ie not read via UTF8 bytes)
private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result();
private BytesRef bytes = new BytesRef(10);
int newSuffixStart;
private static final Comparator<BytesRef> utf8AsUTF16Comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
public final int compareTo(TermBuffer other) {
int newSuffixStart; // only valid right after .read is called
public int compareTo(TermBuffer other) {
if (field == other.field) // fields are interned
return compareChars(text.result, text.length, other.text.result, other.text.length);
return utf8AsUTF16Comparator.compare(bytes, other.bytes);
else
return field.compareTo(other.field);
}
private static int compareChars(char[] chars1, int len1,
char[] chars2, int len2) {
final int end = len1 < len2 ? len1:len2;
for (int k = 0; k < end; k++) {
char c1 = chars1[k];
char c2 = chars2[k];
if (c1 != c2) {
return c1 - c2;
}
}
return len1 - len2;
}
public final void read(IndexInput input, FieldInfos fieldInfos)
public void read(IndexInput input, FieldInfos fieldInfos)
throws IOException {
this.term = null; // invalidate cache
int start = input.readVInt();
newSuffixStart = input.readVInt();
int length = input.readVInt();
int totalLength = start + length;
int totalLength = newSuffixStart + length;
if (bytes.bytes.length < totalLength) {
bytes.grow(totalLength);
}
if (dirty) {
// Fully convert all bytes since bytes is dirty
UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes);
bytes.length = totalLength;
input.readBytes(bytes.bytes, start, length);
UnicodeUtil.UTF8toUTF16(bytes.bytes, 0, totalLength, text);
dirty = false;
} else {
// Incrementally convert only the UTF8 bytes that are new:
bytes.length = totalLength;
input.readBytes(bytes.bytes, start, length);
UnicodeUtil.UTF8toUTF16(bytes.bytes, start, length, text);
}
while(true) {
newSuffixStart = text.offsets[start];
if (newSuffixStart != -1) {
break;
}
if (--start == 0) {
newSuffixStart = 0;
break;
}
}
bytes.length = totalLength;
input.readBytes(bytes.bytes, newSuffixStart, length);
this.field = fieldInfos.fieldName(input.readVInt());
}
public final void set(Term term) {
public void set(Term term) {
if (term == null) {
reset();
return;
}
final BytesRef termBytes = term.bytes();
UnicodeUtil.UTF8toUTF16(termBytes.bytes, termBytes.offset, termBytes.length, text);
dirty = true;
bytes.copy(term.bytes());
field = term.field();
this.term = term;
}
public final void set(TermBuffer other) {
text.copyText(other.text);
dirty = true;
public void set(TermBuffer other) {
field = other.field;
term = other.term;
// dangerous to copy Term over, since the underlying
// BytesRef could subsequently be modified:
term = null;
bytes.copy(other.bytes);
}
public void reset() {
field = null;
text.setLength(0);
term = null;
dirty = true;
}
public Term toTerm() {
if (field == null) // unset
return null;
if (term == null)
term = new Term(field, new BytesRef(text.result, 0, text.length), false);
if (term == null) {
term = new Term(field, new BytesRef(bytes), false);
//term = new Term(field, bytes, false);
}
return term;
}
@ -134,12 +98,7 @@ final class TermBuffer implements Cloneable {
try {
clone = (TermBuffer)super.clone();
} catch (CloneNotSupportedException e) {}
clone.dirty = true;
clone.bytes = new BytesRef(10);
clone.text = new UnicodeUtil.UTF16Result();
clone.text.offsets = new int[text.offsets.length];
System.arraycopy(text.offsets, 0, clone.text.offsets, 0, text.offsets.length);
clone.text.copyText(text);
clone.bytes = new BytesRef(bytes);
return clone;
}
}

View File

@ -23,30 +23,30 @@ package org.apache.lucene.index.codecs.preflex;
* indexing. */
@Deprecated
class TermInfo {
public class TermInfo {
/** The number of documents which contain the term. */
int docFreq = 0;
public int docFreq = 0;
long freqPointer = 0;
long proxPointer = 0;
int skipOffset;
public long freqPointer = 0;
public long proxPointer = 0;
public int skipOffset;
TermInfo() {}
public TermInfo() {}
TermInfo(int df, long fp, long pp) {
public TermInfo(int df, long fp, long pp) {
docFreq = df;
freqPointer = fp;
proxPointer = pp;
}
TermInfo(TermInfo ti) {
public TermInfo(TermInfo ti) {
docFreq = ti.docFreq;
freqPointer = ti.freqPointer;
proxPointer = ti.proxPointer;
skipOffset = ti.skipOffset;
}
final void set(int docFreq,
public final void set(int docFreq,
long freqPointer, long proxPointer, int skipOffset) {
this.docFreq = docFreq;
this.freqPointer = freqPointer;
@ -54,7 +54,7 @@ class TermInfo {
this.skipOffset = skipOffset;
}
final void set(TermInfo ti) {
public final void set(TermInfo ti) {
docFreq = ti.docFreq;
freqPointer = ti.freqPointer;
proxPointer = ti.proxPointer;

View File

@ -119,9 +119,12 @@ public final class TermInfosReader {
indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];
for (int i = 0; indexEnum.next(); i++) {
for (int i=0;indexEnum.next(); i++) {
indexTerms[i] = indexEnum.term();
assert indexTerms[i] != null;
assert indexTerms[i].text() != null;
assert indexTerms[i].field() != null;
indexInfos[i] = indexEnum.termInfo();
indexPointers[i] = indexEnum.indexPointer;
@ -160,14 +163,14 @@ public final class TermInfosReader {
return origEnum.maxSkipLevels;
}
final void close() throws IOException {
void close() throws IOException {
if (origEnum != null)
origEnum.close();
threadResources.close();
}
/** Returns the number of term/value pairs in the set. */
final long size() {
long size() {
return size;
}
@ -183,12 +186,13 @@ public final class TermInfosReader {
/** Returns the offset of the greatest index entry which is less than or equal to term.*/
private final int getIndexOffset(Term term) {
private int getIndexOffset(Term term) {
int lo = 0; // binary search indexTerms[]
int hi = indexTerms.length - 1;
while (hi >= lo) {
int mid = (lo + hi) >>> 1;
assert indexTerms[mid] != null : "indexTerms = " + indexTerms.length + " mid=" + mid;
int delta = term.compareToUTF16(indexTerms[mid]);
if (delta < 0)
hi = mid - 1;
@ -200,7 +204,7 @@ public final class TermInfosReader {
return hi;
}
private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
private void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset],
((long) indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);
@ -231,6 +235,9 @@ public final class TermInfosReader {
}
TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd) throws IOException {
if (size == 0) {
return null;
}
// optimize sequential access: first try scanning cached enum w/o seeking
if (enumerator.term() != null // term is at or past current
@ -242,7 +249,6 @@ public final class TermInfosReader {
// no need to seek
final TermInfo ti;
int numScans = enumerator.scanTo(term);
if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
ti = enumerator.termInfo();
@ -279,6 +285,7 @@ public final class TermInfosReader {
seekEnum(enumerator, indexPos);
enumerator.scanTo(term);
final TermInfo ti;
if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
ti = enumerator.termInfo();
if (tiOrd == null) {
@ -294,7 +301,7 @@ public final class TermInfosReader {
}
// called only from asserts
private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
private boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
if (ti1.docFreq != ti2.docFreq) {
return false;
}
@ -319,7 +326,7 @@ public final class TermInfosReader {
}
/** Returns the position of a Term in the set or -1. */
final long getPosition(Term term) throws IOException {
long getPosition(Term term) throws IOException {
if (size == 0) return -1;
ensureIndexIsRead();

View File

@ -331,12 +331,17 @@ public final class BytesRef implements Comparable<BytesRef>, Externalizable {
// We know the terms are not equal, but, we may
// have to carefully fixup the bytes at the
// difference to match UTF16's sort order:
// NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff,
// we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences]
// this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such
// that 6-byte sequences are needed we have much bigger problems anyway.
if (aByte >= 0xee && bByte >= 0xee) {
if ((aByte & 0xfe) == 0xee) {
aByte += 0x10;
aByte += 0xe;
}
if ((bByte&0xfe) == 0xee) {
bByte += 0x10;
bByte += 0xe;
}
}
return aByte - bByte;
@ -346,10 +351,6 @@ public final class BytesRef implements Comparable<BytesRef>, Externalizable {
// One is a prefix of the other, or, they are equal:
return a.length - b.length;
}
public boolean equals(Object other) {
return this == other;
}
}
public void writeExternal(ObjectOutput out)

View File

@ -2,9 +2,7 @@ package org.apache.lucene.document;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.MockRAMDirectory;
@ -58,8 +56,7 @@ public class TestBinaryDocument extends LuceneTestCase {
/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);
/** open a reader and fetch the document */
@ -98,8 +95,7 @@ public class TestBinaryDocument extends LuceneTestCase {
/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);
/** open a reader and fetch the document */

View File

@ -1,8 +1,6 @@
package org.apache.lucene.document;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
@ -155,8 +153,7 @@ public class TestDocument extends LuceneTestCase {
*/
public void testGetValuesForIndexedDocument() throws Exception {
RAMDirectory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(makeDocumentWithFields());
IndexReader reader = writer.getReader();
@ -234,8 +231,7 @@ public class TestDocument extends LuceneTestCase {
Field.Index.NOT_ANALYZED));
RAMDirectory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
writer.addDocument(doc);
field.setValue("id2");
writer.addDocument(doc);

View File

@ -17,20 +17,18 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.util.Random;
import java.io.Closeable;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.intblock.IntBlockCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
import org.apache.lucene.index.codecs.sep.SepCodec;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCaseJ4;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
/** Silly class that randomizes the indexing experience. EG
* it may swap in a different merge policy/scheduler; may
@ -45,32 +43,48 @@ public class RandomIndexWriter implements Closeable {
int docCount;
int flushAt;
/**
 * IndexWriter that randomly calls Thread.yield from {@code testPoint} so we
 * mixup thread scheduling.
 */
private static final class MockIndexWriter extends IndexWriter {

  private final Random random;

  public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) throws IOException {
    super(dir, conf);
    this.random = r;
  }

  /** Yields on roughly one call in four; always reports success. */
  @Override
  boolean testPoint(String name) {
    final boolean shouldYield = random.nextInt(4) == 2;
    if (shouldYield) {
      Thread.yield();
    }
    return true;
  }
}
/**
 * Creates a RandomIndexWriter with a random config, using
 * {@code TEST_VERSION_CURRENT} and a new {@code MockAnalyzer}.
 * Delegates to the config-taking constructor with the result of
 * {@code LuceneTestCaseJ4.newIndexWriterConfig}.
 *
 * @param r random source, also passed to {@code newIndexWriterConfig}
 * @param dir directory to write to
 * @throws IOException if the delegated constructor throws
 */
public RandomIndexWriter(Random r, Directory dir) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, LuceneTestCaseJ4.TEST_VERSION_CURRENT, new MockAnalyzer()));
}
/**
 * Creates a RandomIndexWriter with a random config, using
 * {@code TEST_VERSION_CURRENT} and the given analyzer.
 * Delegates to the config-taking constructor with the result of
 * {@code LuceneTestCaseJ4.newIndexWriterConfig}.
 *
 * @param r random source, also passed to {@code newIndexWriterConfig}
 * @param dir directory to write to
 * @param a analyzer placed in the generated config
 * @throws IOException if the delegated constructor throws
 */
public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, LuceneTestCaseJ4.TEST_VERSION_CURRENT, a));
}
/**
 * Creates a RandomIndexWriter with a random config for the given version
 * and analyzer.  Delegates to the config-taking constructor with the result
 * of {@code LuceneTestCaseJ4.newIndexWriterConfig}.
 *
 * @param r random source, also passed to {@code newIndexWriterConfig}
 * @param dir directory to write to
 * @param v version placed in the generated config
 * @param a analyzer placed in the generated config
 * @throws IOException if the delegated constructor throws
 */
public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a) throws IOException {
this(r, dir, LuceneTestCaseJ4.newIndexWriterConfig(r, v, a));
}
/** create a RandomIndexWriter with the provided config */
public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
this.r = r;
if (r.nextBoolean()) {
c.setMergePolicy(new LogDocMergePolicy());
}
if (r.nextBoolean()) {
c.setMergeScheduler(new SerialMergeScheduler());
}
if (r.nextBoolean()) {
c.setMaxBufferedDocs(_TestUtil.nextInt(r, 2, 1000));
}
if (r.nextBoolean()) {
c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
}
if (c.getMergePolicy() instanceof LogMergePolicy) {
LogMergePolicy logmp = (LogMergePolicy) c.getMergePolicy();
logmp.setUseCompoundDocStore(r.nextBoolean());
logmp.setUseCompoundFile(r.nextBoolean());
logmp.setCalibrateSizeByDeletes(r.nextBoolean());
}
c.setReaderPooling(r.nextBoolean());
c.setCodecProvider(new RandomCodecProvider(r));
w = new IndexWriter(dir, c);
w = new MockIndexWriter(r, dir, c);
flushAt = _TestUtil.nextInt(r, 10, 1000);
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW config=" + w.getConfig());
System.out.println("codec default=" + CodecProvider.getDefaultCodec());
}
}
public void addDocument(Document doc) throws IOException {
@ -89,14 +103,27 @@ public class RandomIndexWriter implements Closeable {
w.deleteDocuments(term);
}
/**
 * Calls {@code commit()} on the wrapped writer {@code w}.
 *
 * @throws CorruptIndexException propagated from the wrapped writer
 * @throws IOException propagated from the wrapped writer
 */
public void commit() throws CorruptIndexException, IOException {
w.commit();
}
/** Returns {@code maxDoc()} of the wrapped writer {@code w}. */
public int maxDoc() {
return w.maxDoc();
}
public IndexReader getReader() throws IOException {
if (r.nextBoolean()) {
// If we are writing with PreFlexRW, force a full
// IndexReader.open so terms are sorted in codepoint
// order during searching:
if (!w.codecs.getWriter(null).name.equals("PreFlex") && r.nextBoolean()) {
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW.getReader: use NRT reader");
}
return w.getReader();
} else {
if (LuceneTestCaseJ4.VERBOSE) {
System.out.println("RIW.getReader: open new reader");
}
w.commit();
return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10));
}
@ -112,22 +139,4 @@ public class RandomIndexWriter implements Closeable {
/**
 * Calls {@code optimize()} on the wrapped writer {@code w}.
 *
 * @throws IOException propagated from the wrapped writer
 */
public void optimize() throws IOException {
w.optimize();
}
class RandomCodecProvider extends CodecProvider {
final String codec;
RandomCodecProvider(Random random) {
register(new StandardCodec());
register(new IntBlockCodec());
register(new PreFlexCodec());
register(new PulsingCodec());
register(new SepCodec());
codec = CodecProvider.CORE_CODECS[random.nextInt(CodecProvider.CORE_CODECS.length)];
}
@Override
public Codec getWriter(SegmentWriteState state) {
return lookup(codec);
}
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -139,7 +138,6 @@ public class TestAddIndexes extends LuceneTestCase {
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
writer.addIndexes(new Directory[] {aux});
// Adds 10 docs, then replaces them with another 10

View File

@ -493,14 +493,21 @@ public class TestCodecs extends MultiCodecTestCase {
// Test random seek by ord:
final int idx = TestCodecs.this.nextInt(field.terms.length);
term = field.terms[idx];
status = termsEnum.seek(idx);
assertEquals(status, TermsEnum.SeekStatus.FOUND);
assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
assertEquals(term.docs.length, termsEnum.docFreq());
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
} else {
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
try {
status = termsEnum.seek(idx);
} catch (UnsupportedOperationException uoe) {
// ok -- skip it
status = null;
}
if (status != null) {
assertEquals(status, TermsEnum.SeekStatus.FOUND);
assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
assertEquals(term.docs.length, termsEnum.docFreq());
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
} else {
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
}
}
// Test seek to non-existent terms:
@ -520,9 +527,12 @@ public class TestCodecs extends MultiCodecTestCase {
// Seek to each term by ord, backwards
for(int i=field.terms.length-1;i>=0;i--) {
assertEquals(Thread.currentThread().getName() + ": field=" + field.fieldInfo.name + " term=" + field.terms[i].text2, TermsEnum.SeekStatus.FOUND, termsEnum.seek(i));
assertEquals(field.terms[i].docs.length, termsEnum.docFreq());
assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[i].text2)));
try {
assertEquals(Thread.currentThread().getName() + ": field=" + field.fieldInfo.name + " term=" + field.terms[i].text2, TermsEnum.SeekStatus.FOUND, termsEnum.seek(i));
assertEquals(field.terms[i].docs.length, termsEnum.docFreq());
assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[i].text2)));
} catch (UnsupportedOperationException uoe) {
}
}
// Seek to non-existent empty-string term

View File

@ -20,6 +20,8 @@ package org.apache.lucene.index;
import java.io.*;
import java.util.*;
import org.apache.lucene.store.*;
import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.standard.*;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
@ -64,7 +66,8 @@ public class TestFlex extends LuceneTestCase {
public void testTermOrd() throws Exception {
Directory d = new MockRAMDirectory();
IndexWriter w = new IndexWriter(d, new MockAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("f", "a b c", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);

View File

@ -1675,7 +1675,7 @@ public class TestIndexReader extends LuceneTestCase
// LUCENE-1586: getUniqueTermCount
public void testUniqueTermCount() throws Exception {
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
@ -1708,7 +1708,7 @@ public class TestIndexReader extends LuceneTestCase
// LUCENE-1609: don't load terms index
public void testNoTermsIndex() throws Throwable {
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
@ -1725,7 +1725,7 @@ public class TestIndexReader extends LuceneTestCase
}
assertEquals(-1, ((SegmentReader) r.getSequentialSubReaders()[0]).getTermInfosIndexDivisor());
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
writer.addDocument(doc);
writer.close();

View File

@ -4559,7 +4559,7 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close();
}
// LUCENE-2095: make sure with multiple threads commit
// LUCENE-2095: make sure with multiple threads commit
// doesn't return until all changes are in fact in the
// index
public void testCommitThreadSafety() throws Throwable {
@ -4670,16 +4670,16 @@ public class TestIndexWriter extends LuceneTestCase {
}
// Make sure terms, including ones with surrogate pairs,
// sort in UTF16 sort order by default
// sort in codepoint sort order by default
public void testTermUTF16SortOrder() throws Throwable {
Random rnd = newRandom();
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
RandomIndexWriter writer = new RandomIndexWriter(rnd, dir);
Document d = new Document();
// Single segment
Field f = new Field("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
d.add(f);
char[] chars = new char[2];
Random rnd = newRandom();
final Set<String> allTerms = new HashSet<String>();
for(int i=0;i<200*_TestUtil.getRandomMultiplier();i++) {
@ -4705,14 +4705,13 @@ public class TestIndexWriter extends LuceneTestCase {
allTerms.add(s);
f.setValue(s);
//System.out.println("add " + termDesc(s));
writer.addDocument(d);
if ((1+i) % 42 == 0) {
writer.commit();
}
}
IndexReader r = writer.getReader();
// Test each sub-segment

View File

@ -394,18 +394,18 @@ public class TestIndexWriterDelete extends LuceneTestCase {
}
public void testDeletesOnDiskFull() throws IOException {
testOperationsOnDiskFull(false);
doTestOperationsOnDiskFull(false);
}
public void testUpdatesOnDiskFull() throws IOException {
testOperationsOnDiskFull(true);
doTestOperationsOnDiskFull(true);
}
/**
* Make sure if modifier tries to commit but hits disk full that modifier
* remains consistent and usable. Similar to TestIndexReader.testDiskFull().
*/
private void testOperationsOnDiskFull(boolean updates) throws IOException {
private void doTestOperationsOnDiskFull(boolean updates) throws IOException {
Term searchTerm = new Term("content", "aaa");
int START_COUNT = 157;
@ -700,6 +700,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
try {
modifier.commit();
} catch (IOException ioe) {
// expected
failed = true;
}

View File

@ -27,11 +27,12 @@ public class TestMultiFields extends LuceneTestCase {
public void testRandom() throws Exception {
Random r = newRandom();
for(int iter=0;iter<2*_TestUtil.getRandomMultiplier();iter++) {
Directory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
Random r = new Random();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
Map<BytesRef,List<Integer>> docs = new HashMap<BytesRef,List<Integer>>();
Set<Integer> deleted = new HashSet<Integer>();
@ -45,7 +46,7 @@ public class TestMultiFields extends LuceneTestCase {
doc.add(id);
boolean onlyUniqueTerms = r.nextBoolean();
Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
for(int i=0;i<numDocs;i++) {
if (!onlyUniqueTerms && r.nextBoolean() && terms.size() > 0) {
@ -61,6 +62,7 @@ public class TestMultiFields extends LuceneTestCase {
}
docs.get(term).add(i);
terms.add(term);
uniqueTerms.add(term);
f.setValue(s);
}
id.setValue(""+i);
@ -75,8 +77,18 @@ public class TestMultiFields extends LuceneTestCase {
}
}
if (VERBOSE) {
List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
System.out.println("UTF16 order:");
for(BytesRef b : termsList) {
System.out.println(" " + UnicodeUtil.toHexString(b.utf8ToString()));
}
}
IndexReader reader = w.getReader();
w.close();
//System.out.println("TEST reader=" + reader);
Bits delDocs = MultiFields.getDeletedDocs(reader);
for(int delDoc : deleted) {

View File

@ -31,7 +31,7 @@ public class TestRollback extends LuceneTestCase {
// LUCENE-2536
public void testRollbackIntegrityWithBufferFlush() throws Exception {
Directory dir = new MockRAMDirectory();
RandomIndexWriter rw = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter rw = new RandomIndexWriter(newRandom(), dir);
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(new Field("pk", Integer.toString(i), Store.YES, Index.ANALYZED_NO_NORMS));

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -64,7 +65,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
public void testPrevTermAtEnd() throws IOException
{
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
addDoc(writer, "aaa bbb");
writer.close();
SegmentReader reader = SegmentReader.getOnlySegmentReader(dir);

View File

@ -68,10 +68,10 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
// TODO: verify equals using IW.getReader
DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
IndexReader r = dw.writer.getReader();
IndexReader reader = dw.writer.getReader();
dw.writer.commit();
verifyEquals(r, dir, "id");
r.close();
verifyEquals(r, reader, dir, "id");
reader.close();
dw.writer.close();
dir.close();
}
@ -261,8 +261,8 @@ public class TestStressIndexing2 extends MultiCodecTestCase {
w.close();
}
public static void verifyEquals(IndexReader r1, Directory dir2, String idField) throws Throwable {
IndexReader r2 = IndexReader.open(dir2, true);
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
IndexReader r2 = IndexReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}

View File

@ -18,8 +18,10 @@ package org.apache.lucene.index.codecs.preflex;
*/
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.apache.lucene.util.*;
import java.util.*;
@ -30,8 +32,6 @@ import org.junit.Test;
public class TestSurrogates extends LuceneTestCaseJ4 {
// chooses from a very limited alphabet to exacerbate the
// surrogate seeking required
private static String makeDifficultRandomUnicodeString(Random r) {
final int end = r.nextInt(20);
if (end == 0) {
@ -44,154 +44,297 @@ public class TestSurrogates extends LuceneTestCaseJ4 {
if (0 == t && i < end - 1) {
// hi
buffer[i++] = (char) 0xd800;
buffer[i++] = (char) (0xd800 + r.nextInt(2));
// lo
buffer[i] = (char) 0xdc00;
buffer[i] = (char) (0xdc00 + r.nextInt(2));
} else if (t <= 3) {
buffer[i] = 'a';
buffer[i] = (char) ('a' + r.nextInt(2));
} else if (4 == t) {
buffer[i] = 0xe000;
buffer[i] = (char) (0xe000 + r.nextInt(2));
}
}
return new String(buffer, 0, end);
}
private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<Term> fieldTerms) throws IOException {
final int numField = _TestUtil.nextInt(r, 2, 5);
List<Term> terms = new ArrayList<Term>();
int tc = 0;
for(int f=0;f<numField;f++) {
String field = "f" + f;
Term protoTerm = new Term(field);
fieldInfos.add(field, true, false, false, false, false, false, false);
final int numTerms = 10000*_TestUtil.getRandomMultiplier();
for(int i=0;i<numTerms;i++) {
String s;
if (r.nextInt(3) == 1) {
s = makeDifficultRandomUnicodeString(r);
} else {
s = _TestUtil.randomUnicodeString(r);
// The surrogate dance uses 0xffff to seek-to-end
// of blocks. Also, pre-4.0 indices are already
// guaranteed to not contain the char 0xffff since
// it's mapped during indexing:
s = s.replace((char) 0xffff, (char) 0xfffe);
}
terms.add(protoTerm.createTerm(s + "_" + (tc++)));
}
}
fieldInfos.write(dir, segName);
// sorts in UTF16 order, just like preflex:
Collections.sort(terms, new Comparator<Term>() {
public int compare(Term o1, Term o2) {
return o1.compareToUTF16(o2);
}
});
TermInfosWriter w = new TermInfosWriter(dir, segName, fieldInfos, 128);
TermInfo ti = new TermInfo();
String lastText = null;
int uniqueTermCount = 0;
if (VERBOSE) {
System.out.println("TEST: utf16 order:");
}
for(Term t : terms) {
FieldInfo fi = fieldInfos.fieldInfo(t.field());
String text = t.text();
if (lastText != null && lastText.equals(text)) {
continue;
}
fieldTerms.add(t);
uniqueTermCount++;
lastText = text;
if (VERBOSE) {
System.out.println(" " + toHexString(t));
}
w.add(fi.number, t.bytes().bytes, t.bytes().length, ti);
}
w.close();
Collections.sort(fieldTerms);
if (VERBOSE) {
System.out.println("\nTEST: codepoint order");
for(Term t: fieldTerms) {
System.out.println(" " + t.field() + ":" + toHexString(t));
}
}
dir.createOutput(segName + ".prx").close();
dir.createOutput(segName + ".frq").close();
// !!hack alert!! stuffing uniqueTermCount in as docCount
return new SegmentInfo(segName, uniqueTermCount, dir, false, -1, null, false, true, codec);
}
/**
 * Formats a term for verbose test output as {@code field:hex}, where the
 * hex part is {@link UnicodeUtil#toHexString} applied to the term's text.
 */
private String toHexString(Term t) {
  final String fieldName = t.field();
  final String hexText = UnicodeUtil.toHexString(t.text());
  return fieldName + ":" + hexText;
}
@Test
public void testSurrogatesOrder() throws Exception {
Directory dir = new MockRAMDirectory();
Codec codec = new PreFlexCodec();
/**
 * Picks a random string for use as a term.
 *
 * <p>When the first draw ({@code r.nextInt(5)}) is not 1, the result comes
 * from {@code _TestUtil.randomRealisticUnicodeString}. Otherwise a second
 * draw ({@code r.nextInt(3)}) chooses between
 * {@link #makeDifficultRandomUnicodeString} (draw equals 1) and
 * {@code _TestUtil.randomUnicodeString}.
 *
 * @param r source of randomness; draws are consumed in the order above
 * @return the generated string
 */
private String getRandomString(Random r) {
  if (r.nextInt(5) != 1) {
    return _TestUtil.randomRealisticUnicodeString(r);
  }
  if (r.nextInt(3) == 1) {
    return makeDifficultRandomUnicodeString(r);
  }
  return _TestUtil.randomUnicodeString(r);
}
Random r = newRandom();
FieldInfos fieldInfos = new FieldInfos();
List<Term> fieldTerms = new ArrayList<Term>();
SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);
/** Orders {@link Term}s by delegating to {@link Term#compareToUTF16}. */
private static class SortTermAsUTF16Comparator implements Comparator<Term> {
  public int compare(Term left, Term right) {
    final int order = left.compareToUTF16(right);
    return order;
  }
}
// hack alert!!
int uniqueTermCount = si.docCount;
private static final SortTermAsUTF16Comparator termAsUTF16Comparator = new SortTermAsUTF16Comparator();
FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
assertNotNull(fields);
// single straight enum
private void doTestStraightEnum(List<Term> fieldTerms, IndexReader reader, int uniqueTermCount) throws IOException {
if (VERBOSE) {
System.out.println("\nTEST: now enum");
System.out.println("\nTEST: top now enum reader=" + reader);
}
FieldsEnum fieldsEnum = fields.iterator();
String field;
UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator();
int termCount = 0;
while((field = fieldsEnum.next()) != null) {
TermsEnum termsEnum = fieldsEnum.terms();
BytesRef text;
BytesRef lastText = null;
while((text = termsEnum.next()) != null) {
{
// Test straight enum:
String field;
int termCount = 0;
while((field = fieldsEnum.next()) != null) {
TermsEnum termsEnum = fieldsEnum.terms();
BytesRef text;
BytesRef lastText = null;
while((text = termsEnum.next()) != null) {
Term exp = fieldTerms.get(termCount);
if (VERBOSE) {
System.out.println(" got term=" + field + ":" + UnicodeUtil.toHexString(text.utf8ToString()));
System.out.println(" exp=" + exp.field() + ":" + UnicodeUtil.toHexString(exp.text().toString()));
System.out.println();
}
if (lastText == null) {
lastText = new BytesRef(text);
} else {
assertTrue(lastText.compareTo(text) < 0);
lastText.copy(text);
}
assertEquals(exp.field(), field);
assertEquals(exp.bytes(), text);
termCount++;
}
if (VERBOSE) {
UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
System.out.println("got term=" + field + ":" + UnicodeUtil.toHexString(new String(utf16.result, 0, utf16.length)));
System.out.println();
System.out.println(" no more terms for field=" + field);
}
if (lastText == null) {
lastText = new BytesRef(text);
} else {
assertTrue(lastText.compareTo(text) < 0);
lastText.copy(text);
}
assertEquals(fieldTerms.get(termCount).field(), field);
assertEquals(fieldTerms.get(termCount).bytes(), text);
termCount++;
}
assertEquals(uniqueTermCount, termCount);
}
}
// randomly seeks to term that we know exists, then next's
// from there
private void doTestSeekExists(Random r, List<Term> fieldTerms, IndexReader reader) throws IOException {
final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>();
// Test random seek to existing term, then enum:
if (VERBOSE) {
System.out.println("\nTEST: top now seek");
}
for(int iter=0;iter<100*_TestUtil.getRandomMultiplier();iter++) {
// pick random field+term
int spot = r.nextInt(fieldTerms.size());
Term term = fieldTerms.get(spot);
String field = term.field();
if (VERBOSE) {
System.out.println(" no more terms for field=" + field);
System.out.println("TEST: exist seek field=" + field + " term=" + UnicodeUtil.toHexString(term.text()));
}
// seek to it
TermsEnum te = tes.get(field);
if (te == null) {
te = MultiFields.getTerms(reader, field).iterator();
tes.put(field, te);
}
if (VERBOSE) {
System.out.println(" done get enum");
}
// seek should find the term
assertEquals(TermsEnum.SeekStatus.FOUND,
te.seek(term.bytes()));
// now .next() this many times:
int ct = _TestUtil.nextInt(r, 5, 100);
for(int i=0;i<ct;i++) {
if (VERBOSE) {
System.out.println("TEST: now next()");
}
if (1+spot+i >= fieldTerms.size()) {
break;
}
term = fieldTerms.get(1+spot+i);
if (term.field() != field) {
assertNull(te.next());
break;
} else {
BytesRef t = te.next();
if (VERBOSE) {
System.out.println(" got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString())));
System.out.println(" exp=" + UnicodeUtil.toHexString(term.text().toString()));
}
assertEquals(term.bytes(), t);
}
}
}
assertEquals(uniqueTermCount, termCount);
}
fields.close();
/**
 * Seeks to randomly generated terms that are absent from the index and
 * checks where the enum lands, then steps forward with {@code next()}.
 *
 * <p>Each iteration picks a random field {@code "f" + n} and a random term
 * text. If the term is already present in {@code fieldTermsArray} the
 * iteration is skipped. Otherwise the binary-search insertion point gives
 * the term the enum is expected to land on: if that point is past the end of
 * the list or belongs to another field, the seek must return
 * {@code END}; if not, it must return {@code NOT_FOUND} positioned on
 * that term, and the following {@code next()} calls must walk the expected
 * terms until the field changes.
 *
 * @param r               source of randomness
 * @param numField        number of fields that were indexed ("f0".."f(numField-1)")
 * @param fieldTerms      expected terms, sorted with {@link Term}'s natural order
 * @param fieldTermsArray the same terms as an array, used for binary search
 * @param reader          reader over the index under test
 */
private void doTestSeekDoesNotExist(Random r, int numField, List<Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader) throws IOException {

  // one TermsEnum per field, reused across iterations
  final Map<String,TermsEnum> tes = new HashMap<String,TermsEnum>();

  if (VERBOSE) {
    System.out.println("TEST: top random seeks");
  }

  for(int iter=0;iter<100*_TestUtil.getRandomMultiplier();iter++) {

    // seek to random spot
    String field = "f" + r.nextInt(numField);
    Term tx = new Term(field, getRandomString(r));

    int spot = Arrays.binarySearch(fieldTermsArray, tx);
    if (spot >= 0) {
      // the random term happens to exist; this test only covers misses
      continue;
    }

    if (VERBOSE) {
      System.out.println("TEST: non-exist seek to " + field + ":" + UnicodeUtil.toHexString(tx.text()));
    }

    // term does not exist:
    TermsEnum te = tes.get(field);
    if (te == null) {
      te = MultiFields.getTerms(reader, field).iterator();
      tes.put(field, te);
    }

    if (VERBOSE) {
      System.out.println("  got enum");
    }

    // convert the negative binarySearch result into the insertion point
    spot = -spot - 1;

    // Field names are compared with equals() rather than by identity so the
    // check does not depend on every field string being interned.
    if (spot == fieldTerms.size() || !field.equals(fieldTerms.get(spot).field())) {
      assertEquals(TermsEnum.SeekStatus.END, te.seek(tx.bytes()));
      continue;
    }

    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(tx.bytes()));

    if (VERBOSE) {
      System.out.println("  got term=" + UnicodeUtil.toHexString(te.term().utf8ToString()));
      System.out.println("  exp term=" + UnicodeUtil.toHexString(fieldTerms.get(spot).text()));
    }

    assertEquals(fieldTerms.get(spot).bytes(),
                 te.term());

    // now .next() this many times:
    int ct = _TestUtil.nextInt(r, 5, 100);
    for(int i=0;i<ct;i++) {
      if (VERBOSE) {
        System.out.println("TEST: now next()");
      }
      if (1+spot+i >= fieldTerms.size()) {
        break;
      }
      Term term = fieldTerms.get(1+spot+i);
      if (!field.equals(term.field())) {
        // ran off the end of this field's terms
        assertNull(te.next());
        break;
      } else {
        BytesRef t = te.next();

        if (VERBOSE) {
          System.out.println("  got term=" + (t == null ? null : UnicodeUtil.toHexString(t.utf8ToString())));
          System.out.println("       exp=" + UnicodeUtil.toHexString(term.text().toString()));
        }

        assertEquals(term.bytes(), t);
      }
    }
  }
}
/**
 * Indexes many random terms (each in its own single-field document) through
 * {@code PreFlexRWCodec}, then verifies through a reader that terms are
 * enumerated and seekable in the order produced by {@link Term}'s natural
 * ordering.
 */
@Test
public void testSurrogatesOrder() throws Exception {
  Random r = newRandom();

  Directory dir = new MockRAMDirectory();
  RandomIndexWriter w = new RandomIndexWriter(r,
                                              dir,
                                              newIndexWriterConfig(r, TEST_VERSION_CURRENT,
                                                                   new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())));

  final int numField = _TestUtil.nextInt(r, 2, 5);

  int uniqueTermCount = 0;

  // running counter appended to every term
  int tc = 0;

  List<Term> fieldTerms = new ArrayList<Term>();

  for(int f=0;f<numField;f++) {
    String field = "f" + f;
    final int numTerms = 10000*_TestUtil.getRandomMultiplier();

    final Set<String> uniqueTerms = new HashSet<String>();

    for(int i=0;i<numTerms;i++) {
      String term = getRandomString(r) + "_ " + (tc++);
      uniqueTerms.add(term);
      fieldTerms.add(new Term(field, term));
      Document doc = new Document();
      doc.add(new Field(field, term, Field.Store.NO, Field.Index.NOT_ANALYZED));
      w.addDocument(doc);
    }
    uniqueTermCount += uniqueTerms.size();
  }

  IndexReader reader = w.getReader();

  if (VERBOSE) {
    // only for display; the list is re-sorted below before it is used
    Collections.sort(fieldTerms, termAsUTF16Comparator);

    System.out.println("\nTEST: UTF16 order");
    for(Term t: fieldTerms) {
      System.out.println("  " + toHexString(t));
    }
  }

  // sorts in code point order:
  Collections.sort(fieldTerms);

  if (VERBOSE) {
    System.out.println("\nTEST: codepoint order");
    for(Term t: fieldTerms) {
      System.out.println("  " + toHexString(t));
    }
  }

  Term[] fieldTermsArray = fieldTerms.toArray(new Term[fieldTerms.size()]);

  doTestStraightEnum(fieldTerms, reader, uniqueTermCount);
  doTestSeekExists(r, fieldTerms, reader);
  doTestSeekDoesNotExist(r, numField, fieldTerms, fieldTermsArray, reader);

  // release everything this test opened: reader first, then the writer
  // that produced it, then the directory underneath both
  reader.close();
  w.close();
  dir.close();
}
}

View File

@ -0,0 +1,212 @@
package org.apache.lucene.index.codecs.preflexrw;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.standard.DefaultSkipListWriter;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.preflex.TermInfo;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.util.Comparator;
class PreFlexFieldsWriter extends FieldsConsumer {
private final TermInfosWriter termsOut;
private final IndexOutput freqOut;
private final IndexOutput proxOut;
private final DefaultSkipListWriter skipListWriter;
private final int totalNumDocs;
public PreFlexFieldsWriter(SegmentWriteState state) throws IOException {
termsOut = new TermInfosWriter(state.directory,
state.segmentName,
state.fieldInfos,
state.termIndexInterval);
state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_EXTENSION));
state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(freqFile);
state.flushedFiles.add(freqFile);
totalNumDocs = state.numDocs;
if (state.fieldInfos.hasProx()) {
final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
proxOut = state.directory.createOutput(proxFile);
state.flushedFiles.add(proxFile);
} else {
proxOut = null;
}
skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
termsOut.maxSkipLevels,
totalNumDocs,
freqOut,
proxOut);
//System.out.println("\nw start seg=" + segment);
}
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
assert field.number != -1;
//System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
return new PreFlexTermsWriter(field);
}
@Override
public void close() throws IOException {
termsOut.close();
freqOut.close();
if (proxOut != null) {
proxOut.close();
}
}
private class PreFlexTermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
private final boolean omitTF;
private final boolean storePayloads;
private final TermInfo termInfo = new TermInfo();
private final PostingsWriter postingsWriter = new PostingsWriter();
public PreFlexTermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTF = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
}
private class PostingsWriter extends PostingsConsumer {
private int lastDocID;
private int lastPayloadLength = -1;
private int lastPosition;
private int df;
public PostingsWriter reset() {
df = 0;
lastDocID = 0;
lastPayloadLength = -1;
return this;
}
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
//System.out.println(" w doc=" + docID);
final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
}
if ((++df % termsOut.skipInterval) == 0) {
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
lastDocID = docID;
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
if (omitTF) {
freqOut.writeVInt(delta);
} else {
final int code = delta << 1;
if (termDocFreq == 1) {
freqOut.writeVInt(code|1);
} else {
freqOut.writeVInt(code);
freqOut.writeVInt(termDocFreq);
}
}
lastPosition = 0;
}
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
assert proxOut != null;
//System.out.println(" w pos=" + position + " payl=" + payload);
final int delta = position - lastPosition;
lastPosition = position;
if (storePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
//System.out.println(" write payload len=" + payloadLength);
lastPayloadLength = payloadLength;
proxOut.writeVInt((delta<<1)|1);
proxOut.writeVInt(payloadLength);
} else {
proxOut.writeVInt(delta << 1);
}
if (payloadLength > 0) {
proxOut.writeBytes(payload.bytes, payload.offset, payload.length);
}
} else {
proxOut.writeVInt(delta);
}
}
@Override
public void finishDoc() throws IOException {
}
}
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
//System.out.println(" w term=" + text.utf8ToString());
skipListWriter.resetSkip();
termInfo.freqPointer = freqOut.getFilePointer();
if (proxOut != null) {
termInfo.proxPointer = proxOut.getFilePointer();
}
return postingsWriter.reset();
}
@Override
public void finishTerm(BytesRef text, int numDocs) throws IOException {
if (numDocs > 0) {
long skipPointer = skipListWriter.writeSkip(freqOut);
termInfo.docFreq = numDocs;
termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);
//System.out.println(" w finish term=" + text.utf8ToString() + " fnum=" + fieldInfo.number);
termsOut.add(fieldInfo.number,
text,
termInfo);
}
}
@Override
public void finish() throws IOException {
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUTF16Comparator();
}
}
}

View File

@ -0,0 +1,77 @@
package org.apache.lucene.index.codecs.preflexrw;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexFields;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.util.LuceneTestCaseJ4;
/**
 * Test-only codec that is able to both write and read the pre-flex index
 * format.
 *
 * @lucene.experimental
 */
public class PreFlexRWCodec extends PreFlexCodec {

  public PreFlexRWCodec() {
    // NOTE: we deliberately present ourselves as the PreFlex codec, so
    // that it is able to read the segments written here.
    super();
  }

  /** Returns a writer that produces pre-flex format files for the segment. */
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    return new PreFlexFieldsWriter(state);
  }

  /**
   * Returns a pre-flex reader whose term sort order depends on the caller:
   * legacy UTF16 order when invoked from inside a method named
   * {@code merge}, unicode order otherwise.
   */
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    // IW also opens readers itself (eg for merging); in that case the
    // terms must keep coming back in UTF16 order.
    return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor) {
      @Override
      protected boolean sortTermsByUnicode() {
        // Inspect the current stack trace: any frame named "merge" above
        // us means we are serving a merge and must sort by UTF16.
        for (StackTraceElement frame : new Exception().getStackTrace()) {
          //System.out.println(frame.getClassName());
          if ("merge".equals(frame.getMethodName())) {
            if (LuceneTestCaseJ4.VERBOSE) {
              System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 term sort order");
            }
            return false;
          }
        }
        return true;
      }
    };
  }
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.index.codecs.preflex;
package org.apache.lucene.index.codecs.preflexrw;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -19,9 +19,12 @@ package org.apache.lucene.index.codecs.preflex;
import java.io.IOException;
import org.apache.lucene.store.*;
import org.apache.lucene.index.*;
import org.apache.lucene.util.*;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.preflex.TermInfo;
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
@ -71,8 +74,7 @@ final class TermInfosWriter {
private long lastIndexPointer;
private boolean isIndex;
private byte[] lastTermBytes = new byte[10];
private int lastTermBytesLength = 0;
private final BytesRef lastTerm = new BytesRef();
private int lastFieldNumber = -1;
private TermInfosWriter other;
@ -104,13 +106,10 @@ final class TermInfosWriter {
assert initUTF16Results();
}
void add(Term term, TermInfo ti) throws IOException {
add(fieldInfos.fieldNumber(term.field()), term.bytes().bytes, term.bytes().length, ti);
}
// Currently used only by assert statements
UnicodeUtil.UTF16Result utf16Result1;
UnicodeUtil.UTF16Result utf16Result2;
private final BytesRef scratchBytes = new BytesRef();
// Currently used only by assert statements
private boolean initUTF16Results() {
@ -120,7 +119,7 @@ final class TermInfosWriter {
}
// Currently used only by assert statement
private int compareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) {
private int compareToLastTerm(int fieldNumber, BytesRef term) {
if (lastFieldNumber != fieldNumber) {
final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber));
@ -132,8 +131,13 @@ final class TermInfosWriter {
return cmp;
}
UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1);
UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2);
scratchBytes.copy(term);
assert lastTerm.offset == 0;
UnicodeUtil.UTF8toUTF16(lastTerm.bytes, 0, lastTerm.length, utf16Result1);
assert scratchBytes.offset == 0;
UnicodeUtil.UTF8toUTF16(scratchBytes.bytes, 0, scratchBytes.length, utf16Result2);
final int len;
if (utf16Result1.length < utf16Result2.length)
len = utf16Result1.length;
@ -152,22 +156,22 @@ final class TermInfosWriter {
/** Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set.
Term must be lexicographically greater than all previous Terms added.
TermInfo pointers must be positive and must not be smaller than the
previous ones (the assertions below check {@code >=}, so equal pointers
are accepted).
NOTE(review): this span appears to show both the byte[]-based and the
BytesRef-based form of each changed line, and part of the body is elided
at the hunk marker near the end. */
void add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
public void add(int fieldNumber, BytesRef term, TermInfo ti)
throws IOException {
// Ordering check against the previously added term. When isIndex is set,
// an empty term following an empty last term is also accepted.
assert compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
(isIndex && termBytesLength == 0 && lastTermBytesLength == 0) :
assert compareToLastTerm(fieldNumber, term) < 0 ||
(isIndex && term.length == 0 && lastTerm.length == 0) :
"Terms are out of order: field=" + fieldInfos.fieldName(fieldNumber) + " (number " + fieldNumber + ")" +
" lastField=" + fieldInfos.fieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
" text=" + new String(termBytes, 0, termBytesLength, "UTF-8") + " lastText=" + new String(lastTermBytes, 0, lastTermBytesLength, "UTF-8");
" text=" + term.utf8ToString() + " lastText=" + lastTerm.utf8ToString();
// File pointers may repeat but must never go backwards relative to lastTi.
assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")";
assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")";
// When this is not the index writer, every indexInterval-th call first hands
// the previous term and its TermInfo to the companion writer `other`.
if (!isIndex && size % indexInterval == 0)
other.add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term
other.add(lastFieldNumber, lastTerm, lastTi); // add an index term
writeTerm(fieldNumber, termBytes, termBytesLength); // write term
writeTerm(fieldNumber, term); // write term
output.writeVInt(ti.docFreq); // write doc freq
// The freq pointer is written as a delta from the previous TermInfo's pointer.
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
@ -187,29 +191,27 @@ final class TermInfosWriter {
// One more entry has been added.
size++;
}
/** Writes one term to {@code output}, prefix-compressed against the
previously written term: the length of the shared byte prefix, the length
of the remaining suffix, the suffix bytes, and finally the field number.
Afterwards the term is saved as the new "last term" so that the next call
can compute its shared prefix against it.
Declared to throw IOException (the writes go to {@code output}).
NOTE(review): this span appears to show both the byte[]-based and the
BytesRef-based form of each changed line. */
private void writeTerm(int fieldNumber, byte[] termBytes, int termBytesLength)
private void writeTerm(int fieldNumber, BytesRef term)
throws IOException {
//System.out.println(" tiw.write field=" + fieldNumber + " term=" + term.utf8ToString());
// TODO: UTF16toUTF8 could tell us this prefix
// Compute prefix in common with last term:
int start = 0;
// Only compare up to the length of the shorter of the two terms.
final int limit = termBytesLength < lastTermBytesLength ? termBytesLength : lastTermBytesLength;
final int limit = term.length < lastTerm.length ? term.length : lastTerm.length;
while(start < limit) {
// Stop at the first differing byte; `start` then equals the shared prefix length.
// The BytesRef form indexes relative to each ref's own offset.
if (termBytes[start] != lastTermBytes[start])
if (term.bytes[start+term.offset] != lastTerm.bytes[start+lastTerm.offset])
break;
start++;
}
// Number of trailing bytes that are not shared with the last term.
final int length = termBytesLength - start;
final int length = term.length - start;
output.writeVInt(start); // write shared prefix length
output.writeVInt(length); // write delta length
output.writeBytes(termBytes, start, length); // write delta bytes
output.writeBytes(term.bytes, start+term.offset, length); // write delta bytes
output.writeVInt(fieldNumber); // write field num
// byte[] form: grow the saved buffer if it is too small, then copy only the
// non-shared suffix, since the first `start` bytes were just found to be equal.
if (lastTermBytes.length < termBytesLength) {
lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
}
System.arraycopy(termBytes, start, lastTermBytes, start, length);
lastTermBytesLength = termBytesLength;
// BytesRef form: save this term into lastTerm for the next call's prefix computation.
lastTerm.copy(term);
}
/** Called to complete TermInfos creation. */

View File

@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.RAMDirectory;
@ -104,7 +102,7 @@ public class BaseTestRangeFilter extends LuceneTestCase {
private IndexReader build(Random random, TestIndex index) throws IOException {
/* build an index */
RandomIndexWriter writer = new RandomIndexWriter(random, index.index,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.CREATE));
for (int d = minId; d <= maxId; d++) {

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
@ -46,8 +44,7 @@ public class TestAutomatonQuery extends LuceneTestCase {
super.setUp();
Random random = newRandom();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
Field titleField = new Field("title", "some title", Field.Store.NO,
Field.Index.ANALYZED);

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -49,8 +47,7 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
super.setUp();
Random random = newRandom();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
Field titleField = new Field("title", "some title", Field.Store.NO,
Field.Index.ANALYZED);

View File

@ -54,7 +54,7 @@ public class TestBoolean2 extends LuceneTestCase {
super.setUp();
rnd = newRandom();
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(rnd, directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(rnd, directory);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
@ -71,14 +71,14 @@ public class TestBoolean2 extends LuceneTestCase {
int docCount = 0;
do {
final Directory copy = new RAMDirectory(dir2);
RandomIndexWriter w = new RandomIndexWriter(rnd, dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(rnd, dir2);
w.addIndexes(new Directory[] {copy});
docCount = w.maxDoc();
w.close();
mulFactor *= 2;
} while(docCount < 3000);
RandomIndexWriter w = new RandomIndexWriter(rnd, dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(rnd, dir2);
Document doc = new Document();
doc.add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
for(int i=0;i<NUM_EXTRA_DOCS/2;i++) {

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -60,8 +58,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
};
index = new RAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(rnd, index, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(rnd, index);
for (int i = 0; i < data.length; i++) {
Document doc = new Document();

View File

@ -20,11 +20,9 @@ import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
@ -143,8 +141,7 @@ public class TestBooleanOr extends LuceneTestCase {
Random random = newRandom();
//
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
//
Document d = new Document();

View File

@ -22,11 +22,9 @@ import junit.framework.Test;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.PrefixQuery;
@ -77,8 +75,7 @@ public class TestBooleanPrefixQuery extends LuceneTestCase {
Query rw1 = null;
Query rw2 = null;
IndexReader reader = null;
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < categories.length; i++) {
Document doc = new Document();
doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

View File

@ -18,9 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.MockRAMDirectory;
@ -62,8 +60,7 @@ public class TestBooleanQuery extends LuceneTestCase {
// LUCENE-1630
public void testNullOrSubScorer() throws Throwable {
Directory dir = new MockRAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(newRandom(), dir);
Document doc = new Document();
doc.add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -45,8 +43,7 @@ public class TestBooleanScorer extends LuceneTestCase
String[] values = new String[] { "1", "2", "3", "4" };
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < values.length; i++) {
Document doc = new Document();
doc.add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

View File

@ -24,12 +24,10 @@ import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -59,8 +57,7 @@ public class TestCustomSearcherSort extends LuceneTestCase implements
super.setUp();
Random rand = newRandom();
index = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, index,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(rand, index);
RandomGen random = new RandomGen(rand);
for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if too low the
// problem doesn't show up

View File

@ -18,12 +18,10 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -47,8 +45,7 @@ public class TestDateFilter extends LuceneTestCase {
public void testBefore() throws IOException {
// create an index
RAMDirectory indexStore = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
long now = System.currentTimeMillis();
@ -114,8 +111,7 @@ public class TestDateFilter extends LuceneTestCase {
public void testAfter() throws IOException {
// create an index
RAMDirectory indexStore = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
long now = System.currentTimeMillis();

View File

@ -26,7 +26,6 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
@ -53,8 +52,7 @@ public class TestDateSort extends LuceneTestCase {
super.setUp();
// Create an index writer.
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
// oldest doc:
// Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007

View File

@ -22,13 +22,13 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.text.DecimalFormat;
import java.util.Random;
import java.io.IOException;
/**
@ -80,8 +80,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
super.setUp();
index = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), index,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
Random random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, index,
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
.setSimilarity(sim));
// hed is the most important field, dek is secondary

View File

@ -20,10 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -40,8 +38,7 @@ public class TestDocBoost extends LuceneTestCase {
public void testDocBoost() throws Exception {
RAMDirectory store = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store);
Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);
Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED);

View File

@ -24,13 +24,11 @@ import java.util.Iterator;
import junit.framework.Assert;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
@ -104,8 +102,7 @@ public class TestDocIdSet extends LuceneTestCase {
// Tests that if a Filter produces a null DocIdSet, which is given to
// IndexSearcher, everything works fine. This came up in LUCENE-1754.
Directory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
Document doc = new Document();
doc.add(new Field("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
writer.addDocument(doc);

View File

@ -23,7 +23,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanFirstQuery;
@ -70,8 +69,7 @@ public class TestExplanations extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));

View File

@ -50,8 +50,7 @@ public class TestFieldCache extends LuceneTestCase {
Random r = newRandom();
NUM_DOCS = 1000 * _TestUtil.getRandomMultiplier();
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(r, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(r, directory);
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
byte theByte = Byte.MAX_VALUE;

View File

@ -19,11 +19,9 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.MockRAMDirectory;
@ -39,8 +37,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
public void testMissingTerms() throws Exception {
String fieldName = "field1";
MockRAMDirectory rd = new MockRAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd);
for (int i = 0; i < 100; i++) {
Document doc = new Document();
int term = i * 10; //terms are units of 10;

View File

@ -17,11 +17,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -50,8 +48,7 @@ public class TestFilteredQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter (newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter (newRandom(), directory);
Document doc = new Document();
doc.add (new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
@ -73,6 +70,11 @@ public class TestFilteredQuery extends LuceneTestCase {
doc.add (new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument (doc);
// tests here require single segment (eg try seed
// 8239472272678419952L), because SingleDocTestFilter(x)
// blindly accepts that docID in any sub-segment
writer.optimize();
reader = writer.getReader();
writer.close ();

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
@ -42,8 +41,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
public void testFuzziness() throws Exception {
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
addDoc("aaaaa", writer);
addDoc("aaaab", writer);
addDoc("aaabb", writer);
@ -196,8 +194,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
public void testFuzzinessLong() throws Exception {
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
addDoc("aaaaaaa", writer);
addDoc("segment", writer);
@ -287,8 +284,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
public void testTokenLengthOpt() throws IOException {
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
addDoc("12345678911", writer);
addDoc("segment", writer);
@ -325,8 +321,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
public void testBoostOnlyRewrite() throws Exception {
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
addDoc("Lucene", writer);
addDoc("Lucene", writer);
addDoc("Lucenne", writer);
@ -353,8 +348,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
MockAnalyzer analyzer = new MockAnalyzer();
Directory index = new MockRAMDirectory();
RandomIndexWriter w = new RandomIndexWriter(newRandom(), index,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
RandomIndexWriter w = new RandomIndexWriter(newRandom(), index);
addDoc("Lucene in Action", w);
addDoc("Lucene for Dummies", w);

View File

@ -27,7 +27,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -88,8 +87,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase {
int terms = (int) Math.pow(2, bits);
RAMDirectory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)));
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
Document doc = new Document();
Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
@ -25,7 +24,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -46,8 +44,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
public void testPhrasePrefix() throws IOException {
MockRAMDirectory indexStore = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
add("blueberry pie", writer);
add("blueberry strudel", writer);
add("blueberry pizza", writer);
@ -152,8 +149,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
// The contained PhraseMultiQuery must contain exactly one term array.
MockRAMDirectory indexStore = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
add("blueberry pie", writer);
add("blueberry chewing gum", writer);
add("blue raspberry pie", writer);
@ -185,8 +181,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
public void testPhrasePrefixWithBooleanQuery() throws IOException {
MockRAMDirectory indexStore = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
add("This is a test", "object", writer);
add("a note", "note", writer);
@ -214,8 +209,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
public void testNoDocs() throws Exception {
MockRAMDirectory indexStore = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
add("a note", "note", writer);
IndexReader reader = writer.getReader();

View File

@ -22,7 +22,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -57,8 +56,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
"X 4 5 6" };
small = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, small,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
RandomIndexWriter writer = new RandomIndexWriter(rand, small, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
for (int i = 0; i < data.length; i++) {
Document doc = new Document();
@ -612,8 +610,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
/* build an index */
RAMDirectory farsiIndex = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
Document doc = new Document();
doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
Field.Index.NOT_ANALYZED));
@ -653,8 +650,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
/* build an index */
RAMDirectory danishIndex = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
// Danish collation orders the words below in the given order
// (example taken from TestSort.testInternationalSort() ).

View File

@ -22,12 +22,10 @@ import java.util.Locale;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
@ -45,8 +43,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
final Random rnd = newRandom();
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rnd, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(rnd, directory);
DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US));

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.RAMDirectory;
@ -40,8 +39,7 @@ public class TestNot extends LuceneTestCase {
public void testNot() throws Exception {
RAMDirectory store = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store);
Document d1 = new Document();
d1.add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));

View File

@ -55,8 +55,7 @@ public class TestNumericRangeQuery32 extends LuceneTestCaseJ4 {
public static void beforeClass() throws Exception {
directory = new RAMDirectory();
Random random = newStaticRandom(TestNumericRangeQuery32.class);
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
NumericField
field8 = new NumericField("field8", 8, Field.Store.YES, true),

View File

@ -54,8 +54,7 @@ public class TestNumericRangeQuery64 extends LuceneTestCaseJ4 {
public static void beforeClass() throws Exception {
directory = new RAMDirectory();
Random random = newStaticRandom(TestNumericRangeQuery64.class);
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
NumericField
field8 = new NumericField("field8", 8, Field.Store.YES, true),

View File

@ -18,13 +18,11 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRef;
@ -46,8 +44,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase {
*/
public void testPhrasePrefix() throws IOException {
RAMDirectory indexStore = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), indexStore);
Document doc1 = new Document();
Document doc2 = new Document();
Document doc3 = new Document();

View File

@ -67,8 +67,7 @@ public class TestPhraseQuery extends LuceneTestCase {
return 100;
}
};
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer);
Document doc = new Document();
doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
@ -217,7 +216,7 @@ public class TestPhraseQuery extends LuceneTestCase {
RAMDirectory directory = new RAMDirectory();
Analyzer stopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(Version.LUCENE_24, stopAnalyzer));
newIndexWriterConfig(random, Version.LUCENE_24, stopAnalyzer));
Document doc = new Document();
doc.add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
@ -251,8 +250,7 @@ public class TestPhraseQuery extends LuceneTestCase {
public void testPhraseQueryInConjunctionScorer() throws Exception {
RAMDirectory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
doc.add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
@ -289,7 +287,7 @@ public class TestPhraseQuery extends LuceneTestCase {
reader.close();
writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
doc = new Document();
doc.add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
@ -339,8 +337,7 @@ public class TestPhraseQuery extends LuceneTestCase {
public void testSlopScoring() throws IOException {
Directory directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
doc.add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
@ -603,8 +600,7 @@ public class TestPhraseQuery extends LuceneTestCase {
Directory dir = new MockRAMDirectory();
Analyzer analyzer = new MockAnalyzer();
RandomIndexWriter w = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
RandomIndexWriter w = new RandomIndexWriter(random, dir, analyzer);
List<List<String>> docs = new ArrayList<List<String>>();
Document d = new Document();
Field f = new Field("f", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -35,7 +35,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
@ -91,8 +90,7 @@ public class TestPositionIncrement extends LuceneTestCase {
}
};
Directory store = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store, analyzer);
Document d = new Document();
d.add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(d);
@ -242,8 +240,7 @@ public class TestPositionIncrement extends LuceneTestCase {
public void testPayloadsPos0() throws Exception {
Directory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir, new TestPayloadAnalyzer());
Document doc = new Document();
doc.add(new Field("content", new StringReader(
"a a b c d e a f g h i j a b k k")));

View File

@ -20,10 +20,8 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -39,8 +37,7 @@ public class TestPrefixFilter extends LuceneTestCase {
"/Computers/Mac/One",
"/Computers/Mac/Two",
"/Computers/Windows"};
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < categories.length; i++) {
Document doc = new Document();
doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

View File

@ -18,11 +18,9 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -48,8 +46,7 @@ public class TestPrefixInBooleanQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < 5137; ++i) {
Document doc = new Document();

View File

@ -20,10 +20,8 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -38,8 +36,7 @@ public class TestPrefixQuery extends LuceneTestCase {
String[] categories = new String[] {"/Computers",
"/Computers/Mac",
"/Computers/Windows"};
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < categories.length; i++) {
Document doc = new Document();
doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.RandomIndexWriter;
@ -51,8 +50,7 @@ public class TestPrefixRandom extends LuceneTestCase {
random = newRandom();
dir = new MockRAMDirectory();
// TODO: fix MockTokenizer to not extend CharTokenizer, so you can have an 'empty' keyword.
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false)));
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
Document doc = new Document();
Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -17,13 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -35,8 +33,7 @@ public class TestQueryWrapperFilter extends LuceneTestCase {
public void testBasic() throws Exception {
Directory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
Document doc = new Document();
doc.add(new Field("field", "value", Store.NO, Index.ANALYZED));
writer.addDocument(doc);

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -48,8 +46,7 @@ public class TestRegexpQuery extends LuceneTestCase {
public void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
Document doc = new Document();
doc.add(new Field(FN,
"the quick brown fox jumps over the lazy ??? dog 493432 49344",

View File

@ -51,8 +51,7 @@ public class TestRegexpRandom extends LuceneTestCase {
super.setUp();
random = newRandom();
dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
Document doc = new Document();
Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -19,13 +19,15 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.RandomIndexWriter;
@ -58,17 +60,28 @@ public class TestRegexpRandom2 extends LuceneTestCase {
// TODO: fix MockTokenizer to not extend CharTokenizer, so you can have an 'empty' keyword.
// currently, this means 'empty tokens' aren't created/tested in the enumeration:
// <mikemccand> it's like having a big hairy scary monster in the basement but being upset that it doesn't have fangs
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(MockTokenizer.KEYWORD, false)));
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.KEYWORD, false));
Document doc = new Document();
Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
doc.add(field);
List<String> terms = new ArrayList<String>();
for (int i = 0; i < 2000*_TestUtil.getRandomMultiplier(); i++) {
field.setValue(_TestUtil.randomUnicodeString(random));
String s = _TestUtil.randomUnicodeString(random);
field.setValue(s);
terms.add(s);
writer.addDocument(doc);
}
if (VERBOSE) {
// utf16 order
Collections.sort(terms);
System.out.println("UTF16 order:");
for(String s : terms) {
System.out.println(" " + UnicodeUtil.toHexString(s));
}
}
reader = writer.getReader();
searcher = new IndexSearcher(reader);
writer.close();
@ -122,8 +135,11 @@ public class TestRegexpRandom2 extends LuceneTestCase {
/** test a bunch of random regular expressions */
public void testRegexps() throws Exception {
for (int i = 0; i < 1000*_TestUtil.getRandomMultiplier(); i++)
assertSame(AutomatonTestUtil.randomRegexp(random).toString());
for (int i = 0; i < 1000*_TestUtil.getRandomMultiplier(); i++) {
String reg = AutomatonTestUtil.randomRegexp(random).toString();
assertSame(reg);
}
}
/** check that the # of hits is the same as from a very

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.Random;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
@ -65,8 +66,9 @@ public class TestSimilarity extends LuceneTestCase {
public void testSimilarity() throws Exception {
RAMDirectory store = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), store,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
Random random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, store,
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
.setSimilarity(new SimpleSimilarity()));
Document d1 = new Document();

View File

@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
@ -128,8 +126,7 @@ public class TestSloppyPhraseQuery extends LuceneTestCase {
query.setSlop(slop);
RAMDirectory ramDir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, ramDir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
RandomIndexWriter writer = new RandomIndexWriter(random, ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
writer.addDocument(doc);
IndexReader reader = writer.getReader();

View File

@ -112,8 +112,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
private Searcher getIndex (boolean even, boolean odd)
throws IOException {
RAMDirectory indexStore = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
for (int i=0; i<data.length; ++i) {
if (((i%2)==0 && even) || ((i%2)==1 && odd)) {

View File

@ -18,11 +18,9 @@ package org.apache.lucene.search;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -40,8 +38,7 @@ public class TestSpanQueryFilter extends LuceneTestCase {
public void testFilterWorks() throws Exception {
Directory dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
for (int i = 0; i < 500; i++) {
Document document = new Document();
document.add(new Field("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),

View File

@ -22,10 +22,8 @@ import java.text.Collator;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.RAMDirectory;
@ -401,8 +399,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {
/* build an index */
RAMDirectory farsiIndex = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex);
Document doc = new Document();
doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
Field.Index.NOT_ANALYZED));
@ -442,8 +439,7 @@ public class TestTermRangeFilter extends BaseTestRangeFilter {
/* build an index */
RAMDirectory danishIndex = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex);
// Danish collation orders the words below in the given order
// (example taken from TestSort.testInternationalSort() ).
String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};

View File

@ -22,11 +22,9 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
@ -49,8 +47,7 @@ public class TestTermScorer extends LuceneTestCase {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < values.length; i++) {
Document doc = new Document();
doc

View File

@ -50,8 +50,7 @@ public class TestTermVectors extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(MockTokenizer.SIMPLE, true));
//writer.setUseCompoundFile(true);
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
@ -117,8 +116,7 @@ public class TestTermVectors extends LuceneTestCase {
public void testTermVectorsFieldOrder() throws IOException {
Directory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(MockTokenizer.SIMPLE, true));
Document doc = new Document();
doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
@ -248,7 +246,7 @@ public class TestTermVectors extends LuceneTestCase {
Directory dir = new MockRAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
.setOpenMode(OpenMode.CREATE));
writer.addDocument(testDoc1);
writer.addDocument(testDoc2);
@ -361,7 +359,7 @@ public class TestTermVectors extends LuceneTestCase {
// Test only a few docs having vectors
public void testRareVectors() throws IOException {
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
.setOpenMode(OpenMode.CREATE));
for (int i = 0; i < 100; i++) {
Document doc = new Document();
@ -396,7 +394,7 @@ public class TestTermVectors extends LuceneTestCase {
// vectors up
public void testMixedVectrosVectors() throws IOException {
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
new IndexWriterConfig(TEST_VERSION_CURRENT,
newIndexWriterConfig(random, TEST_VERSION_CURRENT,
new MockAnalyzer(MockTokenizer.SIMPLE, true)).setOpenMode(OpenMode.CREATE));
Document doc = new Document();
doc.add(new Field("field", "one",

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
@ -78,8 +77,7 @@ public class TestTimeLimitingCollector extends LuceneTestCase {
"blueberry pizza",
};
directory = new RAMDirectory();
RandomIndexWriter iw = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter iw = new RandomIndexWriter(newRandom(), directory);
for (int i=0; i<N_DOCS; i++) {
add(docText[i%docText.length], iw);

View File

@ -19,10 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
@ -110,7 +108,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
// Populate an index with 30 documents; this should be enough for the test.
// The documents have no content - the test uses MatchAllDocsQuery().
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), dir);
for (int i = 0; i < 30; i++) {
writer.addDocument(new Document());
}

View File

@ -19,10 +19,8 @@ package org.apache.lucene.search;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
@ -42,8 +40,7 @@ public class TestTopScoreDocCollector extends LuceneTestCase {
Directory dir = new RAMDirectory();
Random random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
for (int i = 0; i < 10; i++) {
writer.addDocument(new Document());
}

View File

@ -23,7 +23,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
@ -202,8 +201,7 @@ public class TestWildcard
private RAMDirectory getIndexStore(String field, String[] contents)
throws IOException {
RAMDirectory indexStore = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
for (int i = 0; i < contents.length; ++i) {
Document doc = new Document();
doc.add(new Field(field, contents[i], Field.Store.YES, Field.Index.ANALYZED));
@ -259,8 +257,7 @@ public class TestWildcard
// prepare the index
RAMDirectory dir = new RAMDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter iw = new RandomIndexWriter(random, dir);
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(new Field(field,docs[i],Store.NO,Index.ANALYZED));

View File

@ -23,11 +23,9 @@ import java.text.NumberFormat;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
@ -50,8 +48,7 @@ public class TestWildcardRandom extends LuceneTestCase {
super.setUp();
random = newRandom();
dir = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
Document doc = new Document();
Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
import java.io.IOException;
import java.io.Reader;
import java.util.Collection;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
@ -106,8 +107,9 @@ public class TestPayloadNearQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
Random random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new PayloadAnalyzer())
.setSimilarity(similarity));
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {

View File

@ -45,6 +45,7 @@ import org.apache.lucene.document.Field;
import java.io.Reader;
import java.io.IOException;
import java.util.Random;
/**
@ -112,8 +113,9 @@ public class TestPayloadTermQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
Random random = newRandom();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new PayloadAnalyzer())
.setSimilarity(similarity));
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
@ -62,7 +61,7 @@ public class TestBasics extends LuceneTestCase {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
new MockAnalyzer(MockTokenizer.SIMPLE, true));
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();

View File

@ -20,11 +20,9 @@ package org.apache.lucene.search.spans;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
@ -57,8 +55,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
writer.addDocument(doc(new Field[] { field("id", "0")
,

View File

@ -21,7 +21,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
@ -55,8 +54,7 @@ public class TestNearSpansOrdered extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));

View File

@ -51,8 +51,7 @@ public class TestSpans extends LuceneTestCase {
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
RandomIndexWriter writer= new RandomIndexWriter(newRandom(), directory);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(new Field(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED));

View File

@ -28,7 +28,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
@ -62,9 +61,8 @@ public class TestSpansAdvanced extends LuceneTestCase {
// create test index
mDirectory = new RAMDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random,
mDirectory, new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(MockTokenizer.SIMPLE, true,
MockTokenFilter.ENGLISH_STOPSET, true)));
mDirectory, new MockAnalyzer(MockTokenizer.SIMPLE, true,
MockTokenFilter.ENGLISH_STOPSET, true));
addDocument(writer, "1", "I think it should work.");
addDocument(writer, "2", "I think it should work.");
addDocument(writer, "3", "I think it should work.");

View File

@ -23,7 +23,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -47,7 +46,7 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
// create test index
final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(
MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
.setOpenMode(OpenMode.APPEND));
addDocument(writer, "A", "Should we, could we, would we?");

View File

@ -29,11 +29,15 @@ import java.util.Collections;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.CacheEntry;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
/**
* Base class for all Lucene unit tests.
@ -72,6 +76,10 @@ public abstract class LuceneTestCase extends TestCase {
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
private String savedDefaultCodec;
private String codec;
private Codec preFlexSav;
/** Used to track if setUp and tearDown are called correctly from subclasses */
private boolean setup;
@ -110,6 +118,19 @@ public abstract class LuceneTestCase extends TestCase {
ConcurrentMergeScheduler.setTestMode();
savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
savedDefaultCodec = CodecProvider.getDefaultCodec();
codec = _TestUtil.getTestCodec();
if (codec.equals("random"))
codec = CodecProvider.CORE_CODECS[seedRnd.nextInt(CodecProvider.CORE_CODECS.length)];
// If we're running w/ PreFlex codec we must swap in the
// test-only PreFlexRW codec (since core PreFlex can
// only read segments):
if (codec.equals("PreFlex")) {
preFlexSav = LuceneTestCaseJ4.installPreFlexRW();
}
CodecProvider.setDefaultCodec(codec);
}
/**
@ -135,7 +156,12 @@ public abstract class LuceneTestCase extends TestCase {
assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
setup = false;
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
// Restore read-only PreFlex codec:
if (codec.equals("PreFlex")) {
LuceneTestCaseJ4.restorePreFlex(preFlexSav);
}
CodecProvider.setDefaultCodec(savedDefaultCodec);
try {
Thread.setDefaultUncaughtExceptionHandler(savedUncaughtExceptionHandler);
if (!uncaughtExceptions.isEmpty()) {
@ -267,7 +293,12 @@ public abstract class LuceneTestCase extends TestCase {
this.seed = Long.valueOf(seed);
return new Random(seed);
}
/**
 * Creates an {@link IndexWriterConfig} with randomized settings.
 * This is a convenience forwarder to
 * {@code LuceneTestCaseJ4.newIndexWriterConfig(Random, Version, Analyzer)}.
 *
 * @param r source of randomness used to pick the settings
 * @param v version passed through to the config
 * @param a analyzer passed through to the config
 * @return the config produced by {@code LuceneTestCaseJ4}
 */
public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
  final IndexWriterConfig randomized = LuceneTestCaseJ4.newIndexWriterConfig(r, v, a);
  return randomized;
}
/** Gets a resource from the classpath as {@link File}. This method should only be used,
* if a real file is needed. To get a stream, code should prefer
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
@ -287,6 +318,9 @@ public abstract class LuceneTestCase extends TestCase {
seed = null;
super.runBare();
} catch (Throwable e) {
if (_TestUtil.getTestCodec().equals("random")) {
System.out.println("NOTE: random codec of testcase '" + getName() + "' was: " + codec);
}
if (seed != null) {
System.out.println("NOTE: random seed of testcase '" + getName() + "' was: " + seed);
}

View File

@ -17,14 +17,25 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.CacheEntry;
import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestWatchman;
@ -34,7 +45,6 @@ import java.io.File;
import java.io.PrintStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Random;
import java.util.ArrayList;
@ -127,6 +137,56 @@ public class LuceneTestCaseJ4 {
private static final Map<Class<? extends LuceneTestCaseJ4>,Object> checkedClasses =
Collections.synchronizedMap(new WeakHashMap<Class<? extends LuceneTestCaseJ4>,Object>());
// saves default codec: we do this statically as many tests build indexes in @BeforeClass
private static String savedDefaultCodec;
private static String codec;
private static Codec preFlexSav;
/**
 * Registers a {@link PreFlexRWCodec} with the default {@link CodecProvider},
 * first unregistering whatever codec is currently registered under the
 * name "PreFlex" (if the lookup yields one).
 *
 * @return the codec that was previously found under "PreFlex", so the
 *         caller can hand it back to {@link #restorePreFlex(Codec)}
 */
public static Codec installPreFlexRW() {
  final CodecProvider provider = CodecProvider.getDefault();
  final Codec previous = provider.lookup("PreFlex");
  if (previous != null) {
    provider.unregister(previous);
  }
  provider.register(new PreFlexRWCodec());
  return previous;
}
/**
 * Undoes {@link #installPreFlexRW()}: unregisters whatever codec is
 * currently registered under the name "PreFlex" in the default
 * {@link CodecProvider} and re-registers the codec that was saved
 * when the read-write variant was installed.
 *
 * @param preFlex the codec previously returned by
 *        {@link #installPreFlexRW()}; may be {@code null} if no codec
 *        was found under "PreFlex" at install time, in which case
 *        nothing is re-registered
 */
public static void restorePreFlex(Codec preFlex) {
  Codec preFlexRW = CodecProvider.getDefault().lookup("PreFlex");
  if (preFlexRW != null) {
    CodecProvider.getDefault().unregister(preFlexRW);
  }
  // installPreFlexRW() guards against a missing "PreFlex" codec and can
  // therefore hand back null; mirror that guard here instead of passing
  // null on to register().
  if (preFlex != null) {
    CodecProvider.getDefault().register(preFlex);
  }
}
/**
 * Class-level setup: remembers the current default codec, decides which
 * codec this test class runs with (the value of
 * {@code _TestUtil.getTestCodec()}, or a random pick from
 * {@code CodecProvider.CORE_CODECS} when that value is "random"), and
 * installs it as the default codec.
 */
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
  savedDefaultCodec = CodecProvider.getDefaultCodec();

  String chosen = _TestUtil.getTestCodec();
  if (chosen.equals("random")) {
    chosen = CodecProvider.CORE_CODECS[seedRnd.nextInt(CodecProvider.CORE_CODECS.length)];
  }
  codec = chosen;

  // The core PreFlex codec can only read segments, so when running with
  // PreFlex the test-only PreFlexRW codec has to be swapped in.
  if (codec.equals("PreFlex")) {
    preFlexSav = installPreFlexRW();
  }

  CodecProvider.setDefaultCodec(codec);
}
/**
 * Class-level teardown: if this class ran with the "PreFlex" codec, puts
 * back the codec saved by {@link #beforeClassLuceneTestCaseJ4()}, then
 * resets the default codec to the value recorded before the class ran.
 */
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
  final boolean ranWithPreFlex = codec.equals("PreFlex");
  if (ranWithPreFlex) {
    // Restore the read-only PreFlex codec.
    restorePreFlex(preFlexSav);
  }
  CodecProvider.setDefaultCodec(savedDefaultCodec);
}
// This is how we get control when errors occur.
// Think of this as start/end/success/failed
// events.
@ -372,6 +432,34 @@ public class LuceneTestCaseJ4 {
return new Random(seed);
}
/**
 * Creates an {@link IndexWriterConfig} for the given version and analyzer
 * and then randomizes a number of its settings using {@code r}.
 *
 * @param r source of randomness; every randomized setting is drawn from it
 * @param v version handed to the {@link IndexWriterConfig} constructor
 * @param a analyzer handed to the {@link IndexWriterConfig} constructor
 * @return the randomized config
 */
public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) {
  final IndexWriterConfig config = new IndexWriterConfig(v, a);

  // NOTE: the draws from r below are order-sensitive; reordering them
  // would make the same seed produce a different configuration.

  // Coin flip: switch to a LogDocMergePolicy.
  if (r.nextBoolean()) {
    config.setMergePolicy(new LogDocMergePolicy());
  }

  // Coin flip: switch to a SerialMergeScheduler.
  if (r.nextBoolean()) {
    config.setMergeScheduler(new SerialMergeScheduler());
  }

  // Coin flip: pick maxBufferedDocs via _TestUtil.nextInt(r, 2, 1000).
  if (r.nextBoolean()) {
    config.setMaxBufferedDocs(_TestUtil.nextInt(r, 2, 1000));
  }

  // Coin flip: pick termIndexInterval via _TestUtil.nextInt(r, 1, 1000).
  if (r.nextBoolean()) {
    config.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
  }

  // Log-style merge policies expose extra knobs; randomize those as well.
  if (config.getMergePolicy() instanceof LogMergePolicy) {
    final LogMergePolicy logPolicy = (LogMergePolicy) config.getMergePolicy();
    logPolicy.setUseCompoundDocStore(r.nextBoolean());
    logPolicy.setUseCompoundFile(r.nextBoolean());
    logPolicy.setCalibrateSizeByDeletes(r.nextBoolean());
    logPolicy.setMergeFactor(_TestUtil.nextInt(r, 2, 20));
  }

  config.setReaderPooling(r.nextBoolean());
  return config;
}
/** Returns the value of this instance's {@code name} field. */
public String getName() {
  return name;
}
@ -395,6 +483,10 @@ public class LuceneTestCaseJ4 {
System.out.println("NOTE: random static seed of testclass '" + getName() + "' was: " + staticSeed);
}
if (_TestUtil.getTestCodec().equals("random")) {
System.out.println("NOTE: random codec of testcase '" + getName() + "' was: " + codec);
}
if (seed != null) {
System.out.println("NOTE: random seed of testcase '" + getName() + "' was: " + seed);
}
@ -407,5 +499,4 @@ public class LuceneTestCaseJ4 {
private static final Random seedRnd = new Random();
private String name = "<unknown>";
}

View File

@ -23,6 +23,9 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
@ -130,7 +133,7 @@ public class _TestUtil {
final char[] buffer = new char[end];
for (int i = 0; i < end; i++) {
int t = r.nextInt(5);
//buffer[i] = (char) (97 + r.nextInt(26));
if (0 == t && i < end - 1) {
// Make a surrogate pair
// High surrogate
@ -218,4 +221,39 @@ public class _TestUtil {
/**
 * Reads the {@code random.multiplier} system property and parses it as
 * an int; when the property is not set, "1" is parsed instead.
 *
 * @return the parsed multiplier
 * @throws NumberFormatException if the property value is not a valid int
 */
public static int getRandomMultiplier() {
  final String configured = System.getProperty("random.multiplier", "1");
  return Integer.parseInt(configured);
}
/**
 * Returns the name of the codec tests should run with, as given by the
 * {@code tests.codec} system property.
 *
 * @return the property value, or "random" when the property is not set
 */
public static String getTestCodec() {
  // "random" is the default: a different codec is then picked at random
  // for each test case (non-J4 tests) and each test class (J4 tests).
  final String configured = System.getProperty("tests.codec", "random");
  return configured;
}
/**
 * Returns a {@link CodecProvider} whose {@code getWriter} always returns
 * {@code c}, and whose {@code lookup} returns {@code c} for its own name
 * and otherwise defers to the default provider.
 *
 * @param c the codec to always write with
 */
public static CodecProvider alwaysCodec(final Codec c) {
  return new CodecProvider() {
    @Override
    public Codec getWriter(SegmentWriteState state) {
      // Ignore the segment state: every segment is written with c.
      return c;
    }

    @Override
    public Codec lookup(String name) {
      // NOTE(review): the previous comment here read "can't do this until
      // we fix PreFlexRW to not impersonate PreFlex" -- it is unclear which
      // alternative it referred to; confirm before changing this logic.
      if (!name.equals(c.name)) {
        return CodecProvider.getDefault().lookup(name);
      }
      return c;
    }
  };
}
/**
 * Returns a {@link CodecProvider} that can read any of the default
 * codecs but always writes with the named codec. The name is resolved
 * through the default provider's {@code lookup}.
 *
 * @param codec name of the codec to always write with
 */
public static CodecProvider alwaysCodec(final String codec) {
  final Codec resolved = CodecProvider.getDefault().lookup(codec);
  return alwaysCodec(resolved);
}
}

View File

@ -435,6 +435,7 @@
>
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<sysproperty key="tests.luceneMatchVersion" value="${tests.luceneMatchVersion}"/>
<sysproperty key="tests.codec" value="${tests.codec}"/>
<jvmarg line="${dir.prop}"/>
<jvmarg line="${args}"/>

View File

@ -44,6 +44,8 @@
<!-- TODO: measure toning this down by default to 1 -->
<property name="threadsPerProcessor" value="2"/>
<property name="tests.codec" value="random" />
<!-- Example directory -->
<property name="example" value="${common-solr.dir}/example" />
<!--