LUCENE-2205: port to trunk in preflex codec; port TestPagedBytes and PagedBytes.DataInput/Output to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1190017 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2011-10-27 20:49:13 +00:00
parent 60079a441f
commit 00da65393c
7 changed files with 693 additions and 63 deletions

@@ -723,6 +723,9 @@ Optimizations
FilteredQuery/IndexSearcher added by LUCENE-1536 to Lucene 4.0.
(Uwe Schindler)
* LUCENE-2205: Very substantial (3-5X) reduction in the RAM required to hold
the terms index when opening an IndexReader (Aaron McCurry via Mike McCandless)
Test Cases
* LUCENE-3420: Disable the finalness checks in TokenStream and Analyzer

@@ -47,9 +47,8 @@ public final class TermInfosReader {
private final SegmentTermEnum origEnum;
private final long size;
private final Term[] indexTerms;
private final TermInfo[] indexInfos;
private final long[] indexPointers;
private final TermInfosReaderIndex index;
private final int indexLength;
private final int totalIndexInterval;
@@ -118,37 +117,23 @@ public final class TermInfosReader {
if (indexDivisor != -1) {
// Load terms index
totalIndexInterval = origEnum.indexInterval * indexDivisor;
final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION),
final String indexFileName = IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION);
final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName,
context), fieldInfos, true);
try {
int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index
indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];
for (int i=0;indexEnum.next(); i++) {
indexTerms[i] = indexEnum.term();
assert indexTerms[i] != null;
assert indexTerms[i].text() != null;
assert indexTerms[i].field() != null;
indexInfos[i] = indexEnum.termInfo();
indexPointers[i] = indexEnum.indexPointer;
for (int j = 1; j < indexDivisor; j++)
if (!indexEnum.next())
break;
}
// nocommit don't cast to int..
index = new TermInfosReaderIndex(indexEnum, indexDivisor, (int) dir.fileLength(indexFileName), totalIndexInterval);
indexLength = index.length();
} finally {
indexEnum.close();
}
} else {
// Do not load terms index:
totalIndexInterval = -1;
indexTerms = null;
indexInfos = null;
indexPointers = null;
index = null;
indexLength = -1;
}
success = true;
} finally {
@@ -203,31 +188,6 @@ public final class TermInfosReader {
}
}
/** Returns the offset of the greatest index entry which is less than or equal to term.*/
private int getIndexOffset(Term term) {
int lo = 0; // binary search indexTerms[]
int hi = indexTerms.length - 1;
while (hi >= lo) {
int mid = (lo + hi) >>> 1;
assert indexTerms[mid] != null : "indexTerms = " + indexTerms.length + " mid=" + mid;
int delta = compareAsUTF16(term, indexTerms[mid]);
if (delta < 0)
hi = mid - 1;
else if (delta > 0)
lo = mid + 1;
else
return mid;
}
return hi;
}
private void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset],
((long) indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);
}
/** Returns the TermInfo for a Term in the set, or null. */
TermInfo get(Term term) throws IOException {
return get(term, false);
@@ -272,8 +232,8 @@ public final class TermInfosReader {
&& ((enumerator.prev() != null && compareAsUTF16(term, enumerator.prev())> 0)
|| compareAsUTF16(term, enumerator.term()) >= 0)) {
int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
if (indexTerms.length == enumOffset // but before end of block
|| compareAsUTF16(term, indexTerms[enumOffset]) < 0) {
if (indexLength == enumOffset // but before end of block
|| index.compareTo(term, enumOffset) < 0) {
// no need to seek
final TermInfo ti;
@@ -309,10 +269,10 @@ public final class TermInfosReader {
indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
} else {
// Must do binary search:
indexPos = getIndexOffset(term);
indexPos = index.getIndexOffset(term);
}
seekEnum(enumerator, indexPos);
index.seekEnum(enumerator, indexPos);
enumerator.scanTo(term);
final TermInfo ti;
@@ -352,7 +312,7 @@ public final class TermInfosReader {
}
private void ensureIndexIsRead() {
if (indexTerms == null) {
if (index == null) {
throw new IllegalStateException("terms index was not loaded when this reader was created");
}
}
@@ -362,10 +322,10 @@ public final class TermInfosReader {
if (size == 0) return -1;
ensureIndexIsRead();
int indexOffset = getIndexOffset(term);
int indexOffset = index.getIndexOffset(term);
SegmentTermEnum enumerator = getThreadResources().termEnum;
seekEnum(enumerator, indexOffset);
index.seekEnum(enumerator, indexOffset);
while(compareAsUTF16(term, enumerator.term()) > 0 && enumerator.next()) {}

@@ -0,0 +1,252 @@
package org.apache.lucene.index.codecs.preflex;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
* This stores a monotonically increasing set of <Term, TermInfo> pairs in an
* index segment. Pairs are accessed either by Term or by ordinal position in
* the set. The Terms and TermInfos are serialized into a byte array, and
* pointers to the position of each are stored in an int array.
*/
class TermInfosReaderIndex {
private static final int MAX_PAGE_BITS = 18; // 256 KB block
private Term[] fields;
private int totalIndexInterval;
private Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
private final PagedBytesDataInput dataInput;
private final PackedInts.Reader indexToDataOffset;
private final int indexSize;
private final int skipInterval;
/**
* Loads the segment information at segment load time.
*
* @param indexEnum
* the term enum.
* @param indexDivisor
* the index divisor.
* @param tiiFileLength
* the size of the tii file, used to approximate the size of the
* buffer.
* @param totalIndexInterval
* the total index interval.
*/
TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException {
this.totalIndexInterval = totalIndexInterval;
indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
skipInterval = indexEnum.skipInterval;
// this is only an initial size; it will be GCed once the build is complete
long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
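// For each index entry, indexToTerms records the byte offset of its serialized
// record within dataPagedBytes; GrowableWriter stores these offsets as packed ints.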
String currentField = null;
List<String> fieldStrs = new ArrayList<String>();
int fieldCounter = -1;
for (int i = 0; indexEnum.next(); i++) {
Term term = indexEnum.term();
if (currentField == null || !currentField.equals(term.field())) {
currentField = term.field();
fieldStrs.add(currentField);
fieldCounter++;
}
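// Each index entry is serialized as: field ordinal (VInt), term text (String),
// docFreq (VInt), skipOffset (VInt, only when docFreq >= skipInterval),
// freqPointer (VLong), proxPointer (VLong), indexPointer (VLong).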
TermInfo termInfo = indexEnum.termInfo();
indexToTerms.set(i, dataOutput.getPosition());
dataOutput.writeVInt(fieldCounter);
dataOutput.writeString(term.text());
dataOutput.writeVInt(termInfo.docFreq);
if (termInfo.docFreq >= skipInterval) {
dataOutput.writeVInt(termInfo.skipOffset);
}
dataOutput.writeVLong(termInfo.freqPointer);
dataOutput.writeVLong(termInfo.proxPointer);
dataOutput.writeVLong(indexEnum.indexPointer);
for (int j = 1; j < indexDivisor; j++) {
if (!indexEnum.next()) {
break;
}
}
}
fields = new Term[fieldStrs.size()];
for (int i = 0; i < fields.length; i++) {
fields[i] = new Term(fieldStrs.get(i));
}
dataPagedBytes.freeze(true);
dataInput = dataPagedBytes.getDataInput();
indexToDataOffset = indexToTerms.getMutable();
}
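// Picks the page size (as a power-of-two exponent) so a single page roughly covers
// the estimated size, clamped between 2^4 and 2^MAX_PAGE_BITS (256 KB) bytes.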
private static int estimatePageBits(long estSize) {
return Math.max(Math.min(64 - BitUtil.nlz(estSize), MAX_PAGE_BITS), 4);
}
void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
input.setPosition(indexToDataOffset.get(indexOffset));
// read the term
int fieldId = input.readVInt();
Term field = fields[fieldId];
Term term = new Term(field.field(), input.readString());
// read the terminfo
TermInfo termInfo = new TermInfo();
termInfo.docFreq = input.readVInt();
if (termInfo.docFreq >= skipInterval) {
termInfo.skipOffset = input.readVInt();
} else {
termInfo.skipOffset = 0;
}
termInfo.freqPointer = input.readVLong();
termInfo.proxPointer = input.readVLong();
long pointer = input.readVLong();
// perform the seek
enumerator.seek(pointer, ((long) indexOffset * totalIndexInterval) - 1, term, termInfo);
}
/**
* Binary search for the given term.
*
* @param term
* the term to locate.
* @return the offset of the greatest index entry that is less than or equal to the term.
* @throws IOException
*/
int getIndexOffset(Term term) throws IOException {
int lo = 0;
int hi = indexSize - 1;
PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
BytesRef scratch = new BytesRef();
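// Binary search over the index entries; compareTo() deserializes the entry at
// 'mid' directly from the paged bytes using the cloned input and scratch ref.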
while (hi >= lo) {
int mid = (lo + hi) >>> 1;
int delta = compareTo(term, mid, input, scratch);
if (delta < 0)
hi = mid - 1;
else if (delta > 0)
lo = mid + 1;
else
return mid;
}
return hi;
}
/**
* Gets the term at the given position. For testing.
*
* @param termIndex
* the position to read the term from the index.
* @return the term.
* @throws IOException
*/
Term getTerm(int termIndex) throws IOException {
PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
input.setPosition(indexToDataOffset.get(termIndex));
// read the term
int fieldId = input.readVInt();
Term field = fields[fieldId];
return new Term(field.field(), input.readString());
}
/**
* Returns the number of terms.
*
* @return the number of terms in the index.
*/
int length() {
return indexSize;
}
/**
* Compares the given term against the term in the index specified by the
* term index, i.e. it returns a negative value when the term is less than
* the index term.
*
* @param term
* the given term.
* @param termIndex
* the index of the term to compare against.
* @return a negative, zero or positive value, as for Comparator.compare.
* @throws IOException
*/
int compareTo(Term term, int termIndex) throws IOException {
return compareTo(term, termIndex, (PagedBytesDataInput) dataInput.clone(), new BytesRef());
}
/**
* Compares the fields of the two terms first and, if they are not equal,
* returns that result. If the fields are equal, compares the term text.
*
* @param term
* the term to compare.
* @param termIndex
* the position of the index term in the input to compare against.
* @param input
* the input buffer.
* @param reuse
* a scratch BytesRef used to read the index term's text.
* @return the comparison result.
* @throws IOException
*/
private int compareTo(Term term, int termIndex, PagedBytesDataInput input, BytesRef reuse) throws IOException {
// if term field does not equal mid's field index, then compare fields
// else if they are equal, compare term's string values...
int c = compareField(term, termIndex, input);
if (c == 0) {
reuse.length = input.readVInt();
reuse.grow(reuse.length);
input.readBytes(reuse.bytes, 0, reuse.length);
return comparator.compare(term.bytes(), reuse);
}
return c;
}
/**
* Compares the fields before checking the text of the terms.
*
* @param term
* the given term.
* @param termIndex
* the position of the index term in the data block.
* @param input
* the data block input.
* @return the result of comparing the two fields.
* @throws IOException
*/
private int compareField(Term term, int termIndex, PagedBytesDataInput input) throws IOException {
input.setPosition(indexToDataOffset.get(termIndex));
return term.field().compareTo(fields[input.readVInt()].field());
}
}

@@ -17,12 +17,14 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import org.apache.lucene.store.IndexInput;
import java.util.List;
import java.util.ArrayList;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
/** Represents a logical byte[] as a series of pages. You
* can write-once into the logical byte[] (append only),
@@ -37,6 +39,8 @@ public final class PagedBytes {
private final int blockSize;
private final int blockBits;
private final int blockMask;
private boolean didSkipBytes;
private boolean frozen;
private int upto;
private byte[] currentBlock;
@@ -320,6 +324,7 @@ public final class PagedBytes {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
didSkipBytes = true;
}
currentBlock = new byte[blockSize];
upto = 0;
@@ -338,6 +343,12 @@ public final class PagedBytes {
/** Commits final byte[], trimming it if necessary and if trim=true */
public Reader freeze(boolean trim) {
if (frozen) {
throw new IllegalStateException("already frozen");
}
if (didSkipBytes) {
throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
}
if (trim && upto < blockSize) {
final byte[] newBlock = new byte[upto];
System.arraycopy(currentBlock, 0, newBlock, 0, upto);
@@ -348,6 +359,7 @@ public final class PagedBytes {
}
blocks.add(currentBlock);
blockEnd.add(upto);
frozen = true;
currentBlock = null;
return new Reader(this);
}
@@ -389,4 +401,150 @@ public final class PagedBytes {
return pointer;
}
public final class PagedBytesDataInput extends DataInput {
private int currentBlockIndex;
private int currentBlockUpto;
private byte[] currentBlock;
PagedBytesDataInput() {
currentBlock = blocks.get(0);
}
@Override
public Object clone() {
PagedBytesDataInput clone = getDataInput();
clone.setPosition(getPosition());
return clone;
}
/** Returns the current byte position. */
public long getPosition() {
return (long) currentBlockIndex * blockSize + currentBlockUpto; // long arithmetic avoids int overflow for large positions
}
/** Seek to a position previously obtained from
* {@link #getPosition}. */
public void setPosition(long pos) {
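// pos is a global byte offset: the high bits select the block, the low blockBits
// bits give the offset within that block (blockSize is a power of two, 1 << blockBits).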
currentBlockIndex = (int) (pos >> blockBits);
currentBlock = blocks.get(currentBlockIndex);
currentBlockUpto = (int) (pos & blockMask);
}
@Override
public byte readByte() {
if (currentBlockUpto == blockSize) {
nextBlock();
}
return currentBlock[currentBlockUpto++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
final int offsetEnd = offset + len;
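// Copy out of the current block, advancing to the next block whenever the
// current one is exhausted, until len bytes have been read.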
while (true) {
final int blockLeft = blockSize - currentBlockUpto;
final int left = offsetEnd - offset;
if (blockLeft < left) {
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
blockLeft);
nextBlock();
offset += blockLeft;
} else {
// Last block
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
left);
currentBlockUpto += left;
break;
}
}
}
private void nextBlock() {
currentBlockIndex++;
currentBlockUpto = 0;
currentBlock = blocks.get(currentBlockIndex);
}
}
public final class PagedBytesDataOutput extends DataOutput {
@Override
public void writeByte(byte b) {
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
currentBlock[upto++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
if (length == 0) {
return;
}
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
final int offsetEnd = offset + length;
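// Fill the remainder of the current block, then keep allocating fresh blocks
// until all 'length' bytes have been written.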
while(true) {
final int left = offsetEnd - offset;
final int blockLeft = blockSize - upto;
if (blockLeft < left) {
System.arraycopy(b, offset, currentBlock, upto, blockLeft);
blocks.add(currentBlock);
blockEnd.add(blockSize);
currentBlock = new byte[blockSize];
upto = 0;
offset += blockLeft;
} else {
// Last block
System.arraycopy(b, offset, currentBlock, upto, left);
upto += left;
break;
}
}
}
/** Return the current byte position. */
public long getPosition() {
if (currentBlock == null) {
return 0;
} else {
return (long) blocks.size() * blockSize + upto; // long arithmetic avoids int overflow for large positions
}
}
}
/** Returns a DataInput to read values from this
* PagedBytes instance. */
public PagedBytesDataInput getDataInput() {
if (!frozen) {
throw new IllegalStateException("must call freeze() before getDataInput");
}
return new PagedBytesDataInput();
}
/** Returns a DataOutput that you may use to write into
* this PagedBytes instance. If you do this, you should
* not call the other writing methods (eg, copy);
* results are undefined. */
public PagedBytesDataOutput getDataOutput() {
if (frozen) {
throw new IllegalStateException("cannot get DataOutput after freeze()");
}
return new PagedBytesDataOutput();
}
}

@@ -0,0 +1,193 @@
package org.apache.lucene.index.codecs.preflex;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.CoreCodecProvider;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestTermInfosReaderIndex extends LuceneTestCase {
private static final int NUMBER_OF_DOCUMENTS = 1000;
private static final int NUMBER_OF_FIELDS = 100;
private TermInfosReaderIndex index;
private Directory directory;
private SegmentTermEnum termEnum;
private int indexDivisor;
private int termIndexInterval;
private IndexReader reader;
private List<Term> sampleTerms;
@Override
public void setUp() throws Exception {
super.setUp();
indexDivisor = _TestUtil.nextInt(random, 1, 10);
directory = newDirectory();
termIndexInterval = populate(directory);
IndexReader r0 = IndexReader.open(directory);
SegmentReader r = (SegmentReader) r0.getSequentialSubReaders()[0];
String segment = r.getSegmentName();
r.close();
FieldInfos fieldInfos = new FieldInfos(directory, IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELD_INFOS_EXTENSION));
String segmentFileName = IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_INDEX_EXTENSION);
long tiiFileLength = directory.fileLength(segmentFileName);
IndexInput input = directory.openInput(segmentFileName, newIOContext(random));
termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, "", PreFlexCodec.TERMS_EXTENSION), newIOContext(random)), fieldInfos, false);
int totalIndexInterval = termEnum.indexInterval * indexDivisor;
SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval);
indexEnum.close();
input.close();
reader = IndexReader.open(directory);
sampleTerms = sample(reader,1000);
}
@Override
public void tearDown() throws Exception {
termEnum.close();
reader.close();
directory.close();
super.tearDown();
}
public void testSeekEnum() throws CorruptIndexException, IOException {
int indexPosition = 3;
SegmentTermEnum clone = (SegmentTermEnum) termEnum.clone();
Term term = findTermThatWouldBeAtIndex(clone, indexPosition);
SegmentTermEnum enumerator = clone;
index.seekEnum(enumerator, indexPosition);
assertEquals(term, enumerator.term());
clone.close();
}
public void testCompareTo() throws IOException {
Term term = new Term("field" + random.nextInt(NUMBER_OF_FIELDS) ,getText());
for (int i = 0; i < index.length(); i++) {
Term t = index.getTerm(i);
int compareTo = term.compareTo(t);
assertEquals(compareTo, index.compareTo(term, i));
}
}
public void testRandomSearchPerformance() throws CorruptIndexException, IOException {
IndexSearcher searcher = new IndexSearcher(reader);
for (Term t : sampleTerms) {
TermQuery query = new TermQuery(t);
TopDocs topDocs = searcher.search(query, 10);
assertTrue(topDocs.totalHits > 0);
}
searcher.close();
}
private List<Term> sample(IndexReader reader, int size) throws IOException {
List<Term> sample = new ArrayList<Term>();
Random random = new Random();
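// Gather terms until 'size' have been collected; after that, overwrite random
// slots so terms seen later in the enumeration can still end up in the sample.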
FieldsEnum fieldsEnum = MultiFields.getFields(reader).iterator();
String field;
while((field = fieldsEnum.next()) != null) {
TermsEnum terms = fieldsEnum.terms();
while (terms.next() != null) {
if (sample.size() >= size) {
int pos = random.nextInt(size);
sample.set(pos, new Term(field, terms.term()));
} else {
sample.add(new Term(field, terms.term()));
}
}
}
Collections.shuffle(sample);
return sample;
}
private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException {
int termPosition = index * termIndexInterval * indexDivisor;
for (int i = 0; i < termPosition; i++) {
if (!termEnum.next()) {
fail("Should not have run out of terms.");
}
}
return termEnum.term();
}
private int populate(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
CoreCodecProvider cp = new CoreCodecProvider();
cp.unregister(cp.lookup("PreFlex"));
cp.register(new PreFlexRWCodec());
cp.setDefaultFieldCodec("PreFlex");
config.setCodecProvider(cp);
// turn off compound file, this test will open some index files directly.
LogMergePolicy mp = newLogMergePolicy();
mp.setUseCompoundFile(false);
config.setMergePolicy(mp);
RandomIndexWriter writer = new RandomIndexWriter(random, directory, config);
for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) {
Document document = new Document();
for (int f = 0; f < NUMBER_OF_FIELDS; f++) {
document.add(newField("field" + f, getText(), StringField.TYPE_UNSTORED));
}
writer.addDocument(document);
}
writer.optimize();
writer.close();
return config.getTermIndexInterval();
}
private String getText() {
return Long.toString(random.nextLong(),Character.MAX_RADIX);
}
}

@@ -0,0 +1,64 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
public class TestPagedBytes extends LuceneTestCase {
public void testDataInputOutput() throws Exception {
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
final PagedBytes p = new PagedBytes(_TestUtil.nextInt(random, 1, 20));
final DataOutput out = p.getDataOutput();
final int numBytes = random.nextInt(10000000);
final byte[] answer = new byte[numBytes];
random.nextBytes(answer);
int written = 0;
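// Write the bytes through the DataOutput, randomly interleaving single-byte
// writes with larger chunked writes.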
while(written < numBytes) {
if (random.nextInt(10) == 7) {
out.writeByte(answer[written++]);
} else {
int chunk = Math.min(random.nextInt(1000), numBytes - written);
out.writeBytes(answer, written, chunk);
written += chunk;
}
}
p.freeze(random.nextBoolean());
final DataInput in = p.getDataInput();
final byte[] verify = new byte[numBytes];
int read = 0;
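// Read everything back through the DataInput, again interleaving single-byte
// and chunked reads, then verify against the original bytes.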
while(read < numBytes) {
if (random.nextInt(10) == 7) {
verify[read++] = in.readByte();
} else {
int chunk = Math.min(random.nextInt(1000), numBytes - read);
in.readBytes(verify, read, chunk);
read += chunk;
}
}
assertTrue(Arrays.equals(answer, verify));
}
}
}