LUCENE-3342: improve FrozenBufferedDeletes ram efficiency

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152055 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-07-29 00:12:54 +00:00
parent b6de525da5
commit 963bce23d1
12 changed files with 528 additions and 155 deletions

View File: org/apache/lucene/index/BufferedDeletes.java

@@ -18,17 +18,14 @@ package org.apache.lucene.index;
*/
import java.util.ArrayList;
import java.util.Iterator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
/* Holds buffered deletes, by docID, term or query for a
* single segment. This is used to hold buffered pending
@@ -67,7 +64,7 @@ class BufferedDeletes {
final static int BYTES_PER_DEL_QUERY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2*RamUsageEstimator.NUM_BYTES_INT + 24;
final AtomicInteger numTermDeletes = new AtomicInteger();
final Map<Term,Integer> terms;
final Map<Term,Integer> terms = new HashMap<Term,Integer>();
final Map<Query,Integer> queries = new HashMap<Query,Integer>();
final List<Integer> docIDs = new ArrayList<Integer>();
@@ -78,18 +75,13 @@ class BufferedDeletes {
private final static boolean VERBOSE_DELETES = false;
long gen;
public BufferedDeletes(boolean sortTerms) {
this(sortTerms, new AtomicLong());
public BufferedDeletes() {
this(new AtomicLong());
}
BufferedDeletes(boolean sortTerms, AtomicLong bytesUsed) {
BufferedDeletes(AtomicLong bytesUsed) {
assert bytesUsed != null;
this.bytesUsed = bytesUsed;
if (sortTerms) {
terms = new TreeMap<Term,Integer>();
} else {
terms = new HashMap<Term,Integer>();
}
}
@Override
@@ -117,50 +109,6 @@ class BufferedDeletes {
}
}
void update(BufferedDeletes in) {
numTermDeletes.addAndGet(in.numTermDeletes.get());
for (Map.Entry<Term,Integer> ent : in.terms.entrySet()) {
final Term term = ent.getKey();
if (!terms.containsKey(term)) {
// only incr bytesUsed if this term wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_TERM);
}
terms.put(term, MAX_INT);
}
for (Map.Entry<Query,Integer> ent : in.queries.entrySet()) {
final Query query = ent.getKey();
if (!queries.containsKey(query)) {
// only incr bytesUsed if this query wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_QUERY);
}
queries.put(query, MAX_INT);
}
// docIDs never move across segments and the docIDs
// should already be cleared
}
void update(FrozenBufferedDeletes in) {
numTermDeletes.addAndGet(in.numTermDeletes);
for(Term term : in.terms) {
if (!terms.containsKey(term)) {
// only incr bytesUsed if this term wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_TERM);
}
terms.put(term, MAX_INT);
}
for(int queryIdx=0;queryIdx<in.queries.length;queryIdx++) {
final Query query = in.queries[queryIdx];
if (!queries.containsKey(query)) {
// only incr bytesUsed if this query wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_QUERY);
}
queries.put(query, MAX_INT);
}
}
public void addQuery(Query query, int docIDUpto) {
Integer current = queries.put(query, docIDUpto);
// increment bytes used only if the query wasn't added so far.
@@ -194,43 +142,6 @@ class BufferedDeletes {
}
}
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
@Override
public Iterator<Term> iterator() {
return terms.keySet().iterator();
}
};
}
public Iterable<QueryAndLimit> queriesIterable() {
return new Iterable<QueryAndLimit>() {
@Override
public Iterator<QueryAndLimit> iterator() {
return new Iterator<QueryAndLimit>() {
private final Iterator<Map.Entry<Query,Integer>> iter = queries.entrySet().iterator();
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public QueryAndLimit next() {
final Map.Entry<Query,Integer> ent = iter.next();
return new QueryAndLimit(ent.getKey(), ent.getValue());
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
void clear() {
terms.clear();
queries.clear();

View File: org/apache/lucene/index/BufferedDeletesStream.java

@@ -33,6 +33,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
/* Tracks the stream of {@link BufferedDeletes}.
* When DocumentsWriterPerThread flushes, its buffered
@@ -182,7 +183,7 @@ class BufferedDeletesStream {
infos2.addAll(infos);
Collections.sort(infos2, sortSegInfoByDelGen);
BufferedDeletes coalescedDeletes = null;
CoalescedDeletes coalescedDeletes = null;
boolean anyNewDeletes = false;
int infosIDX = infos2.size()-1;
@@ -200,7 +201,7 @@
if (packet != null && segGen < packet.delGen()) {
//System.out.println(" coalesce");
if (coalescedDeletes == null) {
coalescedDeletes = new BufferedDeletes(true);
coalescedDeletes = new CoalescedDeletes();
}
if (!packet.isSegmentPrivate) {
/*
@@ -251,7 +252,7 @@
}
if (coalescedDeletes == null) {
coalescedDeletes = new BufferedDeletes(true);
coalescedDeletes = new CoalescedDeletes();
}
/*
@@ -463,7 +464,8 @@
if (term != null) {
assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term;
}
lastDeleteTerm = term;
// TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
lastDeleteTerm = term == null ? null : new Term(term.field(), new BytesRef(term.bytes));
return true;
}
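The copy added above is needed because the coalesced terms iterable (see CoalescedDeletes and PrefixCodedTerms below) decodes into a single reused Term instance, so keeping lastDeleteTerm as a bare reference would let it mutate silently between next() calls. A minimal illustration of the hazard, where it is a hypothetical Iterator<Term> backed by PrefixCodedTerms:

    Term first = it.next();
    Term second = it.next();
    // first == second is true: both point at the one reused instance, which
    // now holds the second term's field and bytes -- hence the field/bytes
    // copy before the sorted-order assert.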

View File: org/apache/lucene/index/CoalescedDeletes.java

@@ -0,0 +1,191 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Iterator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
class CoalescedDeletes {
final Map<Query,Integer> queries = new HashMap<Query,Integer>();
final List<Iterable<Term>> iterables = new ArrayList<Iterable<Term>>();
@Override
public String toString() {
// note: we could add/collect more debugging information
return "CoalescedDeletes(termSets=" + iterables.size() + ",queries=" + queries.size() + ")";
}
void update(FrozenBufferedDeletes in) {
iterables.add(in.termsIterable());
for(int queryIdx=0;queryIdx<in.queries.length;queryIdx++) {
final Query query = in.queries[queryIdx];
queries.put(query, BufferedDeletes.MAX_INT);
}
}
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
@Override
public Iterator<Term> iterator() {
ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
for (Iterable<Term> iterable : iterables) {
subs.add(iterable.iterator());
}
return mergedIterator(subs);
}
};
}
public Iterable<QueryAndLimit> queriesIterable() {
return new Iterable<QueryAndLimit>() {
@Override
public Iterator<QueryAndLimit> iterator() {
return new Iterator<QueryAndLimit>() {
private final Iterator<Map.Entry<Query,Integer>> iter = queries.entrySet().iterator();
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public QueryAndLimit next() {
final Map.Entry<Query,Integer> ent = iter.next();
return new QueryAndLimit(ent.getKey(), ent.getValue());
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
/** provides a merged view across multiple iterators */
static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
return new Iterator<Term>() {
Term current;
TermMergeQueue queue = new TermMergeQueue(iterators.size());
SubIterator[] top = new SubIterator[iterators.size()];
int numTop;
{
int index = 0;
for (Iterator<Term> iterator : iterators) {
if (iterator.hasNext()) {
SubIterator sub = new SubIterator();
sub.current = iterator.next();
sub.iterator = iterator;
sub.index = index++;
queue.add(sub);
}
}
}
public boolean hasNext() {
if (queue.size() > 0) {
return true;
}
for (int i = 0; i < numTop; i++) {
if (top[i].iterator.hasNext()) {
return true;
}
}
return false;
}
public Term next() {
// restore queue
pushTop();
// gather equal top fields
if (queue.size() > 0) {
pullTop();
} else {
current = null;
}
return current;
}
public void remove() {
throw new UnsupportedOperationException();
}
private void pullTop() {
// extract all subs from the queue that have the same top term
assert numTop == 0;
while (true) {
top[numTop++] = queue.pop();
if (queue.size() == 0
|| !(queue.top()).current.equals(top[0].current)) {
break;
}
}
current = top[0].current;
}
private void pushTop() {
// call next() on each top, and put back into queue
for (int i = 0; i < numTop; i++) {
if (top[i].iterator.hasNext()) {
top[i].current = top[i].iterator.next();
queue.add(top[i]);
} else {
// no more terms
top[i].current = null;
}
}
numTop = 0;
}
};
}
private static class SubIterator {
Iterator<Term> iterator;
Term current;
int index;
}
private static class TermMergeQueue extends PriorityQueue<SubIterator> {
TermMergeQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(SubIterator a, SubIterator b) {
final int cmp = a.current.compareTo(b.current);
if (cmp != 0) {
return cmp < 0;
} else {
return a.index < b.index;
}
}
}
}
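The merged iterator above is a standard k-way merge: each sub-iterator's current term sits in a PriorityQueue ordered by term (ties broken by sub-iterator index), pullTop() gathers every sub whose top term is equal so a term buffered in several frozen packets comes back exactly once, and pushTop() advances those subs before the next round. A minimal usage sketch; packet1 and packet2 are hypothetical FrozenBufferedDeletes instances, everything else is from this diff:

    List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
    subs.add(packet1.termsIterable().iterator()); // each sub yields sorted terms
    subs.add(packet2.termsIterable().iterator());
    Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
    while (merged.hasNext()) {
      Term t = merged.next(); // globally sorted; duplicates across subs collapse to one
    }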

View File: org/apache/lucene/index/DocumentsWriterDeleteQueue.java

@@ -81,7 +81,7 @@ final class DocumentsWriterDeleteQueue {
}
DocumentsWriterDeleteQueue(long generation) {
this(new BufferedDeletes(false), generation);
this(new BufferedDeletes(), generation);
}
DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) {

View File: org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -188,7 +188,7 @@ public class DocumentsWriterPerThread {
bytesUsed = new AtomicLong(0);
byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
consumer = indexingChain.getChain(this);
pendingDeletes = new BufferedDeletes(false);
pendingDeletes = new BufferedDeletes();
initialize();
}
@@ -476,7 +476,7 @@ public class DocumentsWriterPerThread {
segmentDeletes = null;
} else {
segmentDeletes = pendingDeletes;
pendingDeletes = new BufferedDeletes(false);
pendingDeletes = new BufferedDeletes();
}
if (infoStream != null) {

View File: org/apache/lucene/index/FrozenBufferedDeletes.java

@@ -21,31 +21,23 @@ import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
/** Holds buffered deletes by term or query, once pushed.
* Pushed deltes are write-once, so we shift to more
* Pushed deletes are write-once, so we shift to more
* memory efficient data structure to hold them. We don't
* hold docIDs because these are applied on flush. */
class FrozenBufferedDeletes {
/* Rough logic: Term is object w/
String field and BytesRef text (OBJ_HEADER + 2*POINTER).
String field is (OBJ_HEADER + 4*INT +
POINTER + OBJ_HEADER + CHAR*field.length).
Term's text is BytesRef (OBJ_HEADER + 2*INT + POINTER +
OBJ_HEADER + bytes.length). */
final static int BYTES_PER_DEL_TERM = 4*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT;
/* Query we often undercount (say 24 bytes), plus int. */
final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24;
// Terms, in sorted order:
// TODO: would be more RAM efficient to store BytesRef[],
// per field:
final Term[] terms;
final PrefixCodedTerms terms;
int termCount; // just for debugging
// Parallel array of deleted query, and the docIDUpto for
// each
@@ -62,7 +54,15 @@ class FrozenBufferedDeletes {
public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) {
this.isSegmentPrivate = isSegmentPrivate;
assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
termCount = termsArray.length;
ArrayUtil.mergeSort(termsArray);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
for (Term term : termsArray) {
builder.add(term);
}
terms = builder.finish();
queries = new Query[deletes.queries.size()];
queryLimits = new int[deletes.queries.size()];
int upto = 0;
@@ -71,13 +71,8 @@ class FrozenBufferedDeletes {
queryLimits[upto] = ent.getValue();
upto++;
}
int termDataBytes = 0;
for(Map.Entry<Term,Integer> ent : deletes.terms.entrySet()) {
final Term term = ent.getKey();
termDataBytes += term.bytes().length;
termDataBytes += term.field().length() * RamUsageEstimator.NUM_BYTES_CHAR;
}
bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY + termDataBytes;
bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY;
numTermDeletes = deletes.numTermDeletes.get();
}
@@ -95,24 +90,7 @@ class FrozenBufferedDeletes {
return new Iterable<Term>() {
@Override
public Iterator<Term> iterator() {
return new Iterator<Term>() {
private int upto;
@Override
public boolean hasNext() {
return upto < terms.length;
}
@Override
public Term next() {
return terms[upto++];
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
return terms.iterator();
}
};
}
@@ -149,7 +127,7 @@
public String toString() {
String s = "";
if (numTermDeletes != 0) {
s += " " + numTermDeletes + " deleted terms (unique count=" + terms.length + ")";
s += " " + numTermDeletes + " deleted terms (unique count=" + termCount + ")";
}
if (queries.length != 0) {
s += " " + queries.length + " deleted queries";
@@ -162,6 +140,6 @@
}
boolean any() {
return terms.length > 0 || queries.length > 0;
return termCount > 0 || queries.length > 0;
}
}
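The bytesUsed change above is where the savings land: instead of charging a fixed per-Term object overhead plus field chars and term bytes, the frozen packet now charges only the actual size of the prefix-coded buffer (plus the unchanged per-query estimate). A rough, illustrative comparison, assuming 8-byte object references and headers and 4-byte ints (one plausible 64-bit layout, not a measured value):

    // Old cost of one buffered Term, per the deleted BYTES_PER_DEL_TERM formula:
    int perTermOverhead = 4*8 /* refs */ + 5*8 /* headers */ + 6*4 /* ints */; // 96
    // e.g. new Term("id", "doc17"): 96 + 5 term bytes + 2*2 field chars ~= 105 bytes.
    // New cost: one prefix-coded entry, i.e. a VInt (shared-prefix length, low bit
    // flags a field change), a VInt (suffix length), and the suffix bytes -- just
    // a few bytes once the "doc" prefix is shared with the previous term.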

View File: org/apache/lucene/index/PrefixCodedTerms.java

@@ -0,0 +1,146 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.store.RAMInputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
/**
* Prefix codes term instances (prefixes are shared)
* @lucene.experimental
*/
class PrefixCodedTerms implements Iterable<Term> {
final RAMFile buffer;
private PrefixCodedTerms(RAMFile buffer) {
this.buffer = buffer;
}
/** @return size in bytes */
public long getSizeInBytes() {
return buffer.getSizeInBytes();
}
/** @return iterator over the bytes */
public Iterator<Term> iterator() {
return new PrefixCodedTermsIterator();
}
class PrefixCodedTermsIterator implements Iterator<Term> {
final IndexInput input;
String field = "";
BytesRef bytes = new BytesRef();
Term term = new Term(field, bytes);
PrefixCodedTermsIterator() {
try {
input = new RAMInputStream(buffer);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public boolean hasNext() {
return input.getFilePointer() < input.length();
}
public Term next() {
assert hasNext();
try {
int code = input.readVInt();
if ((code & 1) != 0) {
// new field
field = input.readString();
}
int prefix = code >>> 1;
int suffix = input.readVInt();
bytes.grow(prefix + suffix);
input.readBytes(bytes.bytes, prefix, suffix);
bytes.length = prefix + suffix;
term.set(field, bytes);
return term;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void remove() {
throw new UnsupportedOperationException();
}
}
/** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
public static class Builder {
private RAMFile buffer = new RAMFile();
private RAMOutputStream output = new RAMOutputStream(buffer);
private Term lastTerm = new Term("");
/** add a term */
public void add(Term term) {
assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
try {
int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
int suffix = term.bytes.length - prefix;
if (term.field.equals(lastTerm.field)) {
output.writeVInt(prefix << 1);
} else {
output.writeVInt(prefix << 1 | 1);
output.writeString(term.field);
}
output.writeVInt(suffix);
output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
lastTerm.bytes.copy(term.bytes);
lastTerm.field = term.field;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/** return finalized form */
public PrefixCodedTerms finish() {
try {
output.close();
return new PrefixCodedTerms(buffer);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private int sharedPrefix(BytesRef term1, BytesRef term2) {
int pos1 = 0;
int pos1End = pos1 + Math.min(term1.length, term2.length);
int pos2 = 0;
while(pos1 < pos1End) {
if (term1.bytes[term1.offset + pos1] != term2.bytes[term2.offset + pos2]) {
return pos1;
}
pos1++;
pos2++;
}
return pos1;
}
}
}
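The format Builder.add writes is: per term, one VInt whose low bit flags a field change and whose upper bits give the length of the prefix shared with the previous term; on a field change, the field string; then a VInt suffix length followed by the suffix bytes. A minimal usage sketch built only from the API above:

    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
    b.add(new Term("id", "doc1"));  // terms must be added in sorted order
    b.add(new Term("id", "doc2"));  // shares the 3-byte prefix "doc"; only "2" is written
    PrefixCodedTerms terms = b.finish();
    for (Term t : terms) {          // Iterable<Term>; entries decode lazily from the RAMFile
      System.out.println(t);
    }

Note that the iterator reuses one Term instance across next() calls (term.set(field, bytes) above), which is exactly why BufferedDeletesStream now copies the term before its sorted-order assert.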

View File: org/apache/lucene/store/RAMFile.java

@@ -29,7 +29,7 @@ public class RAMFile {
private long lastModified = System.currentTimeMillis();
// File used as buffer, in no RAMDirectory
protected RAMFile() {}
public RAMFile() {}
RAMFile(RAMDirectory directory) {
this.directory = directory;

View File: org/apache/lucene/store/RAMInputStream.java

@@ -19,8 +19,10 @@ package org.apache.lucene.store;
import java.io.IOException;
/** A memory-resident {@link IndexInput} implementation. */
class RAMInputStream extends IndexInput implements Cloneable {
/** A memory-resident {@link IndexInput} implementation.
*
* @lucene.internal */
public class RAMInputStream extends IndexInput implements Cloneable {
static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
private RAMFile file;
@@ -33,7 +35,7 @@ class RAMInputStream extends IndexInput implements Cloneable {
private long bufferStart;
private int bufferLength;
RAMInputStream(RAMFile f) throws IOException {
public RAMInputStream(RAMFile f) throws IOException {
file = f;
length = file.length;
if (length/BUFFER_SIZE >= Integer.MAX_VALUE) {

View File: org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java

@@ -41,8 +41,8 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
}
DeleteSlice slice1 = queue.newSlice();
DeleteSlice slice2 = queue.newSlice();
BufferedDeletes bd1 = new BufferedDeletes(false);
BufferedDeletes bd2 = new BufferedDeletes(false);
BufferedDeletes bd1 = new BufferedDeletes();
BufferedDeletes bd2 = new BufferedDeletes();
int last1 = 0;
int last2 = 0;
Set<Term> uniqueValues = new HashSet<Term>();
@@ -70,8 +70,11 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
}
assertEquals(uniqueValues, bd1.terms.keySet());
assertEquals(uniqueValues, bd2.terms.keySet());
assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
.freezeGlobalBuffer(null).terms)));
HashSet<Term> frozenSet = new HashSet<Term>();
for (Term t : queue.freezeGlobalBuffer(null).termsIterable()) {
frozenSet.add(t);
}
assertEquals(uniqueValues, frozenSet);
assertEquals("num deletes must be 0 after freeze", 0, queue
.numGlobalTermDeletes());
}
@@ -129,7 +132,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
if (random.nextInt(5) == 0) {
FrozenBufferedDeletes freezeGlobalBuffer = queue
.freezeGlobalBuffer(null);
assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.length);
assertEquals(termsSinceFreeze, freezeGlobalBuffer.termCount);
assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length);
queriesSinceFreeze = 0;
termsSinceFreeze = 0;
@@ -168,8 +171,11 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
assertEquals(uniqueValues, deletes.terms.keySet());
}
queue.tryApplyGlobalSlice();
assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
.freezeGlobalBuffer(null).terms)));
HashSet<Term> frozenSet = new HashSet<Term>();
for (Term t : queue.freezeGlobalBuffer(null).termsIterable()) {
frozenSet.add(t);
}
assertEquals(uniqueValues, frozenSet);
assertEquals("num deletes must be 0 after freeze", 0, queue
.numGlobalTermDeletes());
}
@@ -188,7 +194,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
this.index = index;
this.ids = ids;
this.slice = queue.newSlice();
deletes = new BufferedDeletes(false);
deletes = new BufferedDeletes();
this.latch = latch;
}

View File: org/apache/lucene/index/TestIndexWriterDelete.java

@@ -954,9 +954,10 @@ public class TestIndexWriterDelete extends LuceneTestCase {
Directory dir = newDirectory();
// Cannot use RandomIndexWriter because we don't want to
// ever call commit() for this test:
// note: tiny rambuffer used, as with a 1MB buffer the test is too slow (flush @ 128,999)
IndexWriter w = new IndexWriter(dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setRAMBufferSizeMB(1.0f).setMaxBufferedDocs(1000).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
.setRAMBufferSizeMB(0.2f).setMaxBufferedDocs(1000).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
w.setInfoStream(VERBOSE ? System.out : null);
int count = 0;
while(true) {
@@ -981,7 +982,7 @@
}
count++;
// Today we applyDelets @ count=7199; even if we make
// Today we applyDeletes @ count=21553; even if we make
// sizable improvements to RAM efficiency of buffered
// del term we're unlikely to go over 100K:
if (count > 100000) {

View File: org/apache/lucene/index/TestPrefixCodedTerms.java

@@ -0,0 +1,136 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestPrefixCodedTerms extends LuceneTestCase {
public void testEmpty() {
PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
PrefixCodedTerms pb = b.finish();
assertFalse(pb.iterator().hasNext());
}
public void testOne() {
Term term = new Term("foo", "bogus");
PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
b.add(term);
PrefixCodedTerms pb = b.finish();
Iterator<Term> iterator = pb.iterator();
assertTrue(iterator.hasNext());
assertEquals(term, iterator.next());
}
public void testRandom() {
Set<Term> terms = new TreeSet<Term>();
int nterms = atLeast(10000);
for (int i = 0; i < nterms; i++) {
Term term = new Term(_TestUtil.randomUnicodeString(random, 2), _TestUtil.randomUnicodeString(random));
terms.add(term);
}
PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
for (Term ref: terms) {
b.add(ref);
}
PrefixCodedTerms pb = b.finish();
Iterator<Term> expected = terms.iterator();
for (Term t : pb) {
assertTrue(expected.hasNext());
assertEquals(expected.next(), t);
}
assertFalse(expected.hasNext());
}
public void testMergeEmpty() {
List<Iterator<Term>> subs = Collections.emptyList();
assertFalse(CoalescedDeletes.mergedIterator(subs).hasNext());
subs = new ArrayList<Iterator<Term>>();
subs.add(new PrefixCodedTerms.Builder().finish().iterator());
subs.add(new PrefixCodedTerms.Builder().finish().iterator());
Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
assertFalse(merged.hasNext());
}
public void testMergeOne() {
Term t1 = new Term("foo", "a");
PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
b1.add(t1);
PrefixCodedTerms pb1 = b1.finish();
Term t2 = new Term("foo", "b");
PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
b2.add(t2);
PrefixCodedTerms pb2 = b2.finish();
List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
subs.add(pb1.iterator());
subs.add(pb2.iterator());
Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
assertTrue(merged.hasNext());
assertEquals(t1, merged.next());
assertTrue(merged.hasNext());
assertEquals(t2, merged.next());
}
public void testMergeRandom() {
PrefixCodedTerms pb[] = new PrefixCodedTerms[_TestUtil.nextInt(random, 2, 10)];
Set<Term> superSet = new TreeSet<Term>();
for (int i = 0; i < pb.length; i++) {
Set<Term> terms = new TreeSet<Term>();
int nterms = _TestUtil.nextInt(random, 0, 10000);
for (int j = 0; j < nterms; j++) {
Term term = new Term(_TestUtil.randomUnicodeString(random, 2), _TestUtil.randomUnicodeString(random, 4));
terms.add(term);
}
superSet.addAll(terms);
PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
for (Term ref: terms) {
b.add(ref);
}
pb[i] = b.finish();
}
List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
for (int i = 0; i < pb.length; i++) {
subs.add(pb[i].iterator());
}
Iterator<Term> expected = superSet.iterator();
Iterator<Term> actual = CoalescedDeletes.mergedIterator(subs);
while (actual.hasNext()) {
assertTrue(expected.hasNext());
assertEquals(expected.next(), actual.next());
}
assertFalse(expected.hasNext());
}
}