mirror of
https://github.com/apache/lucene.git
synced 2025-02-06 18:18:38 +00:00
fix OOM (allocating too-large int[] in indexer for binary lengths)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440224 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d08b259fba
commit
34634ff4ec
@ -23,23 +23,32 @@ import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.store.RAMFile;
|
||||
import org.apache.lucene.store.RAMInputStream;
|
||||
import org.apache.lucene.store.RAMOutputStream;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
||||
|
||||
|
||||
/** Buffers up pending byte[] per doc, then flushes when
|
||||
* segment flushes. */
|
||||
class BinaryDocValuesWriter extends DocValuesWriter {
|
||||
|
||||
private final BytesRefArray bytesRefArray;
|
||||
private final RAMFile bytes;
|
||||
private final RAMOutputStream bytesWriter;
|
||||
private final AppendingLongBuffer lengths;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final Counter iwBytesUsed;
|
||||
private long bytesUsed;
|
||||
private int addedValues = 0;
|
||||
private final BytesRef emptyBytesRef = new BytesRef();
|
||||
|
||||
/**
 * Creates a writer for one binary doc-values field.
 *
 * <p>Stores {@code fieldInfo} and {@code iwBytesUsed}, and allocates the
 * in-memory buffers: a {@link RAMFile} with a {@link RAMOutputStream}
 * writing into it, and an {@link AppendingLongBuffer} for {@code lengths}.
 *
 * @param fieldInfo the field this writer buffers values for
 * @param iwBytesUsed shared counter; also handed to the {@link BytesRefArray}
 */
// NOTE(review): this span comes from a rendered diff in which removed and
// added lines are interleaved; the bytesRefArray assignment is presumably the
// pre-change line that the RAMFile/lengths assignments replace -- confirm
// against the actual revision before relying on it.
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.bytesRefArray = new BytesRefArray(iwBytesUsed);
|
||||
this.bytes = new RAMFile();
|
||||
this.bytesWriter = new RAMOutputStream(bytes);
|
||||
this.iwBytesUsed = iwBytesUsed;
|
||||
this.lengths = new AppendingLongBuffer();
|
||||
}
|
||||
|
||||
public void addValue(int docID, BytesRef value) {
|
||||
@ -56,19 +65,41 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
||||
// Fill in any holes:
|
||||
while(addedValues < docID) {
|
||||
addedValues++;
|
||||
bytesRefArray.append(emptyBytesRef);
|
||||
lengths.add(0);
|
||||
}
|
||||
addedValues++;
|
||||
bytesRefArray.append(value);
|
||||
lengths.add(value.length);
|
||||
try {
|
||||
bytesWriter.writeBytes(value.bytes, value.offset, value.length);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
updateBytesUsed();
|
||||
}
|
||||
|
||||
/**
 * Recomputes this writer's estimated RAM usage and applies the change to
 * {@code iwBytesUsed}.
 *
 * <p>The estimate is {@code lengths.ramBytesUsed()} plus a per-buffer figure
 * derived from the current {@code bytesWriter} file pointer. Only the
 * difference from the previously reported {@code bytesUsed} is added to the
 * counter, and {@code bytesUsed} is then updated to the new estimate.
 */
private void updateBytesUsed() {
|
||||
// nocommit not totally accurate, but just fix not to use RAMFile anyway
|
||||
// Integer division plus one: when the file pointer is an exact multiple of
// 1024 this counts one buffer more than the bytes written strictly need.
// presumably 1024 matches the RAMFile buffer size -- TODO confirm
long numBuffers = (bytesWriter.getFilePointer() / 1024) + 1; // round up
|
||||
// Charge each buffer 1024 bytes plus a fixed 32-byte allowance.
long oversize = numBuffers * (1024 + 32); // fudge for arraylist/etc overhead
|
||||
final long newBytesUsed = lengths.ramBytesUsed() + oversize;
|
||||
// Report only the delta; it is negative if the estimate went down.
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
|
||||
bytesUsed = newBytesUsed;
|
||||
}
|
||||
|
||||
/**
 * Closes {@code bytesWriter}.
 *
 * <p>The {@code maxDoc} argument is not used by this implementation.
 *
 * @param maxDoc unused here
 * @throws RuntimeException wrapping any {@link IOException} thrown by
 *         {@code bytesWriter.close()}
 */
@Override
|
||||
public void finish(int maxDoc) {
|
||||
try {
|
||||
bytesWriter.close();
|
||||
} catch (IOException e) {
|
||||
// finish(int) declares no checked exception, so rethrow unchecked with the cause preserved.
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
|
||||
final int maxDoc = state.segmentInfo.getDocCount();
|
||||
final int size = addedValues;
|
||||
|
||||
dvConsumer.addBinaryField(fieldInfo,
|
||||
new Iterable<BytesRef>() {
|
||||
@ -76,8 +107,18 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
||||
@Override
|
||||
public Iterator<BytesRef> iterator() {
|
||||
return new Iterator<BytesRef>() {
|
||||
RAMInputStream bytesReader;
|
||||
AppendingLongBuffer.Iterator iter = lengths.iterator();
|
||||
BytesRef value = new BytesRef();
|
||||
int upto;
|
||||
|
||||
{
|
||||
try {
|
||||
bytesReader = new RAMInputStream("bogus", bytes);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
@ -91,8 +132,15 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
if (upto < bytesRefArray.size()) {
|
||||
bytesRefArray.get(value, upto);
|
||||
if (upto < size) {
|
||||
int length = (int) iter.next();
|
||||
value.grow(length);
|
||||
try {
|
||||
bytesReader.readBytes(value.bytes, 0, length);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
value.length = length;
|
||||
} else {
|
||||
value.length = 0;
|
||||
}
|
||||
|
@ -37,7 +37,6 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||
public class Test2BBinaryDocValues extends LuceneTestCase {
|
||||
|
||||
// indexes Integer.MAX_VALUE docs with a fixed binary field
|
||||
// nocommit: broken ram accounting? ant test -Dtestcase=Test2BBinaryDocValues -Dtests.method=testFixedBinary -Dtests.seed=5554AA830176B848 -Dtests.slow=true -Dtests.docvaluesformat=Disk -Dtests.locale=sr_RS_#Latn -Dtests.timezone=Africa/Luanda -Dtests.file.encoding=UTF-8
|
||||
public void testFixedBinary() throws Exception {
|
||||
BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedBinary"));
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
@ -98,7 +97,6 @@ public class Test2BBinaryDocValues extends LuceneTestCase {
|
||||
}
|
||||
|
||||
// indexes Integer.MAX_VALUE docs with a variable binary field
|
||||
// nocommit: broken ram accounting? ant test -Dtestcase=Test2BBinaryDocValues -Dtests.method=testVariableBinary -Dtests.seed=FD50D16920062578 -Dtests.slow=true -Dtests.docvaluesformat=Disk -Dtests.locale=sr_ME_#Latn -Dtests.timezone=America/Argentina/Tucuman -Dtests.file.encoding=UTF-8
|
||||
public void testVariableBinary() throws Exception {
|
||||
BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BVariableBinary"));
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
|
@ -22,7 +22,6 @@ import java.util.Comparator;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
|
||||
/**
|
||||
|
@ -1,4 +1,4 @@
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -20,6 +20,13 @@ package org.apache.lucene.util;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
|
||||
/**
|
||||
* A simple append only random-access {@link BytesRef} array that stores full
|
||||
@ -31,7 +38,7 @@ import java.util.Comparator;
|
||||
* @lucene.internal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class BytesRefArray {
|
||||
final class BytesRefArray {
|
||||
private final ByteBlockPool pool;
|
||||
private int[] offsets = new int[1];
|
||||
private int lastElement = 0;
|
@ -1,4 +1,4 @@
|
||||
package org.apache.lucene.search.suggest.fst;
|
||||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -19,9 +19,9 @@ package org.apache.lucene.search.suggest.fst;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.suggest.fst.BytesRefSorter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
|
||||
/**
|
@ -1,4 +1,4 @@
|
||||
package org.apache.lucene.search.suggest.fst;
|
||||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
@ -22,9 +22,8 @@ import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.suggest.fst.Sort;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -34,7 +34,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.TokenStreamToAutomaton;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.fst.Sort;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
@ -20,7 +20,8 @@ package org.apache.lucene.search.suggest.fst;
|
||||
import java.io.*;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest.fst;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.suggest.InMemorySorter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
@ -26,8 +26,9 @@ import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.search.suggest.Sort.SortInfo;
|
||||
import org.apache.lucene.search.suggest.fst.FSTCompletion.Completion;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.SortInfo;
|
||||
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
|
@ -28,7 +28,7 @@ import java.util.List;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.search.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -22,7 +22,7 @@ import java.util.*;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
@ -17,6 +17,8 @@ package org.apache.lucene.search.suggest.fst;
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.suggest.InMemorySorter;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest.fst;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -22,9 +22,10 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.search.suggest.fst.Sort.BufferSize;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.search.suggest.fst.Sort.SortInfo;
|
||||
import org.apache.lucene.search.suggest.Sort;
|
||||
import org.apache.lucene.search.suggest.Sort.BufferSize;
|
||||
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
|
||||
import org.apache.lucene.search.suggest.Sort.SortInfo;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.junit.*;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user