mirror of https://github.com/apache/lucene.git
LUCENE-6814: release heap in PatternTokenizer.close
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1712865 13f79535-47bb-0310-9956-ffa450edef68
parent 7fed8ccb25
commit 04b3b96a94
lucene/CHANGES.txt

@@ -248,6 +248,11 @@ Bug Fixes
 * LUCENE-6872: IndexWriter handles any VirtualMachineError, not just OOM,
   as tragic. (Robert Muir)
 
+* LUCENE-6814: PatternTokenizer no longer hangs onto heap sized to the
+  maximum input string it's ever seen, which can be a large memory
+  "leak" if you tokenize large strings with many threads across many
+  indices (Alex Chow via Mike McCandless)
+
 Other
 
 * LUCENE-6478: Test execution can hang with java.security.debug. (Dawid Weiss)
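For context on the CHANGES entry: a StringBuilder keeps its backing char[] at peak size even after setLength(0), so a long-lived tokenizer retains heap proportional to the largest document it has ever buffered. A minimal standalone sketch of that JDK behavior (illustrative only; the class name is ours, not Lucene code):

// Clearing a StringBuilder does not shrink its backing array;
// only trimToSize() (what the fix adds in close()) releases it.
public class StringBuilderRetention {
  public static void main(String[] args) {
    StringBuilder sb = new StringBuilder();
    sb.append(new char[1024 * 1024]);  // grow to 1M chars (~2 MB of heap)

    sb.setLength(0);                   // empties the content...
    System.out.println(sb.capacity()); // ...but capacity is still >= 1048576

    sb.trimToSize();                   // attempts to shrink the buffer
    System.out.println(sb.capacity()); // typically 0 now
  }
}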
lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java

@@ -136,10 +136,20 @@ public final class PatternTokenizer extends Tokenizer {
     offsetAtt.setOffset(ofs, ofs);
   }
 
+  @Override
+  public void close() throws IOException {
+    try {
+      super.close();
+    } finally {
+      str.setLength(0);
+      str.trimToSize();
+    }
+  }
+
   @Override
   public void reset() throws IOException {
     super.reset();
-    fillBuffer(str, input);
+    fillBuffer(input);
     matcher.reset(str);
     index = 0;
   }
@@ -147,11 +157,11 @@ public final class PatternTokenizer extends Tokenizer {
   // TODO: we should see if we can make this tokenizer work without reading
   // the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
   final char[] buffer = new char[8192];
-  private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
+  private void fillBuffer(Reader input) throws IOException {
     int len;
-    sb.setLength(0);
+    str.setLength(0);
     while ((len = input.read(buffer)) > 0) {
-      sb.append(buffer, 0, len);
+      str.append(buffer, 0, len);
     }
   }
 }
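The close() override trims str rather than dropping it because Lucene tokenizers are designed for reuse: the same instance cycles through setReader/reset/incrementToken/end/close for each document. A hedged sketch of that caller-side cycle (not part of the patch; docs and the class name are placeholders):

import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;

public class ReuseCycle {
  public static void main(String[] args) throws Exception {
    String[] docs = { "a x b", "c x d" }; // placeholder documents
    Tokenizer t = new PatternTokenizer(Pattern.compile("x"), -1); // group=-1: split on matches
    for (String doc : docs) {
      t.setReader(new StringReader(doc));
      t.reset();                          // fills str from the reader
      while (t.incrementToken()) {
        // consume term attributes here
      }
      t.end();
      t.close(); // with this patch: also trims str back to zero capacity
    }
  }
}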
lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java

@@ -146,4 +146,37 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase
     checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
     b.close();
   }
+
+  // LUCENE-6814
+  public void testHeapFreedAfterClose() throws Exception {
+    // TODO: can we move this to BaseTSTC to catch other "hangs onto heap"ers?
+
+    // Build a 1MB string:
+    StringBuilder b = new StringBuilder();
+    for(int i=0;i<1024;i++) {
+      // 1023 spaces, then an x
+      for(int j=0;j<1023;j++) {
+        b.append(' ');
+      }
+      b.append('x');
+    }
+
+    String big = b.toString();
+
+    Pattern x = Pattern.compile("x");
+
+    List<Tokenizer> tokenizers = new ArrayList<>();
+    for(int i=0;i<512;i++) {
+      Tokenizer stream = new PatternTokenizer(x, -1);
+      tokenizers.add(stream);
+      stream.setReader(new StringReader(big));
+      stream.reset();
+      for(int j=0;j<1024;j++) {
+        assertTrue(stream.incrementToken());
+      }
+      assertFalse(stream.incrementToken());
+      stream.end();
+      stream.close();
+    }
+  }
 }
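A note on the test's shape: it keeps every tokenizer in the tokenizers list so all 512 instances stay reachable at once. Each one's str would otherwise pin roughly 2 MB (the 1MB string is 1,048,576 two-byte chars), about 1 GB in total, so the test presumably cannot pass under a typical test heap unless close() actually releases the buffers.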