mirror of https://github.com/apache/lucene.git
LUCENE-6814: release heap in PatternTokenizer.close
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1712865 13f79535-47bb-0310-9956-ffa450edef68
Parent: 7fed8ccb25
Commit: 04b3b96a94
CHANGES.txt

@@ -248,6 +248,11 @@ Bug Fixes
 * LUCENE-6872: IndexWriter handles any VirtualMachineError, not just OOM,
   as tragic. (Robert Muir)
 
+* LUCENE-6814: PatternTokenizer no longer hangs onto heap sized to the
+  maximum input string it's ever seen, which can be a large memory
+  "leak" if you tokenize large strings with many threads across many
+  indices (Alex Chow via Mike McCandless)
+
 Other
 
 * LUCENE-6478: Test execution can hang with java.security.debug. (Dawid Weiss)
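For context, a minimal sketch of the reuse pattern this entry describes, assuming a hypothetical list of input strings (the class name, the pattern, and the docs parameter are illustrative, not part of the commit). A single Tokenizer instance is reused across documents via setReader/reset/.../end/close, so before this fix its internal StringBuilder stayed sized to the largest input it had ever buffered:

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Illustrative sketch (not part of the commit): one PatternTokenizer reused
// across many documents. Each pass grows the tokenizer's internal buffer to
// the size of that input; with this fix, close() trims the buffer again.
public class ReusePatternTokenizerSketch {
  static void tokenizeAll(List<String> docs) throws IOException {
    PatternTokenizer tok = new PatternTokenizer(Pattern.compile("\\W+"), -1);
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    for (String doc : docs) {
      tok.setReader(new StringReader(doc));
      tok.reset();
      while (tok.incrementToken()) {
        System.out.println(term.toString());
      }
      tok.end();
      tok.close();  // now releases the heap held by the internal StringBuilder
    }
  }
}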
PatternTokenizer.java

@@ -136,10 +136,20 @@ public final class PatternTokenizer extends Tokenizer {
     offsetAtt.setOffset(ofs, ofs);
   }
 
+  @Override
+  public void close() throws IOException {
+    try {
+      super.close();
+    } finally {
+      str.setLength(0);
+      str.trimToSize();
+    }
+  }
+
   @Override
   public void reset() throws IOException {
     super.reset();
-    fillBuffer(str, input);
+    fillBuffer(input);
     matcher.reset(str);
     index = 0;
   }
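The key detail in the new close() is that StringBuilder.setLength(0) only resets the logical length; the backing char[] keeps its old capacity, and trimToSize() is what actually lets that memory go. A standalone illustration of that JDK behavior (the sizes are arbitrary, and trimToSize() is only documented as an attempt to shrink, though in practice it does):

// Illustrative only: why close() calls both setLength(0) and trimToSize().
public class StringBuilderTrimSketch {
  public static void main(String[] args) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 1_048_576; i++) {
      sb.append(' ');                    // grow to 1M chars (~2 MB of heap)
    }
    sb.setLength(0);
    System.out.println(sb.capacity());   // still >= 1,048,576: buffer retained
    sb.trimToSize();
    System.out.println(sb.capacity());   // typically 0 now: buffer released
  }
}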
@@ -147,11 +157,11 @@ public final class PatternTokenizer extends Tokenizer {
   // TODO: we should see if we can make this tokenizer work without reading
   // the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
   final char[] buffer = new char[8192];
-  private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
+  private void fillBuffer(Reader input) throws IOException {
     int len;
-    sb.setLength(0);
+    str.setLength(0);
     while ((len = input.read(buffer)) > 0) {
-      sb.append(buffer, 0, len);
+      str.append(buffer, 0, len);
     }
   }
 }
TestPatternTokenizer.java

@@ -146,4 +146,37 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase
     checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
     b.close();
   }
+
+  // LUCENE-6814
+  public void testHeapFreedAfterClose() throws Exception {
+    // TODO: can we move this to BaseTSTC to catch other "hangs onto heap"ers?
+
+    // Build a 1MB string:
+    StringBuilder b = new StringBuilder();
+    for(int i=0;i<1024;i++) {
+      // 1023 spaces, then an x
+      for(int j=0;j<1023;j++) {
+        b.append(' ');
+      }
+      b.append('x');
+    }
+
+    String big = b.toString();
+
+    Pattern x = Pattern.compile("x");
+
+    List<Tokenizer> tokenizers = new ArrayList<>();
+    for(int i=0;i<512;i++) {
+      Tokenizer stream = new PatternTokenizer(x, -1);
+      tokenizers.add(stream);
+      stream.setReader(new StringReader(big));
+      stream.reset();
+      for(int j=0;j<1024;j++) {
+        assertTrue(stream.incrementToken());
+      }
+      assertFalse(stream.incrementToken());
+      stream.end();
+      stream.close();
+    }
+  }
 }
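The 512 x 1MB sizing in the test is what makes the leak observable: if each closed tokenizer kept its buffer, the retained heap would be on the order of a gigabyte, far above a typical test JVM heap, so the test would fail with OutOfMemoryError without the fix. A back-of-the-envelope version of that arithmetic (illustrative, not part of the commit):

// Rough retained-heap estimate for the test above (illustrative only).
public class HeapEstimateSketch {
  public static void main(String[] args) {
    long inputChars   = 1024L * 1024;   // the 1MB test string: 1,048,576 chars
    long bytesPerChar = 2;              // Java chars are 2-byte UTF-16 units
    long tokenizers   = 512;            // instances kept alive by the test
    long retainedBytes = inputChars * bytesPerChar * tokenizers;
    System.out.println(retainedBytes / (1024 * 1024) + " MiB");  // prints 1024 MiB
  }
}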