LUCENE-3720: fix BeiderMorseFilter OOM issues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1386662 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-17 15:32:46 +00:00
parent e359874a3c
commit 310eb39792
13 changed files with 12 additions and 16 deletions

View File

@ -103,7 +103,7 @@
<classpathentry kind="lib" path="lucene/test-framework/lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="lucene/sandbox/lib/jakarta-regexp-1.4.jar"/>
<classpathentry kind="lib" path="lucene/analysis/icu/lib/icu4j-49.1.jar"/>
<classpathentry kind="lib" path="lucene/analysis/phonetic/lib/commons-codec-1.6.jar"/>
<classpathentry kind="lib" path="lucene/analysis/phonetic/lib/commons-codec-1.7.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-fsa-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-polish-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-stemming-1.5.3.jar"/>

View File

@ -152,7 +152,7 @@
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.6</version>
<version>1.7</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>

View File

@ -164,6 +164,9 @@ Bug Fixes
RAM, instead of checking the estimated size of the segment
to decide whether to cache it. (Mike McCandless)
* LUCENE-3720: fix memory-consumption issues with BeiderMorseFilter.
(Thomas Neidhart via Robert Muir)
Optimizations
* LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a

View File

@ -31,7 +31,7 @@ lucene-analyzers-morfologik-XX.jar
lucene-analyzers-phonetic-XX.jar
An add-on analysis library that provides phonetic encoders via Apache
Commons-Codec. Note: this module depends on the commons-codec jar
file (version >= 1.4)
file
lucene-analyzers-smartcn-XX.jar
An add-on analysis library that provides word segmentation for Simplified

View File

@ -27,7 +27,7 @@
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="lib/commons-codec-1.6.jar"/>
<pathelement path="lib/commons-codec-1.7.jar"/>
<path refid="base.classpath"/>
</path>

View File

@ -19,7 +19,7 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-phonetic"/>
<dependencies>
<dependency org="commons-codec" name="commons-codec" rev="1.6" transitive="false"/>
<dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>

View File

@ -32,12 +32,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* TokenFilter for Beider-Morse phonetic encoding.
* <p>
* <b><font color="red">
* WARNING: some inputs can cause extremely high RAM usage!
* https://issues.apache.org/jira/browse/CODEC-132
* </font></b>
* </p>
* @see BeiderMorseEncoder
* @lucene.experimental
*/

View File

@ -88,8 +88,7 @@ public class TestBeiderMorseFilter extends BaseTokenStreamTestCase {
new int[] { 4 },
new int[] { 1 });
}
@Ignore("broken: causes OOM on some strings (https://issues.apache.org/jira/browse/CODEC-132)")
public void testRandom() throws Exception {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
}

View File

@ -1 +0,0 @@
b7f0fc8f61ecadeb3695f0b9464755eee44374d4

View File

@ -0,0 +1 @@
9cd61d269c88f9fb0eb36cea1efcd596ab74772f

View File

@ -20,7 +20,7 @@
<info organisation="org.apache.solr" module="core"/>
<dependencies>
<dependency org="commons-codec" name="commons-codec" rev="1.6" transitive="false"/>
<dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
<dependency org="commons-fileupload" name="commons-fileupload" rev="1.2.1" transitive="false"/>
<dependency org="commons-cli" name="commons-cli" rev="1.2" transitive="false"/>
<dependency org="commons-lang" name="commons-lang" rev="2.6" transitive="false"/>

View File

@ -1 +0,0 @@
b7f0fc8f61ecadeb3695f0b9464755eee44374d4

View File

@ -0,0 +1 @@
9cd61d269c88f9fb0eb36cea1efcd596ab74772f