LUCENE-3720: fix BeiderMorseFilter OOM issues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1386662 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-17 15:32:46 +00:00
parent e359874a3c
commit 310eb39792
13 changed files with 12 additions and 16 deletions

View File

@ -103,7 +103,7 @@
<classpathentry kind="lib" path="lucene/test-framework/lib/junit-4.10.jar"/> <classpathentry kind="lib" path="lucene/test-framework/lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="lucene/sandbox/lib/jakarta-regexp-1.4.jar"/> <classpathentry kind="lib" path="lucene/sandbox/lib/jakarta-regexp-1.4.jar"/>
<classpathentry kind="lib" path="lucene/analysis/icu/lib/icu4j-49.1.jar"/> <classpathentry kind="lib" path="lucene/analysis/icu/lib/icu4j-49.1.jar"/>
<classpathentry kind="lib" path="lucene/analysis/phonetic/lib/commons-codec-1.6.jar"/> <classpathentry kind="lib" path="lucene/analysis/phonetic/lib/commons-codec-1.7.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-fsa-1.5.3.jar"/> <classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-fsa-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-polish-1.5.3.jar"/> <classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-polish-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-stemming-1.5.3.jar"/> <classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-stemming-1.5.3.jar"/>

View File

@ -152,7 +152,7 @@
<dependency> <dependency>
<groupId>commons-codec</groupId> <groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId> <artifactId>commons-codec</artifactId>
<version>1.6</version> <version>1.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-cli</groupId> <groupId>commons-cli</groupId>

View File

@ -164,6 +164,9 @@ Bug Fixes
RAM, instead of checking the estimated size of the segment RAM, instead of checking the estimated size of the segment
to decide whether to cache it. (Mike McCandless) to decide whether to cache it. (Mike McCandless)
* LUCENE-3720: fix memory-consumption issues with BeiderMorseFilter.
(Thomas Neidhart via Robert Muir)
Optimizations Optimizations
* LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a * LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a

View File

@ -31,7 +31,7 @@ lucene-analyzers-morfologik-XX.jar
lucene-analyzers-phonetic-XX.jar lucene-analyzers-phonetic-XX.jar
An add-on analysis library that provides phonetic encoders via Apache An add-on analysis library that provides phonetic encoders via Apache
Commons-Codec. Note: this module depends on the commons-codec jar Commons-Codec. Note: this module depends on the commons-codec jar
file (version >= 1.4) file
lucene-analyzers-smartcn-XX.jar lucene-analyzers-smartcn-XX.jar
An add-on analysis library that provides word segmentation for Simplified An add-on analysis library that provides word segmentation for Simplified

View File

@ -27,7 +27,7 @@
<path id="classpath"> <path id="classpath">
<pathelement path="${analyzers-common.jar}"/> <pathelement path="${analyzers-common.jar}"/>
<pathelement path="lib/commons-codec-1.6.jar"/> <pathelement path="lib/commons-codec-1.7.jar"/>
<path refid="base.classpath"/> <path refid="base.classpath"/>
</path> </path>

View File

@ -19,7 +19,7 @@
<ivy-module version="2.0"> <ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-phonetic"/> <info organisation="org.apache.lucene" module="analyzers-phonetic"/>
<dependencies> <dependencies>
<dependency org="commons-codec" name="commons-codec" rev="1.6" transitive="false"/> <dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies> </dependencies>
</ivy-module> </ivy-module>

View File

@ -32,12 +32,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** /**
* TokenFilter for Beider-Morse phonetic encoding. * TokenFilter for Beider-Morse phonetic encoding.
* <p>
* <b><font color="red">
* WARNING: some inputs can cause extremely high RAM usage!
* https://issues.apache.org/jira/browse/CODEC-132
* </font></b>
* </p>
* @see BeiderMorseEncoder * @see BeiderMorseEncoder
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -89,7 +89,6 @@ public class TestBeiderMorseFilter extends BaseTokenStreamTestCase {
new int[] { 1 }); new int[] { 1 });
} }
@Ignore("broken: causes OOM on some strings (https://issues.apache.org/jira/browse/CODEC-132)")
public void testRandom() throws Exception { public void testRandom() throws Exception {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER); checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} }

View File

@ -1 +0,0 @@
b7f0fc8f61ecadeb3695f0b9464755eee44374d4

View File

@ -0,0 +1 @@
9cd61d269c88f9fb0eb36cea1efcd596ab74772f

View File

@ -20,7 +20,7 @@
<info organisation="org.apache.solr" module="core"/> <info organisation="org.apache.solr" module="core"/>
<dependencies> <dependencies>
<dependency org="commons-codec" name="commons-codec" rev="1.6" transitive="false"/> <dependency org="commons-codec" name="commons-codec" rev="1.7" transitive="false"/>
<dependency org="commons-fileupload" name="commons-fileupload" rev="1.2.1" transitive="false"/> <dependency org="commons-fileupload" name="commons-fileupload" rev="1.2.1" transitive="false"/>
<dependency org="commons-cli" name="commons-cli" rev="1.2" transitive="false"/> <dependency org="commons-cli" name="commons-cli" rev="1.2" transitive="false"/>
<dependency org="commons-lang" name="commons-lang" rev="2.6" transitive="false"/> <dependency org="commons-lang" name="commons-lang" rev="2.6" transitive="false"/>

View File

@ -1 +0,0 @@
b7f0fc8f61ecadeb3695f0b9464755eee44374d4

View File

@ -0,0 +1 @@
9cd61d269c88f9fb0eb36cea1efcd596ab74772f