From a265a8ab9c12af888dd7acd0c38f745230f5d847 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 22 Jul 2014 03:50:05 +0000 Subject: [PATCH] LUCENE-5840: fix parsing of zero-affixes with continuation classes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1612452 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/analysis/hunspell/Dictionary.java | 7 +++- .../analysis/hunspell/TestZeroAffix2.java | 32 +++++++++++++++++++ .../lucene/analysis/hunspell/zeroaffix2.aff | 6 ++++ .../lucene/analysis/hunspell/zeroaffix2.dic | 2 ++ 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestZeroAffix2.java create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.aff create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.dic diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index f8308f0ad68..1adcb2ac5f4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -474,9 +474,10 @@ public class Dictionary { char flag = flagParsingStrategy.parseFlag(ruleArgs[1]); String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2]; - String affixArg = ruleArgs[3].equals("0") ? "" : ruleArgs[3]; + String affixArg = ruleArgs[3]; char appendFlags[] = null; + // first: parse continuation classes out of affix int flagSep = affixArg.lastIndexOf('/'); if (flagSep != -1) { String flagPart = affixArg.substring(flagSep + 1); @@ -490,6 +491,10 @@ public class Dictionary { Arrays.sort(appendFlags); twoStageAffix = true; } + // zero affix -> empty string + if ("0".equals(affixArg)) { + affixArg = ""; + } String condition = ruleArgs.length > 4 ? ruleArgs[4] : "."; // at least the gascon affix file has this issue diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestZeroAffix2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestZeroAffix2.java new file mode 100644 index 00000000000..1fcc0c07d2f --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestZeroAffix2.java @@ -0,0 +1,32 @@ +package org.apache.lucene.analysis.hunspell; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.junit.BeforeClass; + +public class TestZeroAffix2 extends StemmerTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + init("zeroaffix2.aff", "zeroaffix2.dic"); + } + + public void testStemming() { + assertStemsTo("b", "beer"); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.aff new file mode 100644 index 00000000000..72e273f53f1 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.aff @@ -0,0 +1,6 @@ +SET UTF-8 +FLAG num + +SFX 322 Y 1 +SFX 322 eer 0/100 . + diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.dic new file mode 100644 index 00000000000..41715640712 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/zeroaffix2.dic @@ -0,0 +1,2 @@ +1 +beer/322