mirror of https://github.com/apache/lucene.git
LUCENE-5777: fix double escaping of dash in hunspell conditions
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1603851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
79351129ef
commit
1e881bf793
|
@ -343,7 +343,7 @@ Bug fixes
|
|||
* LUCENE-5747: Project-specific settings for the eclipse development
|
||||
environment will prevent automatic code reformatting. (Shawn Heisey)
|
||||
|
||||
* LUCENE-5768: Hunspell condition checks containing character classes
|
||||
* LUCENE-5768, LUCENE-5777: Hunspell condition checks containing character classes
|
||||
were buggy. (Clinton Gormley, Robert Muir)
|
||||
|
||||
Test Framework
|
||||
|
|
|
@ -356,6 +356,25 @@ public class Dictionary {
|
|||
}
|
||||
return builder.finish();
|
||||
}
|
||||
|
||||
static String escapeDash(String re) {
|
||||
// we have to be careful, even though dash doesn't have a special meaning,
|
||||
// some dictionaries already escape it (e.g. pt_PT), so we don't want to nullify it
|
||||
StringBuilder escaped = new StringBuilder();
|
||||
for (int i = 0; i < re.length(); i++) {
|
||||
char c = re.charAt(i);
|
||||
if (c == '-') {
|
||||
escaped.append("\\-");
|
||||
} else {
|
||||
escaped.append(c);
|
||||
if (c == '\\' && i + 1 < re.length()) {
|
||||
escaped.append(re.charAt(i+1));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return escaped.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a specific affix rule putting the result into the provided affix map
|
||||
|
@ -425,7 +444,7 @@ public class Dictionary {
|
|||
}
|
||||
// "dash hasn't got special meaning" (we must escape it)
|
||||
if (condition.indexOf('-') >= 0) {
|
||||
condition = condition.replace("-", "\\-");
|
||||
condition = escapeDash(condition);
|
||||
}
|
||||
|
||||
final String regex;
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestDoubleEscape extends StemmerTestBase {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
init("double-escaped.aff", "double-escaped.dic");
|
||||
}
|
||||
|
||||
public void testStemming() {
|
||||
assertStemsTo("adubo", "adubar");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
SET UTF-8
|
||||
|
||||
SFX X Y 1
|
||||
SFX X ar o [^\-]ar
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
1
|
||||
adubar/X
|
Loading…
Reference in New Issue