mirror of https://github.com/apache/lucene.git
LUCENE-2001: Fix parsing bug in wordnet contrib
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@828091 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56f83862cf
commit
afc66e4e66
|
@ -38,6 +38,9 @@ Bug fixes
|
|||
* LUCENE-1953: FastVectorHighlighter: small fragCharSize can cause
|
||||
StringIndexOutOfBoundsException. (Koji Sekiguchi)
|
||||
|
||||
* LUCENE-2001: Wordnet Syns2Index incorrectly parses synonyms that
|
||||
contain a single quote. (Parag H. Dave via Robert Muir)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,
|
||||
|
|
|
@ -45,6 +45,14 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
|||
new int[] { 1, 1, 1, 1, 0, 0 });
|
||||
}
|
||||
|
||||
public void testSynonymsSingleQuote() throws Exception {
|
||||
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
||||
/* all expansions */
|
||||
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
|
||||
assertAnalyzesTo(analyzer, "king",
|
||||
new String[] { "king", "baron" });
|
||||
}
|
||||
|
||||
public void testSynonymsLimitedAmount() throws Exception {
|
||||
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
||||
/* limit to one synonym expansion */
|
||||
|
|
|
@ -3,3 +3,7 @@ s(100000001,2,'wood',n,1,0).
|
|||
s(100000001,3,'forest',n,1,0).
|
||||
s(100000002,1,'wolfish',n,1,0).
|
||||
s(100000002,2,'ravenous',n,1,0).
|
||||
s(100000003,1,'king',n,1,1).
|
||||
s(100000003,2,'baron',n,1,1).
|
||||
s(100000004,1,'king''sevil',n,1,1).
|
||||
s(100000004,2,'meany',n,1,1).
|
||||
|
|
|
@ -165,8 +165,8 @@ public class Syns2Index
|
|||
String num = line.substring(0, comma);
|
||||
int q1 = line.indexOf('\'');
|
||||
line = line.substring(q1 + 1);
|
||||
int q2 = line.indexOf('\'');
|
||||
String word = line.substring(0, q2).toLowerCase();
|
||||
int q2 = line.lastIndexOf('\'');
|
||||
String word = line.substring(0, q2).toLowerCase().replace("''", "'");
|
||||
|
||||
// make sure is a normal word
|
||||
if (! isDecent(word))
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
package org.apache.lucene.wordnet;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestWordnet extends LuceneTestCase {
|
||||
private Searcher searcher;
|
||||
|
||||
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||
File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt");
|
||||
|
||||
String storePathName =
|
||||
new File(System.getProperty("tempDir"),"testLuceneWordnet").getAbsolutePath();
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
// create a temporary synonym index
|
||||
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
|
||||
|
||||
try {
|
||||
Syns2Index.main(commandLineArgs);
|
||||
} catch (Throwable t) { throw new RuntimeException(t); }
|
||||
|
||||
searcher = new IndexSearcher(FSDirectory.open(new File(storePathName)), true);
|
||||
}
|
||||
|
||||
public void testExpansion() throws IOException {
|
||||
assertExpandsTo("woods", new String[] { "woods", "forest", "wood" });
|
||||
}
|
||||
|
||||
public void testExpansionSingleQuote() throws IOException {
|
||||
assertExpandsTo("king", new String[] { "king", "baron" });
|
||||
}
|
||||
|
||||
private void assertExpandsTo(String term, String expected[]) throws IOException {
|
||||
Query expandedQuery = SynExpand.expand(term, searcher, new
|
||||
WhitespaceAnalyzer(), "field", 1F);
|
||||
BooleanQuery expectedQuery = new BooleanQuery();
|
||||
for (String t : expected)
|
||||
expectedQuery.add(new TermQuery(new Term("field", t)),
|
||||
BooleanClause.Occur.SHOULD);
|
||||
assertEquals(expectedQuery, expandedQuery);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown() throws Exception {
|
||||
searcher.close();
|
||||
rmDir(storePathName); // delete our temporary synonym index
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private void rmDir(String directory) {
|
||||
File dir = new File(directory);
|
||||
File[] files = dir.listFiles();
|
||||
for (int i = 0; i < files.length; i++) {
|
||||
files[i].delete();
|
||||
}
|
||||
dir.delete();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
s(100000001,1,'woods',n,1,0).
|
||||
s(100000001,2,'wood',n,1,0).
|
||||
s(100000001,3,'forest',n,1,0).
|
||||
s(100000002,1,'wolfish',n,1,0).
|
||||
s(100000002,2,'ravenous',n,1,0).
|
||||
s(100000003,1,'king',n,1,1).
|
||||
s(100000003,2,'baron',n,1,1).
|
||||
s(100000004,1,'king''sevil',n,1,1).
|
||||
s(100000004,2,'meany',n,1,1).
|
Loading…
Reference in New Issue