LUCENE-2001: Fix parsing bug in wordnet contrib

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@828091 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2009-10-21 16:32:03 +00:00
parent 56f83862cf
commit afc66e4e66
6 changed files with 115 additions and 2 deletions

View File

@ -38,6 +38,9 @@ Bug fixes
* LUCENE-1953: FastVectorHighlighter: small fragCharSize can cause
StringIndexOutOfBoundsException. (Koji Sekiguchi)
* LUCENE-2001: Wordnet Syns2Index incorrectly parses synonyms that
contain a single quote. (Parag H. Dave via Robert Muir)
New features
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,

View File

@ -45,6 +45,14 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
new int[] { 1, 1, 1, 1, 0, 0 });
}
public void testSynonymsSingleQuote() throws Exception {
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
/* all expansions */
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
assertAnalyzesTo(analyzer, "king",
new String[] { "king", "baron" });
}
public void testSynonymsLimitedAmount() throws Exception {
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
/* limit to one synonym expansion */

View File

@ -3,3 +3,7 @@ s(100000001,2,'wood',n,1,0).
s(100000001,3,'forest',n,1,0).
s(100000002,1,'wolfish',n,1,0).
s(100000002,2,'ravenous',n,1,0).
s(100000003,1,'king',n,1,1).
s(100000003,2,'baron',n,1,1).
s(100000004,1,'king''sevil',n,1,1).
s(100000004,2,'meany',n,1,1).

View File

@ -165,8 +165,8 @@ public class Syns2Index
String num = line.substring(0, comma);
int q1 = line.indexOf('\'');
line = line.substring(q1 + 1);
int q2 = line.indexOf('\'');
String word = line.substring(0, q2).toLowerCase();
int q2 = line.lastIndexOf('\'');
String word = line.substring(0, q2).toLowerCase().replace("''", "'");
// make sure is a normal word
if (! isDecent(word))

View File

@ -0,0 +1,89 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.LuceneTestCase;
public class TestWordnet extends LuceneTestCase {
private Searcher searcher;
File dataDir = new File(System.getProperty("dataDir", "./bin"));
File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt");
String storePathName =
new File(System.getProperty("tempDir"),"testLuceneWordnet").getAbsolutePath();
@Override
protected void setUp() throws Exception {
super.setUp();
// create a temporary synonym index
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
try {
Syns2Index.main(commandLineArgs);
} catch (Throwable t) { throw new RuntimeException(t); }
searcher = new IndexSearcher(FSDirectory.open(new File(storePathName)), true);
}
public void testExpansion() throws IOException {
assertExpandsTo("woods", new String[] { "woods", "forest", "wood" });
}
public void testExpansionSingleQuote() throws IOException {
assertExpandsTo("king", new String[] { "king", "baron" });
}
private void assertExpandsTo(String term, String expected[]) throws IOException {
Query expandedQuery = SynExpand.expand(term, searcher, new
WhitespaceAnalyzer(), "field", 1F);
BooleanQuery expectedQuery = new BooleanQuery();
for (String t : expected)
expectedQuery.add(new TermQuery(new Term("field", t)),
BooleanClause.Occur.SHOULD);
assertEquals(expectedQuery, expandedQuery);
}
@Override
protected void tearDown() throws Exception {
searcher.close();
rmDir(storePathName); // delete our temporary synonym index
super.tearDown();
}
private void rmDir(String directory) {
File dir = new File(directory);
File[] files = dir.listFiles();
for (int i = 0; i < files.length; i++) {
files[i].delete();
}
dir.delete();
}
}

View File

@ -0,0 +1,9 @@
s(100000001,1,'woods',n,1,0).
s(100000001,2,'wood',n,1,0).
s(100000001,3,'forest',n,1,0).
s(100000002,1,'wolfish',n,1,0).
s(100000002,2,'ravenous',n,1,0).
s(100000003,1,'king',n,1,1).
s(100000003,2,'baron',n,1,1).
s(100000004,1,'king''sevil',n,1,1).
s(100000004,2,'meany',n,1,1).