mirror of https://github.com/apache/lucene.git
LUCENE-2001: Fix parsing bug in wordnet contrib
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@828091 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56f83862cf
commit
afc66e4e66
|
@ -38,6 +38,9 @@ Bug fixes
|
||||||
* LUCENE-1953: FastVectorHighlighter: small fragCharSize can cause
|
* LUCENE-1953: FastVectorHighlighter: small fragCharSize can cause
|
||||||
StringIndexOutOfBoundsException. (Koji Sekiguchi)
|
StringIndexOutOfBoundsException. (Koji Sekiguchi)
|
||||||
|
|
||||||
|
* LUCENE-2001: Wordnet Syns2Index incorrectly parses synonyms that
|
||||||
|
contain a single quote. (Parag H. Dave via Robert Muir)
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,
|
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,
|
||||||
|
|
|
@ -45,6 +45,14 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
||||||
new int[] { 1, 1, 1, 1, 0, 0 });
|
new int[] { 1, 1, 1, 1, 0, 0 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSynonymsSingleQuote() throws Exception {
|
||||||
|
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
||||||
|
/* all expansions */
|
||||||
|
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
|
||||||
|
assertAnalyzesTo(analyzer, "king",
|
||||||
|
new String[] { "king", "baron" });
|
||||||
|
}
|
||||||
|
|
||||||
public void testSynonymsLimitedAmount() throws Exception {
|
public void testSynonymsLimitedAmount() throws Exception {
|
||||||
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
||||||
/* limit to one synonym expansion */
|
/* limit to one synonym expansion */
|
||||||
|
|
|
@ -3,3 +3,7 @@ s(100000001,2,'wood',n,1,0).
|
||||||
s(100000001,3,'forest',n,1,0).
|
s(100000001,3,'forest',n,1,0).
|
||||||
s(100000002,1,'wolfish',n,1,0).
|
s(100000002,1,'wolfish',n,1,0).
|
||||||
s(100000002,2,'ravenous',n,1,0).
|
s(100000002,2,'ravenous',n,1,0).
|
||||||
|
s(100000003,1,'king',n,1,1).
|
||||||
|
s(100000003,2,'baron',n,1,1).
|
||||||
|
s(100000004,1,'king''sevil',n,1,1).
|
||||||
|
s(100000004,2,'meany',n,1,1).
|
||||||
|
|
|
@ -165,8 +165,8 @@ public class Syns2Index
|
||||||
String num = line.substring(0, comma);
|
String num = line.substring(0, comma);
|
||||||
int q1 = line.indexOf('\'');
|
int q1 = line.indexOf('\'');
|
||||||
line = line.substring(q1 + 1);
|
line = line.substring(q1 + 1);
|
||||||
int q2 = line.indexOf('\'');
|
int q2 = line.lastIndexOf('\'');
|
||||||
String word = line.substring(0, q2).toLowerCase();
|
String word = line.substring(0, q2).toLowerCase().replace("''", "'");
|
||||||
|
|
||||||
// make sure is a normal word
|
// make sure is a normal word
|
||||||
if (! isDecent(word))
|
if (! isDecent(word))
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
package org.apache.lucene.wordnet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Searcher;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestWordnet extends LuceneTestCase {
|
||||||
|
private Searcher searcher;
|
||||||
|
|
||||||
|
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||||
|
File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt");
|
||||||
|
|
||||||
|
String storePathName =
|
||||||
|
new File(System.getProperty("tempDir"),"testLuceneWordnet").getAbsolutePath();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
// create a temporary synonym index
|
||||||
|
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
|
||||||
|
|
||||||
|
try {
|
||||||
|
Syns2Index.main(commandLineArgs);
|
||||||
|
} catch (Throwable t) { throw new RuntimeException(t); }
|
||||||
|
|
||||||
|
searcher = new IndexSearcher(FSDirectory.open(new File(storePathName)), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExpansion() throws IOException {
|
||||||
|
assertExpandsTo("woods", new String[] { "woods", "forest", "wood" });
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExpansionSingleQuote() throws IOException {
|
||||||
|
assertExpandsTo("king", new String[] { "king", "baron" });
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertExpandsTo(String term, String expected[]) throws IOException {
|
||||||
|
Query expandedQuery = SynExpand.expand(term, searcher, new
|
||||||
|
WhitespaceAnalyzer(), "field", 1F);
|
||||||
|
BooleanQuery expectedQuery = new BooleanQuery();
|
||||||
|
for (String t : expected)
|
||||||
|
expectedQuery.add(new TermQuery(new Term("field", t)),
|
||||||
|
BooleanClause.Occur.SHOULD);
|
||||||
|
assertEquals(expectedQuery, expandedQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void tearDown() throws Exception {
|
||||||
|
searcher.close();
|
||||||
|
rmDir(storePathName); // delete our temporary synonym index
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void rmDir(String directory) {
|
||||||
|
File dir = new File(directory);
|
||||||
|
File[] files = dir.listFiles();
|
||||||
|
for (int i = 0; i < files.length; i++) {
|
||||||
|
files[i].delete();
|
||||||
|
}
|
||||||
|
dir.delete();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
s(100000001,1,'woods',n,1,0).
|
||||||
|
s(100000001,2,'wood',n,1,0).
|
||||||
|
s(100000001,3,'forest',n,1,0).
|
||||||
|
s(100000002,1,'wolfish',n,1,0).
|
||||||
|
s(100000002,2,'ravenous',n,1,0).
|
||||||
|
s(100000003,1,'king',n,1,1).
|
||||||
|
s(100000003,2,'baron',n,1,1).
|
||||||
|
s(100000004,1,'king''sevil',n,1,1).
|
||||||
|
s(100000004,2,'meany',n,1,1).
|
Loading…
Reference in New Issue