mirror of https://github.com/apache/lucene.git
LUCENE-1904: Move wordnet synonym code from contrib/memory to contrib/wordnet
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830699 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f2d0d94d7e
commit
36b65637fc
|
@ -89,6 +89,9 @@ Documentation
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
|
* LUCENE-1904: Moved wordnet-based synonym support from contrib/memory
|
||||||
|
into contrib/wordnet. (Robert Muir)
|
||||||
|
|
||||||
Test Cases
|
Test Cases
|
||||||
======================= Release 2.9.0 2009-09-23 =======================
|
======================= Release 2.9.0 2009-09-23 =======================
|
||||||
|
|
||||||
|
|
|
@ -1,9 +0,0 @@
|
||||||
s(100000001,1,'woods',n,1,0).
|
|
||||||
s(100000001,2,'wood',n,1,0).
|
|
||||||
s(100000001,3,'forest',n,1,0).
|
|
||||||
s(100000002,1,'wolfish',n,1,0).
|
|
||||||
s(100000002,2,'ravenous',n,1,0).
|
|
||||||
s(100000003,1,'king',n,1,1).
|
|
||||||
s(100000003,2,'baron',n,1,1).
|
|
||||||
s(100000004,1,'king''sevil',n,1,1).
|
|
||||||
s(100000004,2,'meany',n,1,1).
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.index.memory;
|
package org.apache.lucene.wordnet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -97,7 +97,7 @@ public final class SynExpand {
|
||||||
*
|
*
|
||||||
* @param a optional analyzer used to parse the user's query else {@link StandardAnalyzer} is used
|
* @param a optional analyzer used to parse the user's query else {@link StandardAnalyzer} is used
|
||||||
*
|
*
|
||||||
* @param field optional field name to search in or null if you want the default of "contents"
|
* @param f optional field name to search in or null if you want the default of "contents"
|
||||||
*
|
*
|
||||||
* @param boost optional boost applied to synonyms else no boost is applied
|
* @param boost optional boost applied to synonyms else no boost is applied
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.index.memory;
|
package org.apache.lucene.wordnet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.index.memory;
|
package org.apache.lucene.wordnet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -21,8 +21,14 @@
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
This package uses synonyms defined by <a href="http://www.cogsci.princeton.edu/~wn/">WordNet</a> to build a
|
This package uses synonyms defined by <a href="http://www.cogsci.princeton.edu/~wn/">WordNet</a>.
|
||||||
Lucene index storing them, which in turn can be used for query expansion.
|
There are two methods: query expansion and analysis.
|
||||||
|
|
||||||
|
Both methods first require you to download the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog database</a>.
|
||||||
|
Inside this archive is a file named wn_s.pl, which contains the WordNet synonyms.
|
||||||
|
|
||||||
|
<h1>Query Expansion Method</h1>
|
||||||
|
This method creates a Lucene index storing the synonyms, which in turn can be used for query expansion.
|
||||||
|
|
||||||
You normally run {@link org.apache.lucene.wordnet.Syns2Index} once to build the query index/"database", and then call
|
You normally run {@link org.apache.lucene.wordnet.Syns2Index} once to build the query index/"database", and then call
|
||||||
{@link org.apache.lucene.wordnet.SynExpand#expand SynExpand.expand(...)} to expand a query.
|
{@link org.apache.lucene.wordnet.SynExpand#expand SynExpand.expand(...)} to expand a query.
|
||||||
|
@ -31,12 +37,21 @@
|
||||||
|
|
||||||
<h3> Instructions </h3>
|
<h3> Instructions </h3>
|
||||||
<ol>
|
<ol>
|
||||||
<li> Download the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog database</a> , gunzip, untar etc.
|
|
||||||
<li> Invoke Syns2Index as appropriate to build a synonym index.
|
<li> Invoke Syns2Index as appropriate to build a synonym index.
|
||||||
It'll take 2 arguments, the path to wn_s.pl from that WordNet download, and the index name.
|
It'll take 2 arguments, the path to wn_s.pl from the WordNet download, and the index name.
|
||||||
|
|
||||||
<li> Update your UI so that as appropriate you call SynExpand.expand(...) to expand user queries with synonyms.
|
<li> Update your UI so that as appropriate you call SynExpand.expand(...) to expand user queries with synonyms.
|
||||||
</ol>
|
</ol>
|
||||||
|
|
||||||
|
<h1>Analysis Method</h1>
|
||||||
|
This method injects additional synonym tokens for tokens from a child {@link org.apache.lucene.analysis.TokenStream}.
|
||||||
|
|
||||||
|
<h3> Instructions </h3>
|
||||||
|
<ol>
|
||||||
|
<li>Create a {@link org.apache.lucene.wordnet.SynonymMap}, passing in the path to wn_s.pl
|
||||||
|
<li>Add a {@link org.apache.lucene.wordnet.SynonymTokenFilter} to your analyzer. Note: SynonymTokenFilter should be after LowerCaseFilter,
|
||||||
|
because it expects terms to already be in lowercase.
|
||||||
|
</ol>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.index.memory;
|
package org.apache.lucene.wordnet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -32,7 +32,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
|
||||||
public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
||||||
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||||
File testFile = new File(dataDir, "org/apache/lucene/index/memory/testSynonyms.txt");
|
File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt");
|
||||||
|
|
||||||
public void testSynonyms() throws Exception {
|
public void testSynonyms() throws Exception {
|
||||||
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
SynonymMap map = new SynonymMap(new FileInputStream(testFile));
|
Loading…
Reference in New Issue