From 36b65637fcb2614619a7930159477364e57b3738 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 28 Oct 2009 17:49:53 +0000 Subject: [PATCH] LUCENE-1904: Move wordnet synonym code from contrib/memory to contrib/wordnet git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830699 13f79535-47bb-0310-9956-ffa450edef68 --- contrib/CHANGES.txt | 3 +++ .../lucene/index/memory/testSynonyms.txt | 9 -------- .../apache/lucene/wordnet}/AnalyzerUtil.java | 2 +- .../org/apache/lucene/wordnet/SynExpand.java | 2 +- .../apache/lucene/wordnet}/SynonymMap.java | 2 +- .../lucene/wordnet}/SynonymTokenFilter.java | 2 +- .../org/apache/lucene/wordnet/package.html | 23 +++++++++++++++---- .../wordnet}/TestSynonymTokenFilter.java | 4 ++-- 8 files changed, 28 insertions(+), 19 deletions(-) delete mode 100644 contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt rename contrib/{memory/src/java/org/apache/lucene/index/memory => wordnet/src/java/org/apache/lucene/wordnet}/AnalyzerUtil.java (99%) rename contrib/{memory/src/java/org/apache/lucene/index/memory => wordnet/src/java/org/apache/lucene/wordnet}/SynonymMap.java (99%) rename contrib/{memory/src/java/org/apache/lucene/index/memory => wordnet/src/java/org/apache/lucene/wordnet}/SynonymTokenFilter.java (99%) rename contrib/{memory/src/test/org/apache/lucene/index/memory => wordnet/src/test/org/apache/lucene/wordnet}/TestSynonymTokenFilter.java (97%) diff --git a/contrib/CHANGES.txt b/contrib/CHANGES.txt index d0ad850114e..3474a3e7feb 100644 --- a/contrib/CHANGES.txt +++ b/contrib/CHANGES.txt @@ -89,6 +89,9 @@ Documentation Build + * LUCENE-1904: Moved wordnet-based synonym support from contrib/memory + into contrib/wordnet. 
(Robert Muir) + Test Cases ======================= Release 2.9.0 2009-09-23 ======================= diff --git a/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt b/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt deleted file mode 100644 index 822bc96858c..00000000000 --- a/contrib/memory/src/test/org/apache/lucene/index/memory/testSynonyms.txt +++ /dev/null @@ -1,9 +0,0 @@ -s(100000001,1,'woods',n,1,0). -s(100000001,2,'wood',n,1,0). -s(100000001,3,'forest',n,1,0). -s(100000002,1,'wolfish',n,1,0). -s(100000002,2,'ravenous',n,1,0). -s(100000003,1,'king',n,1,1). -s(100000003,2,'baron',n,1,1). -s(100000004,1,'king''sevil',n,1,1). -s(100000004,2,'meany',n,1,1). diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java b/contrib/wordnet/src/java/org/apache/lucene/wordnet/AnalyzerUtil.java similarity index 99% rename from contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java rename to contrib/wordnet/src/java/org/apache/lucene/wordnet/AnalyzerUtil.java index d3a58104b85..0132bb6f8d3 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java +++ b/contrib/wordnet/src/java/org/apache/lucene/wordnet/AnalyzerUtil.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.memory; +package org.apache.lucene.wordnet; /** * Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java index b57ff10983c..e4d4bcb068c 100755 --- a/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java +++ b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java @@ -97,7 +97,7 @@ public final class SynExpand { * * @param a optional analyzer used to parse the users query else {@link StandardAnalyzer} is used * - * @param field optional field name to search in or null if you want the default of "contents" + * @param f 
optional field name to search in or null if you want the default of "contents" * * @param boost optional boost applied to synonyms else no boost is applied * diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymMap.java b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java similarity index 99% rename from contrib/memory/src/java/org/apache/lucene/index/memory/SynonymMap.java rename to contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java index d5dfdb2d5f3..31a698ca9cb 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymMap.java +++ b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.memory; +package org.apache.lucene.wordnet; /** * Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java similarity index 99% rename from contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java rename to contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java index 1b84101b8c5..cc26c3ed4f4 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java +++ b/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.memory; +package org.apache.lucene.wordnet; /** * Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/contrib/wordnet/src/java/org/apache/lucene/wordnet/package.html b/contrib/wordnet/src/java/org/apache/lucene/wordnet/package.html index 631fb900f0f..19c5b579ba4 100755 --- a/contrib/wordnet/src/java/org/apache/lucene/wordnet/package.html +++ b/contrib/wordnet/src/java/org/apache/lucene/wordnet/package.html @@ -21,8 +21,14 @@ - This package uses synonyms defined by WordNet to build a - Lucene index 
storing them, which in turn can be used for query expansion. + This package uses synonyms defined by WordNet. + There are two methods: query expansion and analysis. + + Both methods first require you to download the WordNet prolog database. + Inside this archive is a file named wn_s.pl, which contains the WordNet synonyms. + + 

Query Expansion Method

+ This method creates a Lucene index storing the synonyms, which in turn can be used for query expansion. You normally run {@link org.apache.lucene.wordnet.Syns2Index} once to build the query index/"database", and then call {@link org.apache.lucene.wordnet.SynExpand#expand SynExpand.expand(...)} to expand a query. @@ -31,12 +37,21 @@

Instructions

    -
  1. Download the WordNet prolog database , gunzip, untar etc.
  2. Invoke Syns2Index as appropriate to build a synonym index. - It'll take 2 arguments, the path to wn_s.pl from that WordNet download, and the index name. + It'll take 2 arguments, the path to wn_s.pl from the WordNet download, and the index name.
  3. Update your UI so that as appropriate you call SynExpand.expand(...) to expand user queries with synonyms.
+ +

Analysis Method

+ This method injects additional synonym tokens for tokens from a child {@link org.apache.lucene.analysis.TokenStream}. + +

Instructions

+
    +
  1. Create a {@link org.apache.lucene.wordnet.SynonymMap}, passing in an input stream opened on wn_s.pl (for example, a FileInputStream). +
  2. Add a {@link org.apache.lucene.wordnet.SynonymTokenFilter} to your analyzer. Note: SynonymTokenFilter should be after LowerCaseFilter, + because it expects terms to already be in lowercase. +
\ No newline at end of file diff --git a/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java b/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java similarity index 97% rename from contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java rename to contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java index de57b62f796..ccbab5c1b26 100644 --- a/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java +++ b/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index.memory; +package org.apache.lucene.wordnet; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -32,7 +32,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; public class TestSynonymTokenFilter extends BaseTokenStreamTestCase { File dataDir = new File(System.getProperty("dataDir", "./bin")); - File testFile = new File(dataDir, "org/apache/lucene/index/memory/testSynonyms.txt"); + File testFile = new File(dataDir, "org/apache/lucene/wordnet/testSynonyms.txt"); public void testSynonyms() throws Exception { SynonymMap map = new SynonymMap(new FileInputStream(testFile));