diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 5cd189b37a2..1ce2f0b2ec9 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -4,6 +4,11 @@ For more information on past and future Lucene versions, please see: http://s.apache.org/luceneversions ======================= Trunk (not yet released) ======================= + +Changes in runtime behavior + + * LUCENE-3250: Wordnet's SynExpand requires a non-null Analyzer (it no longer + treats null as StandardAnalyzer). (Robert Muir) Build diff --git a/lucene/contrib/misc/build.xml b/lucene/contrib/misc/build.xml index 520e81f87e8..7613de6acf0 100644 --- a/lucene/contrib/misc/build.xml +++ b/lucene/contrib/misc/build.xml @@ -27,22 +27,6 @@ - - - - - - - - - - - - - - - diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java index c41d6dcd272..2f328a0e319 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.IndexWriter; // javadoc import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; @@ -98,7 +97,7 @@ public class MultiPassIndexSplitter { } IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig( Version.LUCENE_CURRENT, - new WhitespaceAnalyzer(Version.LUCENE_CURRENT)) + null) .setOpenMode(OpenMode.CREATE)); System.err.println("Writing part " + (i + 1) + " ..."); w.addIndexes(input); diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java index d29d5783193..775354f33e5 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java @@ -16,7 +16,6 @@ package org.apache.lucene.misc; * limitations under the License. */ -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -40,7 +39,7 @@ public class IndexMergeTool { FSDirectory mergedIndex = FSDirectory.open(new File(args[0])); IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig( - Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)) + Version.LUCENE_CURRENT, null) .setOpenMode(OpenMode.CREATE)); Directory[] indexes = new Directory[args.length - 1]; diff --git a/lucene/contrib/wordnet/build.xml b/lucene/contrib/wordnet/build.xml index 9785e3e4e38..3e0e096e6af 100644 --- a/lucene/contrib/wordnet/build.xml +++ b/lucene/contrib/wordnet/build.xml @@ -30,22 +30,6 @@ - - - - - - - - - - - - - - - Index already exists - must remove first. @@ -83,24 +67,4 @@ - - - Index does not exist. - - - - Must specify 'query' property. - - - - - - - - - - - - - diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java index c68562690b9..a7bd81270fd 100755 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java @@ -17,7 +17,6 @@ package org.apache.lucene.wordnet; * limitations under the License. */ -import java.io.File; import java.io.IOException; import java.io.StringReader; import java.util.HashSet; @@ -28,7 +27,6 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; @@ -41,8 +39,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; /** @@ -53,41 +49,6 @@ import org.apache.lucene.util.Version; */ public final class SynExpand { - /** - * Test driver for synonym expansion. - * Uses boost factor of 0.9 for illustrative purposes. - * - * If you pass in the query "big dog" then it prints out: - * - *
-	 * Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9 bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9 large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9 vainglorious^0.9 vauntingly^0.9
-	 * dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
-	 * 
- */ - public static void main(String[] args) throws IOException - { - if (args.length != 2) - { - System.out.println( - "java org.apache.lucene.wordnet.SynExpand "); - } - - FSDirectory directory = FSDirectory.open(new File(args[0])); - IndexSearcher searcher = new IndexSearcher(directory, true); - - String query = args[1]; - String field = "contents"; - - Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f); - System.out.println( "Query: " + q.toString( field)); - - - - searcher.close(); - directory.close(); - } - - /** * Perform synonym expansion on a query. * @@ -95,7 +56,7 @@ public final class SynExpand { * * @param syns a opened to the Lucene index you previously created with {@link Syns2Index}. The searcher is not closed or otherwise altered. * - * @param a optional analyzer used to parse the users query else {@link StandardAnalyzer} is used + * @param a analyzer used to parse the users query. * * @param f optional field name to search in or null if you want the default of "contents" * @@ -113,7 +74,6 @@ public final class SynExpand { final Set already = new HashSet(); // avoid dups List top = new LinkedList(); // needs to be separately listed.. final String field = ( f == null) ? "contents" : f; - if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT); // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.reusableTokenStream( field, new StringReader( query)); diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java index 215e20039bf..2c2fb14951d 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java @@ -41,6 +41,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.store.FSDirectory; @@ -48,24 +49,6 @@ import org.apache.lucene.store.FSDirectory; * Test program to look up synonyms. */ public class SynLookup { - - final static class CountingCollector extends Collector { - public int numHits = 0; - - @Override - public void setScorer(Scorer scorer) throws IOException {} - @Override - public void collect(int doc) throws IOException { - numHits++; - } - - @Override - public void setNextReader(AtomicReaderContext context) {} - @Override - public boolean acceptsDocsOutOfOrder() { - return true; - } - } public static void main(String[] args) throws IOException { if (args.length != 2) { @@ -78,16 +61,16 @@ public class SynLookup { String word = args[1]; Query query = new TermQuery(new Term(Syns2Index.F_WORD, word)); - CountingCollector countingCollector = new CountingCollector(); + TotalHitCountCollector countingCollector = new TotalHitCountCollector(); searcher.search(query, countingCollector); - if (countingCollector.numHits == 0) { + if (countingCollector.getTotalHits() == 0) { System.out.println("No synonyms found for " + word); } else { System.out.println("Synonyms found for \"" + word + "\":"); } - ScoreDoc[] hits = searcher.search(query, countingCollector.numHits).scoreDocs; + ScoreDoc[] hits = searcher.search(query, countingCollector.getTotalHits()).scoreDocs; for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java index abe77850bd5..8d3ea0c3d60 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.PrintStream; +import java.io.Reader; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -31,7 +32,7 @@ import java.util.TreeMap; import java.util.TreeSet; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -90,9 +91,15 @@ public class Syns2Index public static final String F_WORD = "word"; /** - * + * we don't actually analyze any text (only a NOT_ANALYZED field), + * but analyzer can't be null, docinverter wants the offset gap! */ - private static final Analyzer ana = new StandardAnalyzer(Version.LUCENE_CURRENT); + private static final Analyzer ana = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return null; + } + }; /** * Takes arg of prolog file name and index directory. diff --git a/modules/suggest/build.xml b/modules/suggest/build.xml index f10718a5d25..df34d3170a9 100755 --- a/modules/suggest/build.xml +++ b/modules/suggest/build.xml @@ -29,21 +29,5 @@ - - - - - - - - - - - - - - - diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java index f556ad4544e..7d99a7662ff 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -18,12 +18,14 @@ package org.apache.lucene.search.spell; */ import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; import java.util.List; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; @@ -112,6 +114,17 @@ public class SpellChecker implements java.io.Closeable { private StringDistance sd; private Comparator comparator; + + /** we don't need to actually analyze any content: + * all fields are indexed NOT_ANALYZED, but docsinverter + * needs this for the offset gap! + */ + private static Analyzer noAnalyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return null; + } + }; /** * Use the given directory as a spell checker index. The directory @@ -168,7 +181,7 @@ public class SpellChecker implements java.io.Closeable { if (!IndexReader.indexExists(spellIndexDir)) { IndexWriter writer = new IndexWriter(spellIndexDir, new IndexWriterConfig(Version.LUCENE_CURRENT, - new WhitespaceAnalyzer(Version.LUCENE_CURRENT))); + noAnalyzer)); writer.close(); } swapSearcher(spellIndexDir); @@ -466,7 +479,7 @@ public class SpellChecker implements java.io.Closeable { final Directory dir = this.spellIndex; final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( Version.LUCENE_CURRENT, - new WhitespaceAnalyzer(Version.LUCENE_CURRENT)) + noAnalyzer) .setOpenMode(OpenMode.CREATE)); writer.close(); swapSearcher(dir); @@ -503,7 +516,7 @@ public class SpellChecker implements java.io.Closeable { synchronized (modifyCurrentIndexLock) { ensureOpen(); final Directory dir = this.spellIndex; - final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB)); + final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, noAnalyzer).setRAMBufferSizeMB(ramMB)); ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor); IndexSearcher indexSearcher = obtainSearcher(); final List termsEnums = new ArrayList();