From 369a07a196ef9e0692112f70486f09199b44050d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 4 Nov 2011 09:54:55 +0000 Subject: [PATCH] LUCENE-3557: SpellChecker should take IWC, remove sneaky silent optimize() methods git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1197469 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 8 +++++ .../lucene/search/spell/SpellChecker.java | 32 ++++--------------- .../search/spell/TestLuceneDictionary.java | 2 +- .../search/spell/TestPlainTextDictionary.java | 2 +- .../lucene/search/spell/TestSpellChecker.java | 4 +-- .../solr/spelling/FileBasedSpellChecker.java | 2 +- .../solr/spelling/IndexBasedSpellChecker.java | 3 +- 7 files changed, 21 insertions(+), 32 deletions(-) diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 31ca0f3f164..c5e866db325 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -88,6 +88,14 @@ Changes in backwards compatibility policy * LUCENE-3508: Changed some method signatures in decompounding TokenFilters to make them no longer use the Token class. (Uwe Schindler) + + * LUCENE-3557: The various SpellChecker.indexDictionary methods were removed, + and consolidated to one: + + indexDictionary(Dictionary dict, IndexWriterConfig config, boolean optimize) + + Previously, there was no way to specify an IndexWriterConfig, and some + of these methods would sneakily pass 'true' to optimize. 
(Robert Muir) New Features diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java index 65e9fbcdfd1..68ac65e8cd3 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java @@ -31,7 +31,6 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Terms; @@ -481,18 +480,16 @@ public class SpellChecker implements java.io.Closeable { /** * Indexes the data from the given {@link Dictionary}. * @param dict Dictionary to index - * @param mergeFactor mergeFactor to use when indexing - * @param ramMB the max amount or memory in MB to use + * @param config {@link IndexWriterConfig} to use * @param optimize whether or not the spellcheck index should be optimized * @throws AlreadyClosedException if the Spellchecker is already closed * @throws IOException */ - public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB, boolean optimize) throws IOException { + public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean optimize) throws IOException { synchronized (modifyCurrentIndexLock) { ensureOpen(); final Directory dir = this.spellIndex; - final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null).setRAMBufferSizeMB(ramMB)); - ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor); + final IndexWriter writer = new IndexWriter(dir, config); IndexSearcher indexSearcher = obtainSearcher(); final List termsEnums = new ArrayList(); @@ -543,32 +540,15 
@@ public class SpellChecker implements java.io.Closeable { if (optimize) writer.optimize(); writer.close(); + // TODO: opening and closing a new IndexWriter on every call isn't ideal; maybe in the future + // SpellChecker should take the IndexWriterConfig in its ctor and keep its writer open? + // also re-open the spell index to see our own changes when the next suggestion // is fetched: swapSearcher(dir); } } - /** - * Indexes the data from the given {@link Dictionary}. - * @param dict the dictionary to index - * @param mergeFactor mergeFactor to use when indexing - * @param ramMB the max amount or memory in MB to use - * @throws IOException - */ - public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException { - indexDictionary(dict, mergeFactor, ramMB, true); - } - - /** - * Indexes the data from the given {@link Dictionary}. - * @param dict the dictionary to index - * @throws IOException - */ - public final void indexDictionary(Dictionary dict) throws IOException { - indexDictionary(dict, 300, (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); - } - private static int getMin(int l) { if (l > 5) { return 3; diff --git a/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java b/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java index f7f33d9477b..714e19a5dfd 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java +++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java @@ -195,7 +195,7 @@ public class TestLuceneDictionary extends LuceneTestCase { Directory dir = newDirectory(); SpellChecker sc = new SpellChecker(dir); indexReader = IndexReader.open(store, true); - sc.indexDictionary(new LuceneDictionary(indexReader, "contents")); + sc.indexDictionary(new LuceneDictionary(indexReader, "contents"), newIndexWriterConfig(TEST_VERSION_CURRENT, null), false); String[] suggestions = sc.suggestSimilar("Tam", 1); assertEquals(1, suggestions.length); assertEquals("Tom", 
suggestions[0]); diff --git a/modules/suggest/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java b/modules/suggest/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java index e56a689de4b..ae33c6e47b9 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java +++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java @@ -35,7 +35,7 @@ public class TestPlainTextDictionary extends LuceneTestCase { PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(input)); Directory ramDir = newDirectory(); SpellChecker spellChecker = new SpellChecker(ramDir); - spellChecker.indexDictionary(ptd); + spellChecker.indexDictionary(ptd, newIndexWriterConfig(TEST_VERSION_CURRENT, null), false); String[] similar = spellChecker.suggestSimilar("treeword", 2); assertEquals(2, similar.length); assertEquals(similar[0], "threeword"); diff --git a/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java b/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java index f74468a3b5e..f5e7324d30c 100755 --- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java +++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java @@ -331,7 +331,7 @@ public class TestSpellChecker extends LuceneTestCase { private void addwords(IndexReader r, SpellChecker sc, String field) throws IOException { long time = System.currentTimeMillis(); - sc.indexDictionary(new LuceneDictionary(r, field)); + sc.indexDictionary(new LuceneDictionary(r, field), newIndexWriterConfig(TEST_VERSION_CURRENT, null), false); time = System.currentTimeMillis() - time; //System.out.println("time to build " + field + ": " + time); } @@ -379,7 +379,7 @@ public class TestSpellChecker extends LuceneTestCase { } try { - spellChecker.indexDictionary(new LuceneDictionary(r, field)); + spellChecker.indexDictionary(new LuceneDictionary(r, field), 
newIndexWriterConfig(TEST_VERSION_CURRENT, null), false); fail("spellchecker was already closed"); } catch (AlreadyClosedException e) { // expected diff --git a/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java index 459feb3d046..e8971b4f4fe 100644 --- a/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java @@ -62,7 +62,7 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker { try { loadExternalFileDictionary(core); spellChecker.clearIndex(); - spellChecker.indexDictionary(dictionary); + spellChecker.indexDictionary(dictionary, new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, null), false); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/solr/core/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java index 1963006fdef..77a03d929ee 100644 --- a/solr/core/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java @@ -17,6 +17,7 @@ package org.apache.solr.spelling; */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.search.spell.HighFrequencyDictionary; @@ -86,7 +87,7 @@ public class IndexBasedSpellChecker extends AbstractLuceneSpellChecker { dictionary = new HighFrequencyDictionary(reader, field, threshold); spellChecker.clearIndex(); - spellChecker.indexDictionary(dictionary); + spellChecker.indexDictionary(dictionary, new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, null), false); } catch (IOException e) { throw new RuntimeException(e);