diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d027b741446..e7ef40949ed 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -237,6 +237,9 @@ New Features
Koji Sekiguchi, Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich,
Bojan Smid, Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger, yonik)
+
+* SOLR-1316: Create autosuggest component.
+ (Ankul Garg, Jason Rutherglen, Shalin Shekhar Mangar, Grant Ingersoll, Robert Muir, ab)
Optimizations
diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index f2d8dd0449a..5cc6b0d47ac 100644
--- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -111,7 +111,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
spellChecker.build(rb.req.getCore(), rb.req.getSearcher());
rb.rsp.add("command", "build");
} else if (params.getBool(SPELLCHECK_RELOAD, false)) {
- spellChecker.reload();
+ spellChecker.reload(rb.req.getCore(), rb.req.getSearcher());
rb.rsp.add("command", "reload");
}
}
@@ -555,7 +555,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
try {
LOG.info("Loading spell index for spellchecker: "
+ checker.getDictionaryName());
- checker.reload();
+ checker.reload(core, newSearcher);
} catch (IOException e) {
log.error( "Exception in reloading spell check index for spellchecker: " + checker.getDictionaryName(), e);
}
diff --git a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
index a76506cefb9..2a5d10c872d 100644
--- a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@@ -44,6 +44,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
+import org.apache.solr.search.SolrIndexSearcher;
/**
@@ -195,7 +196,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
return reader;
}
- public void reload() throws IOException {
+ public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
spellChecker.setSpellIndex(index);
}
diff --git a/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java b/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
index 7c7dafb15c8..3f85b259baf 100644
--- a/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
@@ -102,8 +102,8 @@ public class IndexBasedSpellChecker extends AbstractLuceneSpellChecker {
}
@Override
- public void reload() throws IOException {
- super.reload();
+ public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
+ super.reload(core, searcher);
//reload the source
initSourceReader();
}
diff --git a/solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java b/solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java
index 4e702086d2e..9d132c50116 100644
--- a/solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java
@@ -63,7 +63,7 @@ public abstract class SolrSpellChecker {
*
* @throws java.io.IOException
*/
- public abstract void reload() throws IOException;
+ public abstract void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException;
/**
* (re)Builds the spelling index. May be a NOOP if the implementation doesn't require building, or can't be rebuilt.
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java b/solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java
new file mode 100644
index 00000000000..98c8113702e
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java
@@ -0,0 +1,70 @@
+package org.apache.solr.spelling.suggest;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.util.TermFreqIterator;
+
+/**
+ * This wrapper buffers incoming elements.
+ */
+public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
+
+ /** Entry in the buffer. */
+ public static final class Entry implements Comparable {
+ String word;
+ float freq;
+
+ public Entry(String word, float freq) {
+ this.word = word;
+ this.freq = freq;
+ }
+
+ @Override
+ public int compareTo(Entry o) {
+ return word.compareTo(o.word);
+ }
+ }
+
+ protected ArrayList entries = new ArrayList();
+
+ protected int curPos;
+ protected Entry curEntry;
+
+ public BufferingTermFreqIteratorWrapper(TermFreqIterator source) {
+ // read all source data into buffer
+ while (source.hasNext()) {
+ String w = source.next();
+ Entry e = new Entry(w, source.freq());
+ entries.add(e);
+ }
+ curPos = 0;
+ }
+
+ @Override
+ public float freq() {
+ return curEntry.freq;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return curPos < entries.size();
+ }
+
+ @Override
+ public String next() {
+ curEntry = entries.get(curPos);
+ curPos++;
+ return curEntry.word;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("remove is not supported");
+ }
+
+ public List entries() {
+ return entries;
+ }
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java b/solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java
new file mode 100644
index 00000000000..c59fe658f0e
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java
@@ -0,0 +1,95 @@
+package org.apache.solr.spelling.suggest;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.*;
+
+import org.apache.lucene.search.spell.Dictionary;
+import org.apache.solr.util.TermFreqIterator;
+
+
+/**
+ * Dictionary represented by a text file.
+ *
+ * Format allowed: 1 string per line, optionally with a tab-separated integer value:
+ * word1 TAB 100
+ * word2 word3 TAB 101
+ * word4 word5 TAB 102
+ */
+public class FileDictionary implements Dictionary {
+
+ private BufferedReader in;
+ private String line;
+ private boolean hasNextCalled;
+
+ public FileDictionary(InputStream dictFile) {
+ in = new BufferedReader(new InputStreamReader(dictFile));
+ }
+
+ /**
+ * Creates a dictionary based on a reader.
+ */
+ public FileDictionary(Reader reader) {
+ in = new BufferedReader(reader);
+ }
+
+ public TermFreqIterator getWordsIterator() {
+ return new fileIterator();
+ }
+
+ final class fileIterator implements TermFreqIterator {
+ private float curFreq;
+
+ public String next() {
+ if (!hasNextCalled) {
+ hasNext();
+ }
+ hasNextCalled = false;
+ return line;
+ }
+
+ public float freq() {
+ return curFreq;
+ }
+
+ public boolean hasNext() {
+ hasNextCalled = true;
+ try {
+ line = in.readLine();
+ if (line != null) {
+ String[] fields = line.split("\t");
+ if (fields.length > 1) {
+ curFreq = Float.parseFloat(fields[1]);
+ line = fields[0];
+ } else {
+ curFreq = 1;
+ }
+ }
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+ return (line != null) ? true : false;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/Lookup.java b/solr/src/java/org/apache/solr/spelling/suggest/Lookup.java
new file mode 100644
index 00000000000..6eec6cecb6b
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/Lookup.java
@@ -0,0 +1,117 @@
+package org.apache.solr.spelling.suggest;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.TermFreqIterator;
+
+public abstract class Lookup {
+
+ /**
+ * Result of a lookup.
+ */
+ public static final class LookupResult {
+ String key;
+ float value;
+
+ public LookupResult(String key, float value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ public String toString() {
+ return key + "/" + value;
+ }
+ }
+
+ public static final class LookupPriorityQueue extends PriorityQueue {
+
+ public LookupPriorityQueue(int size) {
+ initialize(size);
+ }
+
+ @Override
+ protected boolean lessThan(LookupResult a, LookupResult b) {
+ return a.value < b.value;
+ }
+
+ public LookupResult[] getResults() {
+ int size = size();
+ LookupResult[] res = new LookupResult[size];
+ for (int i = size - 1; i >= 0; i--) {
+ res[i] = pop();
+ }
+ return res;
+ }
+ }
+
+ /** Initialize the lookup. */
+ public abstract void init(NamedList config, SolrCore core);
+
+ /** Build lookup from a dictionary. Some implementations may require sorted
+ * or unsorted keys from the dictionary's iterator - use
+ * {@link SortedTermFreqIteratorWrapper} or
+ * {@link UnsortedTermFreqIteratorWrapper} in such case.
+ */
+ public void build(Dictionary dict) throws IOException {
+ Iterator it = dict.getWordsIterator();
+ TermFreqIterator tfit;
+ if (it instanceof TermFreqIterator) {
+ tfit = (TermFreqIterator)it;
+ } else {
+ tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
+ }
+ build(tfit);
+ }
+
+ protected abstract void build(TermFreqIterator tfit) throws IOException;
+
+ /**
+ * Persist the constructed lookup data to a directory. Optional operation.
+ * @param storeDir directory where data can be stored.
+ * @return true if successful, false if unsuccessful or not supported.
+ * @throws IOException when fatal IO error occurs.
+ */
+ public abstract boolean store(File storeDir) throws IOException;
+
+ /**
+ * Discard current lookup data and load it from a previously saved copy.
+ * Optional operation.
+ * @param storeDir directory where lookup data was stored.
+ * @return true if completed successfully, false if unsuccessful or not supported.
+ * @throws IOException when fatal IO error occurs.
+ */
+ public abstract boolean load(File storeDir) throws IOException;
+
+ /**
+ * Look up a key and return possible completion for this key.
+ * @param key lookup key. Depending on the implementation this may be
+ * a prefix, misspelling, or even infix.
+ * @param onlyMorePopular return only more popular results
+ * @param num maximum number of results to return
+ * @return a list of possible completions, with their relative weight (e.g. popularity)
+ */
+ public abstract List lookup(String key, boolean onlyMorePopular, int num);
+
+ /**
+ * Modify the lookup data by recording additional data. Optional operation.
+ * @param key new lookup key
+ * @param value value to associate with this key
+ * @return true if new key is added, false if it already exists or operation
+ * is not supported.
+ */
+ public abstract boolean add(String key, Object value);
+
+ /**
+ * Get value associated with a specific key.
+ * @param key lookup key
+ * @return associated value
+ */
+ public abstract Object get(String key);
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java b/solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java
new file mode 100644
index 00000000000..5cf2f5813c9
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java
@@ -0,0 +1,18 @@
+package org.apache.solr.spelling.suggest;
+
+import java.util.Collections;
+
+import org.apache.solr.util.SortedIterator;
+import org.apache.solr.util.TermFreqIterator;
+
+/**
+ * This wrapper buffers incoming elements and makes sure they are sorted in
+ * ascending lexicographic order.
+ */
+public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
+
+ public SortedTermFreqIteratorWrapper(TermFreqIterator source) {
+ super(source);
+ Collections.sort(entries);
+ }
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java b/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
new file mode 100644
index 00000000000..1d77a1003ba
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
@@ -0,0 +1,158 @@
+package org.apache.solr.spelling.suggest;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.spell.Dictionary;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.SolrSpellChecker;
+import org.apache.solr.spelling.SpellingOptions;
+import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.suggest.Lookup.LookupResult;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
+import org.apache.solr.util.HighFrequencyDictionary;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class Suggester extends SolrSpellChecker {
+ private static final Logger LOG = LoggerFactory.getLogger(Suggester.class);
+
+ /** Location of the source data - either a path to a file, or null for the
+ * current IndexReader.
+ */
+ public static final String LOCATION = "sourceLocation";
+ /** Field to use as the source of terms if using IndexReader. */
+ public static final String FIELD = "field";
+ /** Fully-qualified class of the {@link Lookup} implementation. */
+ public static final String LOOKUP_IMPL = "lookupImpl";
+ /**
+ * Minimum frequency of terms to consider when building the dictionary.
+ */
+ public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold";
+ /**
+ * Name of the location where to persist the dictionary. If this location
+ * is relative then the data will be stored under the core's dataDir. If this
+ * is null the storing will be disabled.
+ */
+ public static final String STORE_DIR = "storeDir";
+
+ protected String sourceLocation;
+ protected File storeDir;
+ protected String field;
+ protected float threshold;
+ protected Dictionary dictionary;
+ protected IndexReader reader;
+ protected Lookup lookup;
+ protected String lookupImpl;
+ protected SolrCore core;
+
+ @Override
+ public String init(NamedList config, SolrCore core) {
+ LOG.info("init: " + config);
+ String name = super.init(config, core);
+ threshold = config.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f
+ : (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
+ sourceLocation = (String) config.get(LOCATION);
+ field = (String)config.get(FIELD);
+ lookupImpl = (String)config.get(LOOKUP_IMPL);
+ if (lookupImpl == null) {
+ lookupImpl = JaspellLookup.class.getName();
+ }
+ String store = (String)config.get(STORE_DIR);
+ if (store != null) {
+ storeDir = new File(store);
+ if (!storeDir.isAbsolute()) {
+ storeDir = new File(core.getDataDir() + File.separator + storeDir);
+ }
+ if (!storeDir.exists()) {
+ storeDir.mkdirs();
+ }
+ }
+ return name;
+ }
+
+ @Override
+ public void build(SolrCore core, SolrIndexSearcher searcher) {
+ LOG.info("build()");
+ if (sourceLocation == null) {
+ reader = searcher.getReader();
+ dictionary = new HighFrequencyDictionary(reader, field, threshold);
+ } else {
+ try {
+ dictionary = new FileDictionary(new InputStreamReader(
+ core.getResourceLoader().openResource(sourceLocation), "UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
+ }
+ lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
+ try {
+ lookup.build(dictionary);
+ if (storeDir != null) {
+ lookup.store(storeDir);
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
+ LOG.info("reload()");
+ if (dictionary == null && storeDir != null) {
+ // this may be a firstSearcher event, try loading it
+ if (lookup.load(storeDir)) {
+ return; // loaded ok
+ }
+ }
+ // dictionary based on the current index may need refreshing
+ if (dictionary instanceof HighFrequencyDictionary) {
+ reader = reader.reopen();
+ dictionary = new HighFrequencyDictionary(reader, field, threshold);
+ try {
+ lookup.build(dictionary);
+ if (storeDir != null) {
+ lookup.store(storeDir);
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+ }
+
+ public void add(String query, int numHits) {
+ LOG.info("add " + query + ", " + numHits);
+ lookup.add(query, new Integer(numHits));
+ }
+
+ static SpellingResult EMPTY_RESULT = new SpellingResult();
+
+ @Override
+ public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
+ LOG.debug("getSuggestions: " + options.tokens);
+ if (lookup == null) {
+ LOG.info("Lookup is null - invoke spellchecker.build first");
+ return EMPTY_RESULT;
+ }
+ SpellingResult res = new SpellingResult();
+ for (Token t : options.tokens) {
+ String term = new String(t.buffer(), 0, t.length());
+ List suggestions = lookup.lookup(term,
+ options.onlyMorePopular, options.count);
+ if (suggestions == null) {
+ continue;
+ }
+ for (LookupResult lr : suggestions) {
+ res.add(t, lr.key, ((Number)lr.value).intValue());
+ }
+ }
+ return res;
+ }
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java b/solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java
new file mode 100644
index 00000000000..41f24b85d3e
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java
@@ -0,0 +1,17 @@
+package org.apache.solr.spelling.suggest;
+
+import java.util.Collections;
+
+import org.apache.solr.util.TermFreqIterator;
+
+/**
+ * This wrapper buffers the incoming elements and makes sure they are in
+ * random order.
+ */
+public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
+
+ public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) {
+ super(source);
+ Collections.shuffle(entries);
+ }
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java b/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
new file mode 100644
index 00000000000..f4b808cc8c0
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
@@ -0,0 +1,102 @@
+package org.apache.solr.spelling.suggest.jaspell;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.spelling.suggest.Lookup;
+import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
+import org.apache.solr.util.SortedIterator;
+import org.apache.solr.util.TermFreqIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class JaspellLookup extends Lookup {
+ private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
+ JaspellTernarySearchTrie trie;
+ private boolean usePrefix = true;
+ private int editDistance = 2;
+
+ @Override
+ public void init(NamedList config, SolrCore core) {
+ LOG.info("init: " + config);
+ }
+
+ @Override
+ public void build(TermFreqIterator tfit) throws IOException {
+ if (tfit instanceof SortedIterator) {
+ // make sure it's unsorted
+ tfit = new UnsortedTermFreqIteratorWrapper(tfit);
+ }
+ trie = new JaspellTernarySearchTrie();
+ trie.setMatchAlmostDiff(editDistance);
+ while (tfit.hasNext()) {
+ String key = tfit.next();
+ float freq = tfit.freq();
+ if (key.length() == 0) {
+ continue;
+ }
+ trie.put(key, new Float(freq));
+ }
+ }
+
+ @Override
+ public boolean add(String key, Object value) {
+ trie.put(key, value);
+ // XXX
+ return false;
+ }
+
+ @Override
+ public Object get(String key) {
+ return trie.get(key);
+ }
+
+ @Override
+ public List lookup(String key, boolean onlyMorePopular, int num) {
+ List res = new ArrayList();
+ List list;
+ int count = onlyMorePopular ? num * 2 : num;
+ if (usePrefix) {
+ list = trie.matchPrefix(key, count);
+ } else {
+ list = trie.matchAlmost(key, count);
+ }
+ if (list == null || list.size() == 0) {
+ return res;
+
+ }
+ int maxCnt = Math.min(num, list.size());
+ if (onlyMorePopular) {
+ LookupPriorityQueue queue = new LookupPriorityQueue(num);
+ for (String s : list) {
+ float freq = (Float)trie.get(s);
+ queue.insertWithOverflow(new LookupResult(s, freq));
+ }
+ for (LookupResult lr : queue.getResults()) {
+ res.add(lr);
+ }
+ } else {
+ for (int i = 0; i < maxCnt; i++) {
+ String s = list.get(i);
+ float freq = (Float)trie.get(s);
+ res.add(new LookupResult(s, freq));
+ }
+ }
+ return res;
+ }
+
+ @Override
+ public boolean load(File storeDir) throws IOException {
+ return false;
+ }
+
+ @Override
+ public boolean store(File storeDir) throws IOException {
+ return false;
+ }
+
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java b/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
new file mode 100644
index 00000000000..5d2c44f4d73
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
@@ -0,0 +1,827 @@
+package org.apache.solr.spelling.suggest.jaspell;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.List;
+import java.util.Vector;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Implementation of a Ternary Search Trie, a data structure for storing
+ * String
objects that combines the compact size of a binary search
+ * tree with the speed of a digital search trie, and is therefore ideal for
+ * practical use in sorting and searching data.
+ *
+ *
+ * This data structure is faster than hashing for many typical search problems,
+ * and supports a broader range of useful problems and operations. Ternary
+ * searches are faster than hashing and more powerful, too.
+ *
+ *
+ *
+ * The theory of ternary search trees was described at a symposium in 1997 (see
+ * "Fast Algorithms for Sorting and Searching Strings," by J.L. Bentley and R.
+ * Sedgewick, Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete
+ * Algorithms, January 1997). Algorithms in C, Third Edition, by Robert
+ * Sedgewick (Addison-Wesley, 1998) provides yet another view of ternary search
+ * trees.
+ *
+ * @author Bruno Martins
+ *
+ */
+public class JaspellTernarySearchTrie {
+
+ /**
+ * An inner class of Ternary Search Trie that represents a node in the trie.
+ */
+ protected final class TSTNode {
+
+ /** Index values for accessing relatives array. */
+ protected final static int PARENT = 0, LOKID = 1, EQKID = 2, HIKID = 3;
+
+ /** The key to the node. */
+ protected Object data;
+
+ /** The relative nodes. */
+ protected TSTNode[] relatives = new TSTNode[4];
+
+ /** The char used in the split. */
+ protected char splitchar;
+
+ /**
+ * Constructor method.
+ *
+ *@param splitchar
+ * The char used in the split.
+ *@param parent
+ * The parent node.
+ */
+ protected TSTNode(char splitchar, TSTNode parent) {
+ this.splitchar = splitchar;
+ relatives[PARENT] = parent;
+ }
+ }
+
+ /**
+ * Compares characters by alfabetical order.
+ *
+ *@param cCompare2
+ * The first char in the comparison.
+ *@param cRef
+ * The second char in the comparison.
+ *@return A negative number, 0 or a positive number if the second char is
+ * less, equal or greater.
+ */
+ private static int compareCharsAlphabetically(char cCompare2, char cRef) {
+ return Character.toLowerCase(cCompare2) - Character.toLowerCase(cRef);
+ }
+
+ /* what follows is the original Jaspell code.
+ private static int compareCharsAlphabetically(int cCompare2, int cRef) {
+ int cCompare = 0;
+ if (cCompare2 >= 65) {
+ if (cCompare2 < 89) {
+ cCompare = (2 * cCompare2) - 65;
+ } else if (cCompare2 < 97) {
+ cCompare = cCompare2 + 24;
+ } else if (cCompare2 < 121) {
+ cCompare = (2 * cCompare2) - 128;
+ } else cCompare = cCompare2;
+ } else cCompare = cCompare2;
+ if (cRef < 65) {
+ return cCompare - cRef;
+ }
+ if (cRef < 89) {
+ return cCompare - ((2 * cRef) - 65);
+ }
+ if (cRef < 97) {
+ return cCompare - (cRef + 24);
+ }
+ if (cRef < 121) {
+ return cCompare - ((2 * cRef) - 128);
+ }
+ return cCompare - cRef;
+ }
+ */
+
+ /**
+ * The default number of values returned by the matchAlmost
+ * method.
+ */
+ private int defaultNumReturnValues = -1;
+
+ /**
+ * the number of differences allowed in a call to the
+ * matchAlmostKey
method.
+ */
+ private int matchAlmostDiff;
+
+ /** The base node in the trie. */
+ private TSTNode rootNode;
+
+ /**
+ * Constructs an empty Ternary Search Trie.
+ */
+ public JaspellTernarySearchTrie() {
+ }
+
+ /**
+ * Constructs a Ternary Search Trie and loads data from a File
+ * into the Trie. The file is a normal text document, where each line is of
+ * the form word TAB float.
+ *
+ *@param file
+ * The File
with the data to load into the Trie.
+ *@exception IOException
+ * A problem occured while reading the data.
+ */
+ public JaspellTernarySearchTrie(File file) throws IOException {
+ this(file, false);
+ }
+
+ /**
+ * Constructs a Ternary Search Trie and loads data from a File
+ * into the Trie. The file is a normal text document, where each line is of
+ * the form "word TAB float".
+ *
+ *@param file
+ * The File
with the data to load into the Trie.
+ *@param compression
+ * If true, the file is compressed with the GZIP algorithm, and if
+ * false, the file is a normal text document.
+ *@exception IOException
+ * A problem occured while reading the data.
+ */
+ public JaspellTernarySearchTrie(File file, boolean compression)
+ throws IOException {
+ this();
+ BufferedReader in;
+ if (compression)
+ in = new BufferedReader(new InputStreamReader(new GZIPInputStream(
+ new FileInputStream(file))));
+ else in = new BufferedReader(new InputStreamReader((new FileInputStream(
+ file))));
+ String word;
+ int pos;
+ Float occur, one = new Float(1);
+ int numWords = 0;
+ while ((word = in.readLine()) != null) {
+ numWords++;
+ pos = word.indexOf("\t");
+ occur = one;
+ if (pos != -1) {
+ occur = Float.parseFloat(word.substring(pos + 1).trim());
+ word = word.substring(0, pos);
+ }
+ String key = word.toLowerCase();
+ if (rootNode == null) {
+ rootNode = new TSTNode(key.charAt(0), null);
+ }
+ TSTNode node = null;
+ if (key.length() > 0 && rootNode != null) {
+ TSTNode currentNode = rootNode;
+ int charIndex = 0;
+ while (true) {
+ if (currentNode == null) break;
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ node = currentNode;
+ break;
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ Float occur2 = null;
+ if (node != null) occur2 = ((Float) (node.data));
+ if (occur2 != null) {
+ occur += occur2.floatValue();
+ }
+ currentNode = getOrCreateNode(word.trim().toLowerCase());
+ currentNode.data = occur;
+ }
+ }
+ in.close();
+ }
+
+ /**
+ * Deletes the node passed in as an argument. If this node has non-null data,
+ * then both the node and the data will be deleted. It also deletes any other
+ * nodes in the trie that are no longer needed after the deletion of the node.
+ *
+ *@param nodeToDelete
+ * The node to delete.
+ */
+ private void deleteNode(TSTNode nodeToDelete) {
+ if (nodeToDelete == null) {
+ return;
+ }
+ nodeToDelete.data = null;
+ while (nodeToDelete != null) {
+ nodeToDelete = deleteNodeRecursion(nodeToDelete);
+ // deleteNodeRecursion(nodeToDelete);
+ }
+ }
+
+ /**
+ * Recursively visits each node to be deleted.
+ *
+ * To delete a node, first set its data to null, then pass it into this
+ * method, then pass the node returned by this method into this method (make
+ * sure you don't delete the data of any of the nodes returned from this
+ * method!) and continue in this fashion until the node returned by this
+ * method is null
.
+ *
+ * The TSTNode instance returned by this method will be next node to be
+ * operated on by deleteNodeRecursion
(This emulates recursive
+ * method call while avoiding the JVM overhead normally associated with a
+ * recursive method.)
+ *
+ *@param currentNode
+ * The node to delete.
+ *@return The next node to be called in deleteNodeRecursion.
+ */
+ private TSTNode deleteNodeRecursion(TSTNode currentNode) {
+ if (currentNode == null) {
+ return null;
+ }
+ if (currentNode.relatives[TSTNode.EQKID] != null
+ || currentNode.data != null) {
+ return null;
+ }
+ // can't delete this node if it has a non-null eq kid or data
+ TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
+ boolean lokidNull = currentNode.relatives[TSTNode.LOKID] == null;
+ boolean hikidNull = currentNode.relatives[TSTNode.HIKID] == null;
+ int childType;
+ if (currentParent.relatives[TSTNode.LOKID] == currentNode) {
+ childType = TSTNode.LOKID;
+ } else if (currentParent.relatives[TSTNode.EQKID] == currentNode) {
+ childType = TSTNode.EQKID;
+ } else if (currentParent.relatives[TSTNode.HIKID] == currentNode) {
+ childType = TSTNode.HIKID;
+ } else {
+ rootNode = null;
+ return null;
+ }
+ if (lokidNull && hikidNull) {
+ currentParent.relatives[childType] = null;
+ return currentParent;
+ }
+ if (lokidNull) {
+ currentParent.relatives[childType] = currentNode.relatives[TSTNode.HIKID];
+ currentNode.relatives[TSTNode.HIKID].relatives[TSTNode.PARENT] = currentParent;
+ return currentParent;
+ }
+ if (hikidNull) {
+ currentParent.relatives[childType] = currentNode.relatives[TSTNode.LOKID];
+ currentNode.relatives[TSTNode.LOKID].relatives[TSTNode.PARENT] = currentParent;
+ return currentParent;
+ }
+ int deltaHi = currentNode.relatives[TSTNode.HIKID].splitchar
+ - currentNode.splitchar;
+ int deltaLo = currentNode.splitchar
+ - currentNode.relatives[TSTNode.LOKID].splitchar;
+ int movingKid;
+ TSTNode targetNode;
+ if (deltaHi == deltaLo) {
+ if (Math.random() < 0.5) {
+ deltaHi++;
+ } else {
+ deltaLo++;
+ }
+ }
+ if (deltaHi > deltaLo) {
+ movingKid = TSTNode.HIKID;
+ targetNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ movingKid = TSTNode.LOKID;
+ targetNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ while (targetNode.relatives[movingKid] != null) {
+ targetNode = targetNode.relatives[movingKid];
+ }
+ targetNode.relatives[movingKid] = currentNode.relatives[movingKid];
+ currentParent.relatives[childType] = targetNode;
+ targetNode.relatives[TSTNode.PARENT] = currentParent;
+ if (!lokidNull) {
+ currentNode.relatives[TSTNode.LOKID] = null;
+ }
+ if (!hikidNull) {
+ currentNode.relatives[TSTNode.HIKID] = null;
+ }
+ return currentParent;
+ }
+
+ /**
+ * Retrieve the object indexed by a key.
+ *
+ *@param key
+ * A String
index.
+ *@return The object retrieved from the Ternary Search Trie.
+ */
+ public Object get(String key) {
+ TSTNode node = getNode(key.trim().toLowerCase());
+ if (node == null) {
+ return null;
+ }
+ return node.data;
+ }
+
+ /**
+ * Retrieve the Float
indexed by key, increment it by one unit
+ * and store the new Float
.
+ *
+ *@param key
+ * A String
index.
+ *@return The Float
retrieved from the Ternary Search Trie.
+ */
+ public Float getAndIncrement(String key) {
+ String key2 = key.trim().toLowerCase();
+ TSTNode node = getNode(key2);
+ if (node == null) {
+ return null;
+ }
+ Float aux = (Float) (node.data);
+ if (aux == null) {
+ aux = new Float(1);
+ } else {
+ aux = new Float(aux.intValue() + 1);
+ }
+ put(key2, aux);
+ return aux;
+ }
+
+ /**
+ * Returns the key that indexes the node argument.
+ *
+ *@param node
+ * The node whose index is to be calculated.
+ *@return The String
that indexes the node argument.
+ */
+ protected String getKey(TSTNode node) {
+ StringBuffer getKeyBuffer = new StringBuffer();
+ getKeyBuffer.setLength(0);
+ getKeyBuffer.append("" + node.splitchar);
+ TSTNode currentNode;
+ TSTNode lastNode;
+ currentNode = node.relatives[TSTNode.PARENT];
+ lastNode = node;
+ while (currentNode != null) {
+ if (currentNode.relatives[TSTNode.EQKID] == lastNode) {
+ getKeyBuffer.append("" + currentNode.splitchar);
+ }
+ lastNode = currentNode;
+ currentNode = currentNode.relatives[TSTNode.PARENT];
+ }
+ getKeyBuffer.reverse();
+ return getKeyBuffer.toString();
+ }
+
+ /**
+ * Returns the node indexed by key, or null
if that node doesn't
+ * exist. Search begins at root node.
+ *
+ *@param key
+ * A String
that indexes the node that is returned.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named TernarySearchTrie.TSTNode
.
+ */
+ public TSTNode getNode(String key) {
+ return getNode(key, rootNode);
+ }
+
+ /**
+ * Returns the node indexed by key, or null
if that node doesn't
+ * exist. The search begins at root node.
+ *
+ *@param key2
+ * A String
that indexes the node that is returned.
+ *@param startNode
+ * The top node defining the subtrie to be searched.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named TernarySearchTrie.TSTNode
.
+ */
+ protected TSTNode getNode(String key2, TSTNode startNode) {
+ String key = key2.trim().toLowerCase();
+ if (key == null || startNode == null || key.length() == 0) {
+ return null;
+ }
+ TSTNode currentNode = startNode;
+ int charIndex = 0;
+ while (true) {
+ if (currentNode == null) {
+ return null;
+ }
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ return currentNode;
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ }
+
+ /**
+ * Returns the node indexed by key, creating that node if it doesn't exist,
+ * and creating any required intermediate nodes if they don't exist.
+ *
+ *@param key
+ * A String
that indexes the node that is returned.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named TernarySearchTrie.TSTNode
.
+ *@exception NullPointerException
+ * If the key is null
.
+ *@exception IllegalArgumentException
+ * If the key is an empty String
.
+ */
+ protected TSTNode getOrCreateNode(String key) throws NullPointerException,
+ IllegalArgumentException {
+ if (key == null) {
+ throw new NullPointerException(
+ "attempt to get or create node with null key");
+ }
+ if (key.length() == 0) {
+ throw new IllegalArgumentException(
+ "attempt to get or create node with key of zero length");
+ }
+ if (rootNode == null) {
+ rootNode = new TSTNode(key.charAt(0), null);
+ }
+ TSTNode currentNode = rootNode;
+ int charIndex = 0;
+ while (true) {
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ return currentNode;
+ }
+ if (currentNode.relatives[TSTNode.EQKID] == null) {
+ currentNode.relatives[TSTNode.EQKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ if (currentNode.relatives[TSTNode.LOKID] == null) {
+ currentNode.relatives[TSTNode.LOKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ if (currentNode.relatives[TSTNode.HIKID] == null) {
+ currentNode.relatives[TSTNode.HIKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ }
+
+ /**
+ * Returns a List
of keys that almost match the argument key.
+ * Keys returned will have exactly diff characters that do not match the
+ * target key, where diff is equal to the last value passed in as an argument
+ * to the setMatchAlmostDiff
method.
+ *
+ * If the matchAlmost
method is called before the
+ * setMatchAlmostDiff
method has been called for the first time,
+ * then diff = 0.
+ *
+ *@param key
+ * The target key.
+ *@return A List
with the results.
+ */
+ public List matchAlmost(String key) {
+ return matchAlmost(key, defaultNumReturnValues);
+ }
+
+ /**
+ * Returns a List
of keys that almost match the argument key.
+ * Keys returned will have exactly diff characters that do not match the
+ * target key, where diff is equal to the last value passed in as an argument
+ * to the setMatchAlmostDiff
method.
+ *
+ * If the matchAlmost
method is called before the
+ * setMatchAlmostDiff
method has been called for the first time,
+ * then diff = 0.
+ *
+ *@param key
+ * The target key.
+ *@param numReturnValues
+ * The maximum number of values returned by this method.
+ *@return A List
with the results
+ */
+ public List matchAlmost(String key, int numReturnValues) {
+ return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
+ ((numReturnValues < 0) ? -1 : numReturnValues), new Vector(), false);
+ }
+
+ /**
+ * Recursivelly vists the nodes in order to find the ones that almost match a
+ * given key.
+ *
+ *@param currentNode
+ * The current node.
+ *@param charIndex
+ * The current char.
+ *@param d
+ * The number of differences so far.
+ *@param matchAlmostNumReturnValues
+ * The maximum number of values in the result List
.
+ *@param matchAlmostResult2
+ * The results so far.
+ *@param upTo
+ * If true all keys having up to and including matchAlmostDiff
+ * mismatched letters will be included in the result (including a key
+ * that is exactly the same as the target string) otherwise keys will
+ * be included in the result only if they have exactly
+ * matchAlmostDiff number of mismatched letters.
+ *@param matchAlmostKey
+ * The key being searched.
+ *@return A List
with the results.
+ */
+ private List matchAlmostRecursion(TSTNode currentNode, int charIndex,
+ int d, String matchAlmostKey, int matchAlmostNumReturnValues,
+ List matchAlmostResult2, boolean upTo) {
+ if ((currentNode == null)
+ || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
+ || (d < 0) || (charIndex >= matchAlmostKey.length())) {
+ return matchAlmostResult2;
+ }
+ int charComp = compareCharsAlphabetically(matchAlmostKey.charAt(charIndex),
+ currentNode.splitchar);
+ List matchAlmostResult = matchAlmostResult2;
+ if ((d > 0) || (charComp < 0)) {
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.LOKID], charIndex, d,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult,
+ upTo);
+ }
+ int nextD = (charComp == 0) ? d : d - 1;
+ boolean cond = (upTo) ? (nextD >= 0) : (nextD == 0);
+ if ((matchAlmostKey.length() == charIndex + 1) && cond
+ && (currentNode.data != null)) {
+ matchAlmostResult.add(getKey(currentNode));
+ }
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.EQKID], charIndex + 1, nextD,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
+ if ((d > 0) || (charComp > 0)) {
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.HIKID], charIndex, d,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult,
+ upTo);
+ }
+ return matchAlmostResult;
+ }
+
+ /**
+ * Returns an alphabetical List
of all keys in the trie that
+ * begin with a given prefix. Only keys for nodes having non-null data are
+ * included in the List
.
+ *
+ *@param prefix
+ * Each key returned from this method will begin with the characters
+ * in prefix.
+ *@return A List
with the results.
+ */
+ public List matchPrefix(String prefix) {
+ return matchPrefix(prefix, defaultNumReturnValues);
+ }
+
+ /**
+ * Returns an alphabetical List
of all keys in the trie that
+ * begin with a given prefix. Only keys for nodes having non-null data are
+ * included in the List
.
+ *
+ *@param prefix
+ * Each key returned from this method will begin with the characters
+ * in prefix.
+ *@param numReturnValues
+ * The maximum number of values returned from this method.
+ *@return A List
with the results
+ */
+ public List matchPrefix(String prefix, int numReturnValues) {
+ Vector sortKeysResult = new Vector();
+ TSTNode startNode = getNode(prefix);
+ if (startNode == null) {
+ return sortKeysResult;
+ }
+ if (startNode.data != null) {
+ sortKeysResult.addElement(getKey(startNode));
+ }
+ return sortKeysRecursion(startNode.relatives[TSTNode.EQKID],
+ ((numReturnValues < 0) ? -1 : numReturnValues), sortKeysResult);
+ }
+
+ /**
+ * Returns the number of nodes in the trie that have non-null data.
+ *
+ *@return The number of nodes in the trie that have non-null data.
+ */
+ public int numDataNodes() {
+ return numDataNodes(rootNode);
+ }
+
+ /**
+ * Returns the number of nodes in the subtrie below and including the starting
+ * node. The method counts only nodes that have non-null data.
+ *
+ *@param startingNode
+ * The top node of the subtrie. the node that defines the subtrie.
+ *@return The total number of nodes in the subtrie.
+ */
+ protected int numDataNodes(TSTNode startingNode) {
+ return recursiveNodeCalculator(startingNode, true, 0);
+ }
+
+ /**
+ * Returns the total number of nodes in the trie. The method counts nodes
+ * whether or not they have data.
+ *
+ *@return The total number of nodes in the trie.
+ */
+ public int numNodes() {
+ return numNodes(rootNode);
+ }
+
+ /**
+ * Returns the total number of nodes in the subtrie below and including the
+ * starting Node. The method counts nodes whether or not they have data.
+ *
+ *@param startingNode
+ * The top node of the subtrie. The node that defines the subtrie.
+ *@return The total number of nodes in the subtrie.
+ */
+ protected int numNodes(TSTNode startingNode) {
+ return recursiveNodeCalculator(startingNode, false, 0);
+ }
+
+ /**
+ * Stores a value in the trie. The value may be retrieved using the key.
+ *
+ *@param key
+ * A String
that indexes the object to be stored.
+ *@param value
+ * The object to be stored in the Trie.
+ */
+ public void put(String key, Object value) {
+ getOrCreateNode(key.trim().toLowerCase()).data = value;
+ }
+
+ /**
+ * Recursivelly visists each node to calculate the number of nodes.
+ *
+ *@param currentNode
+ * The current node.
+ *@param checkData
+ * If true we check the data to be different of null
.
+ *@param numNodes2
+ * The number of nodes so far.
+ *@return The number of nodes accounted.
+ */
+ private int recursiveNodeCalculator(TSTNode currentNode, boolean checkData,
+ int numNodes2) {
+ if (currentNode == null) {
+ return numNodes2;
+ }
+ int numNodes = recursiveNodeCalculator(
+ currentNode.relatives[TSTNode.LOKID], checkData, numNodes2);
+ numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.EQKID],
+ checkData, numNodes);
+ numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.HIKID],
+ checkData, numNodes);
+ if (checkData) {
+ if (currentNode.data != null) {
+ numNodes++;
+ }
+ } else {
+ numNodes++;
+ }
+ return numNodes;
+ }
+
+ /**
+ * Removes the value indexed by key. Also removes all nodes that are rendered
+ * unnecessary by the removal of this data.
+ *
+ *@param key
+ * A string
that indexes the object to be removed from
+ * the Trie.
+ */
+ public void remove(String key) {
+ deleteNode(getNode(key.trim().toLowerCase()));
+ }
+
+ /**
+ * Sets the number of characters by which words can differ from target word
+ * when calling the matchAlmost
method.
+ *
+ * Arguments less than 0 will set the char difference to 0, and arguments
+ * greater than 3 will set the char difference to 3.
+ *
+ *@param diff
+ * The number of characters by which words can differ from target
+ * word.
+ */
+ public void setMatchAlmostDiff(int diff) {
+ if (diff < 0) {
+ matchAlmostDiff = 0;
+ } else if (diff > 3) {
+ matchAlmostDiff = 3;
+ } else {
+ matchAlmostDiff = diff;
+ }
+ }
+
+ /**
+ * Sets the default maximum number of values returned from the
+ * matchPrefix
and matchAlmost
methods.
+ *
+ * The value should be set this to -1 to get an unlimited number of return
+ * values. note that the methods mentioned above provide overloaded versions
+ * that allow you to specify the maximum number of return values, in which
+ * case this value is temporarily overridden.
+ *
+ **@param num
+ * The number of values that will be returned when calling the
+ * methods above.
+ */
+ public void setNumReturnValues(int num) {
+ defaultNumReturnValues = (num < 0) ? -1 : num;
+ }
+
+ /**
+ * Returns keys sorted in alphabetical order. This includes the start Node and
+ * all nodes connected to the start Node.
+ *
+ * The number of keys returned is limited to numReturnValues. To get a list
+ * that isn't limited in size, set numReturnValues to -1.
+ *
+ *@param startNode
+ * The top node defining the subtrie to be searched.
+ *@param numReturnValues
+ * The maximum number of values returned from this method.
+ *@return A List
with the results.
+ */
+ protected List sortKeys(TSTNode startNode, int numReturnValues) {
+ return sortKeysRecursion(startNode, ((numReturnValues < 0) ? -1
+ : numReturnValues), new Vector());
+ }
+
+ /**
+ * Returns keys sorted in alphabetical order. This includes the current Node
+ * and all nodes connected to the current Node.
+ *
+ * Sorted keys will be appended to the end of the resulting List
.
+ * The result may be empty when this method is invoked, but may not be
+ * null
.
+ *
+ *@param currentNode
+ * The current node.
+ *@param sortKeysNumReturnValues
+ * The maximum number of values in the result.
+ *@param sortKeysResult2
+ * The results so far.
+ *@return A List
with the results.
+ */
+ private List sortKeysRecursion(TSTNode currentNode,
+ int sortKeysNumReturnValues, List sortKeysResult2) {
+ if (currentNode == null) {
+ return sortKeysResult2;
+ }
+ List sortKeysResult = sortKeysRecursion(
+ currentNode.relatives[TSTNode.LOKID], sortKeysNumReturnValues,
+ sortKeysResult2);
+ if (sortKeysNumReturnValues != -1
+ && sortKeysResult.size() >= sortKeysNumReturnValues) {
+ return sortKeysResult;
+ }
+ if (currentNode.data != null) {
+ sortKeysResult.add(getKey(currentNode));
+ }
+ sortKeysResult = sortKeysRecursion(currentNode.relatives[TSTNode.EQKID],
+ sortKeysNumReturnValues, sortKeysResult);
+ return sortKeysRecursion(currentNode.relatives[TSTNode.HIKID],
+ sortKeysNumReturnValues, sortKeysResult);
+ }
+
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java b/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java
new file mode 100644
index 00000000000..d725d2e77c0
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java
@@ -0,0 +1,142 @@
+package org.apache.solr.spelling.suggest.tst;
+
+import java.util.*;
+
+public class TSTAutocomplete {
+
+ /**
+ * Inserting keys in TST in the order middle,small,big (lexicographic measure)
+ * recursively creates a balanced tree which reduces insertion and search
+ * times significantly.
+ *
+ * @param tokens
+ * Sorted list of keys to be inserted in TST.
+ * @param lo
+ * stores the lower index of current list.
+ * @param hi
+ * stores the higher index of current list.
+ * @param root
+ * a reference object to root of TST.
+ */
+ public void balancedTree(Object[] tokens, Object[] vals, int lo, int hi,
+ TernaryTreeNode root) {
+ if (lo > hi) return;
+ int mid = (lo + hi) / 2;
+ root = insert(root, (String) tokens[mid], vals[mid], 0);
+ balancedTree(tokens, vals, lo, mid - 1, root);
+ balancedTree(tokens, vals, mid + 1, hi, root);
+ }
+
+ /**
+ * Inserts a key in TST creating a series of Binary Search Trees at each node.
+ * The key is actually stored across the eqKid of each node in a successive
+ * manner.
+ *
+ * @param currentNode
+ * a reference node where the insertion will take currently.
+ * @param s
+ * key to be inserted in TST.
+ * @param x
+ * index of character in key to be inserted currently.
+ * @return currentNode The new reference to root node of TST
+ */
+ public TernaryTreeNode insert(TernaryTreeNode currentNode, String s,
+ Object val, int x) {
+ if (s == null || s.length() <= x) {
+ return currentNode;
+ }
+ if (currentNode == null) {
+ TernaryTreeNode newNode = new TernaryTreeNode();
+ newNode.splitchar = s.charAt(x);
+ currentNode = newNode;
+ if (x < s.length() - 1) {
+ currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
+ } else {
+ currentNode.token = s;
+ currentNode.val = val;
+ return currentNode;
+ }
+ } else if (currentNode.splitchar > s.charAt(x)) {
+ currentNode.loKid = insert(currentNode.loKid, s, val, x);
+ } else if (currentNode.splitchar == s.charAt(x)) {
+ if (x < s.length() - 1) {
+ currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
+ } else {
+ currentNode.token = s;
+ currentNode.val = val;
+ return currentNode;
+ }
+ } else {
+ currentNode.hiKid = insert(currentNode.hiKid, s, val, x);
+ }
+ return currentNode;
+ }
+
+ /**
+ * Auto-completes a given prefix query using Depth-First Search with the end
+ * of prefix as source node each time finding a new leaf to get a complete key
+ * to be added in the suggest list.
+ *
+ * @param root
+ * a reference to root node of TST.
+ * @param s
+ * prefix query to be auto-completed.
+ * @param x
+ * index of current character to be searched while traversing through
+ * the prefix in TST.
+ * @return suggest list of auto-completed keys for the given prefix query.
+ */
+ public ArrayList prefixCompletion(TernaryTreeNode root,
+ String s, int x) {
+
+ TernaryTreeNode p = root;
+ ArrayList suggest = new ArrayList();
+
+ while (p != null) {
+ if (s.charAt(x) < p.splitchar) {
+ p = p.loKid;
+ } else if (s.charAt(x) == p.splitchar) {
+ if (x == s.length() - 1) {
+ break;
+ } else {
+ x++;
+ }
+ p = p.eqKid;
+ } else {
+ p = p.hiKid;
+ }
+ }
+
+ if (p == null) return suggest;
+ if (p.eqKid == null && p.token == null) return suggest;
+ if (p.eqKid == null && p.token != null) {
+ suggest.add(p);
+ return suggest;
+ }
+
+ if (p.token != null) {
+ suggest.add(p);
+ }
+ p = p.eqKid;
+
+ Stack st = new Stack();
+ st.push(p);
+ while (!st.empty()) {
+ TernaryTreeNode top = (TernaryTreeNode) st.peek();
+ st.pop();
+ if (top.token != null) {
+ suggest.add(top);
+ }
+ if (top.eqKid != null) {
+ st.push(top.eqKid);
+ }
+ if (top.loKid != null) {
+ st.push(top.loKid);
+ }
+ if (top.hiKid != null) {
+ st.push(top.hiKid);
+ }
+ }
+ return suggest;
+ }
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java b/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
new file mode 100644
index 00000000000..27efa328a32
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
@@ -0,0 +1,89 @@
+package org.apache.solr.spelling.suggest.tst;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.spelling.suggest.Lookup;
+import org.apache.solr.spelling.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.solr.util.SortedIterator;
+import org.apache.solr.util.TermFreqIterator;
+
+public class TSTLookup extends Lookup {
+ TernaryTreeNode root;
+ TSTAutocomplete autocomplete;
+
+ @Override
+ public void init(NamedList config, SolrCore core) {
+ }
+
+ @Override
+ public void build(TermFreqIterator tfit) throws IOException {
+ root = new TernaryTreeNode();
+ autocomplete = new TSTAutocomplete();
+ // buffer first
+ if (!(tfit instanceof SortedIterator)) {
+ // make sure it's sorted
+ tfit = new SortedTermFreqIteratorWrapper(tfit);
+ }
+
+ ArrayList tokens = new ArrayList();
+ ArrayList vals = new ArrayList();
+ while (tfit.hasNext()) {
+ tokens.add(tfit.next());
+ vals.add(new Float(tfit.freq()));
+ }
+ autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
+ }
+
+ @Override
+ public boolean add(String key, Object value) {
+ autocomplete.insert(root, key, value, 0);
+ // XXX we don't know if a new node was created
+ return true;
+ }
+
+ @Override
+ public Object get(String key) {
+ throw new UnsupportedOperationException("get() is not supported here");
+ }
+
+ @Override
+ public List lookup(String key, boolean onlyMorePopular, int num) {
+ List list = autocomplete.prefixCompletion(root, key, 0);
+ List res = new ArrayList();
+ if (list == null || list.size() == 0) {
+ return res;
+ }
+ int maxCnt = Math.min(num, list.size());
+ if (onlyMorePopular) {
+ LookupPriorityQueue queue = new LookupPriorityQueue(num);
+ for (TernaryTreeNode ttn : list) {
+ queue.insertWithOverflow(new LookupResult(ttn.token, (Float)ttn.val));
+ }
+ for (LookupResult lr : queue.getResults()) {
+ res.add(lr);
+ }
+ } else {
+ for (int i = 0; i < maxCnt; i++) {
+ TernaryTreeNode ttn = list.get(i);
+ res.add(new LookupResult(ttn.token, (Float)ttn.val));
+ }
+ }
+ return res;
+ }
+
+ @Override
+ public boolean load(File storeDir) throws IOException {
+ return false;
+ }
+
+ @Override
+ public boolean store(File storeDir) throws IOException {
+ return false;
+ }
+
+}
diff --git a/solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java b/solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java
new file mode 100644
index 00000000000..61df16db603
--- /dev/null
+++ b/solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java
@@ -0,0 +1,21 @@
+package org.apache.solr.spelling.suggest.tst;
+
+/**
+ * The class creates a TST node.
+ * @variable splitchar the character stored by a node.
+ * @variable loKid a reference object to the node containing character smaller than
+ * this node's character.
+ * @variable eqKid a reference object to the node containg character next to this
+ * node's character as occuring in the inserted token.
+ * @variable hiKid a reference object to the node containing character higher than
+ * this node's character.
+ * @variable token used by leaf nodes to store the complete tokens to be added to
+ * suggest list while auto-completing the prefix.
+ */
+
+public class TernaryTreeNode {
+ char splitchar;
+ TernaryTreeNode loKid, eqKid, hiKid;
+ String token;
+ Object val;
+}
diff --git a/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java b/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
index fe88b460151..91413ea4b95 100644
--- a/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
+++ b/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
@@ -53,7 +53,7 @@ public class HighFrequencyDictionary implements Dictionary {
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements Iterator {
+ final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
private TermsEnum termsEnum;
private BytesRef actualTerm;
private boolean hasNextCalled;
@@ -74,8 +74,12 @@ public class HighFrequencyDictionary implements Dictionary {
private boolean isFrequent(int freq) {
return freq >= minNumDocs;
}
-
- public Object next() {
+
+ public float freq() {
+ return termsEnum.docFreq();
+ }
+
+ public String next() {
if (!hasNextCalled && !hasNext()) {
return null;
}
diff --git a/solr/src/java/org/apache/solr/util/SortedIterator.java b/solr/src/java/org/apache/solr/util/SortedIterator.java
new file mode 100644
index 00000000000..992188eff34
--- /dev/null
+++ b/solr/src/java/org/apache/solr/util/SortedIterator.java
@@ -0,0 +1,11 @@
+package org.apache.solr.util;
+
+import java.util.Iterator;
+
+/**
+ * Marker interface to signal that elements coming from {@link Iterator}
+ * come in ascending lexicographic order.
+ */
+public interface SortedIterator {
+
+}
diff --git a/solr/src/java/org/apache/solr/util/TermFreqIterator.java b/solr/src/java/org/apache/solr/util/TermFreqIterator.java
new file mode 100644
index 00000000000..0abcf831bc5
--- /dev/null
+++ b/solr/src/java/org/apache/solr/util/TermFreqIterator.java
@@ -0,0 +1,37 @@
+package org.apache.solr.util;
+
+import java.util.Iterator;
+
+public interface TermFreqIterator extends Iterator {
+
+ public float freq();
+
+ public static class TermFreqIteratorWrapper implements TermFreqIterator {
+ private Iterator wrapped;
+
+ public TermFreqIteratorWrapper(Iterator wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ @Override
+ public float freq() {
+ return 1.0f;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return wrapped.hasNext();
+ }
+
+ @Override
+ public String next() {
+ return wrapped.next().toString();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ }
+}
diff --git a/solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java b/solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
index efb79eb4ff0..95428e78b5f 100644
--- a/solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
+++ b/solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
@@ -35,7 +35,7 @@ import java.util.Iterator;
public class DummyCustomParamSpellChecker extends SolrSpellChecker {
@Override
- public void reload() throws IOException {
+ public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
}
diff --git a/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java b/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
new file mode 100644
index 00000000000..5fce8c45251
--- /dev/null
+++ b/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
@@ -0,0 +1,253 @@
+package org.apache.solr.spelling.suggest;
+
+import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SpellingParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestHandler;
+import org.apache.solr.response.QueryResponseWriter;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.suggest.Lookup.LookupResult;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
+import org.apache.solr.spelling.suggest.tst.TSTLookup;
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.TermFreqIterator;
+import org.apache.solr.util.TestHarness;
+
+public class SuggesterTest extends AbstractSolrTestCase {
+ SolrRequestHandler handler;
+
+ @Override
+ public String getSchemaFile() {
+ return "schema-spellchecker.xml";
+ }
+
+ @Override
+ public String getSolrConfigFile() {
+ return "solrconfig-spellchecker.xml";
+ }
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ // empty
+ h.validateUpdate("*:*");
+ // populate
+ h.validateAddDoc(
+ "id", "1",
+ "text", "acceptable accidentally accommodate acquire"
+ );
+ h.validateAddDoc(
+ "id", "2",
+ "text", "believe bellwether accommodate acquire"
+ );
+ h.validateAddDoc(
+ "id", "3",
+ "text", "cemetery changeable conscientious consensus acquire bellwether"
+ );
+ h.validateUpdate("");
+ handler = h.getCore().getRequestHandler("/suggest");
+ // build
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(SpellingParams.SPELLCHECK_BUILD, true);
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), params);
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ handler.handleRequest(req, rsp);
+ }
+
+ private String assertXPath(SolrCore core, SolrQueryRequest req, SolrQueryResponse rsp, String... tests) throws Exception {
+ StringWriter sw = new StringWriter(32000);
+ QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
+ responseWriter.write(sw,req,rsp);
+ req.close();
+ System.out.println(sw.toString());
+ return h.validateXPath(sw.toString(), tests);
+ }
+
+ public void testSuggestions() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "ac");
+ params.set(SpellingParams.SPELLCHECK_COUNT, 2);
+ params.set(SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, true);
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), params);
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ handler.handleRequest(req, rsp);
+ String res = assertXPath(h.getCore(), req, rsp,
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
+ );
+ assertNull(res, res);
+ }
+
+ public void testReload() throws Exception {
+ String coreName = h.getCore().getName();
+ RefCounted searcher = h.getCore().getSearcher();
+ SolrIndexSearcher indexSearcher = searcher.get();
+ log.info("Core " + coreName + ", NumDocs before reload: " + indexSearcher.getIndexReader().numDocs());
+ log.info("Directory: " + indexSearcher.getIndexDir());
+ searcher.decref();
+ h.close();
+ solrConfig = TestHarness.createConfig(getSolrConfigFile());
+ h = new TestHarness( dataDir.getAbsolutePath(),
+ solrConfig,
+ getSchemaFile());
+ searcher = h.getCore().getSearcher();
+ indexSearcher = searcher.get();
+ log.info("Core " + coreName + ", NumDocs now: " + indexSearcher.getIndexReader().numDocs());
+ log.info("Directory: " + indexSearcher.getIndexDir());
+ searcher.decref();
+ // rebuilds on commit
+ h.validateUpdate("");
+ handler = h.getCore().getRequestHandler("/suggest");
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "ac");
+ params.set(SpellingParams.SPELLCHECK_COUNT, 2);
+ params.set(SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, true);
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), params);
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ handler.handleRequest(req, rsp);
+ String res = assertXPath(h.getCore(), req, rsp,
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
+ );
+ assertNull(res, res);
+ }
+
+ private TermFreqIterator getTFIT() {
+ final int count = 100000;
+ TermFreqIterator tfit = new TermFreqIterator() {
+ Random r = new Random(1234567890L);
+ Random r1 = new Random(1234567890L);
+ int pos;
+
+ @Override
+ public float freq() {
+ return r1.nextInt(4);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return pos < count;
+ }
+
+ @Override
+ public String next() {
+ pos++;
+ return Long.toString(r.nextLong());
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ };
+ return tfit;
+ }
+
+ private void _benchmark(Lookup lookup, Map ref, boolean estimate, Bench bench) throws Exception {
+ long start = System.currentTimeMillis();
+ lookup.build(getTFIT());
+ long buildTime = System.currentTimeMillis() - start;
+ TermFreqIterator tfit = getTFIT();
+ long elapsed = 0;
+ while (tfit.hasNext()) {
+ String key = tfit.next();
+ // take only the first part of the key
+ int len = key.length() > 4 ? key.length() / 3 : 2;
+ String prefix = key.substring(0, len);
+ start = System.nanoTime();
+ List res = lookup.lookup(prefix, true, 10);
+ elapsed += System.nanoTime() - start;
+ assertTrue(res.size() > 0);
+ for (LookupResult lr : res) {
+ assertTrue(lr.key.startsWith(prefix));
+ }
+ if (ref != null) { // verify the counts
+ Integer Cnt = ref.get(key);
+ if (Cnt == null) { // first pass
+ ref.put(key, res.size());
+ } else {
+ assertEquals(key + ", prefix: " + prefix, Cnt.intValue(), res.size());
+ }
+ }
+ }
+ if (estimate) {
+ RamUsageEstimator rue = new RamUsageEstimator();
+ long size = rue.estimateRamUsage(lookup);
+ System.err.println(lookup.getClass().getSimpleName() + " - size=" + size);
+ }
+ if (bench != null) {
+ bench.buildTime += buildTime;
+ bench.lookupTime += elapsed;
+ }
+ }
+
+ class Bench {
+ long buildTime;
+ long lookupTime;
+ }
+
+ public void testBenchmark() throws Exception {
+ // this benchmark is very time consuming
+ boolean doTest = false;
+ if (!doTest) {
+ return;
+ }
+ Map ref = new HashMap();
+ JaspellLookup jaspell = new JaspellLookup();
+ TSTLookup tst = new TSTLookup();
+
+ _benchmark(tst, ref, true, null);
+ _benchmark(jaspell, ref, true, null);
+ jaspell = null;
+ tst = null;
+ int count = 100;
+ Bench b = runBenchmark(JaspellLookup.class, count);
+ System.err.println(JaspellLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime / count) +
+ " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
+ b = runBenchmark(TSTLookup.class, count);
+ System.err.println(TSTLookup.class.getSimpleName() + ": buildTime[ms]=" + (b.buildTime / count) +
+ " lookupTime[ms]=" + (b.lookupTime / count / 1000000));
+ }
+
+ private Bench runBenchmark(Class extends Lookup> cls, int count) throws Exception {
+ System.err.println("* Running " + count + " iterations for " + cls.getSimpleName() + " ...");
+ System.err.println(" - warm-up 10 iterations...");
+ for (int i = 0; i < 10; i++) {
+ System.runFinalization();
+ System.gc();
+ Lookup lookup = cls.newInstance();
+ _benchmark(lookup, null, false, null);
+ lookup = null;
+ }
+ Bench b = new Bench();
+ System.err.print(" - main iterations:"); System.err.flush();
+ for (int i = 0; i < count; i++) {
+ System.runFinalization();
+ System.gc();
+ Lookup lookup = cls.newInstance();
+ _benchmark(lookup, null, false, b);
+ lookup = null;
+ if (i > 0 && (i % 10 == 0)) {
+ System.err.print(" " + i);
+ System.err.flush();
+ }
+ }
+ System.err.println();
+ return b;
+ }
+}
diff --git a/solr/src/test/test-files/solr/conf/schema-spellchecker.xml b/solr/src/test/test-files/solr/conf/schema-spellchecker.xml
index f6f9045c171..7124065626d 100644
--- a/solr/src/test/test-files/solr/conf/schema-spellchecker.xml
+++ b/solr/src/test/test-files/solr/conf/schema-spellchecker.xml
@@ -68,9 +68,13 @@
+
+
+
+
id
diff --git a/solr/src/test/test-files/solr/conf/solrconfig-spellchecker.xml b/solr/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
index 64e0542cd4a..e6d18793c51 100644
--- a/solr/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
+++ b/solr/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
@@ -92,6 +92,30 @@
+
+
+
+ suggest
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.jaspell.JaspellLookup
+ suggest
+ true
+
+
+
+
+
+ true
+ suggest
+ true
+
+
+ suggest
+
+
+