- SOLR-81: SpellCheckerRequentHandler

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@519107 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2007-03-16 19:28:47 +00:00
parent a1b021eb60
commit f1c149f8fb
5 changed files with 173 additions and 2 deletions

View File

@ -120,7 +120,11 @@ New Features
allows for specifying the amount of default slop to use when parsing
explicit phrase queries from the user.
(Adam Hiatt via hossman)
16. SOLR-81: SpellCheckerRequestHandler that uses the SpellChecker from
the Lucene contrib.
(Otis Gospodnetic and Adam Hiatt)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main
user query, not boost or filter queries (klaas).

View File

@ -0,0 +1,42 @@
<add>
<doc>
<field name="id">1</field>
<field name="word">ipod</field>
</doc>
<doc>
<field name="id">2</field>
<field name="word">ipod nano</field>
</doc>
<doc>
<field name="id">3</field>
<field name="word">ipod video</field>
</doc>
<doc>
<field name="id">4</field>
<field name="word">ipod shuffle</field>
</doc>
<doc>
<field name="id">5</field>
<field name="word">wii</field>
</doc>
<doc>
<field name="id">6</field>
<field name="word">blackberry</field>
</doc>
<doc>
<field name="id">7</field>
<field name="word">blackjack</field>
</doc>
<doc>
<field name="id">8</field>
<field name="word">creative</field>
</doc>
<doc>
<field name="id">9</field>
<field name="word">creative labs</field>
</doc>
<doc>
<field name="id">10</field>
<field name="word">creative zen</field>
</doc>
</add>

View File

@ -341,7 +341,24 @@
2&lt;-1 5&lt;-2 6&lt;90%
</str>
</requestHandler>
<!-- SpellCheckerRequestHandler takes in a word (or several words) as the
value of the "q" parameter and returns a list of alternative spelling
suggestions. If invoked with a ...&cmd=rebuild, it will rebuild the
spellchecker index.
-->
<requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler">
<!-- default values for query parameters -->
<lst name="defaults">
<int name="suggestionCount">1</int>
<str name="version">1.0</str>
</lst>
<!-- main init params for handler -->
<str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
<str name="termSourceField">word</str>
</requestHandler>
<!-- Standard update plugin. If we put this on /update, it will get all the new goodness -->
<requestHandler name="/update/xml" class="solr.XmlUpdateRequestHandler" >

View File

@ -0,0 +1,2 @@
AnyObjectId[d7cdc56b23f6c6d18a5af1f4226428ad869add60] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,106 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.util.NamedList;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
/**
* Takes a string (e.g. a query string) as the value of the "q" parameter
* and looks up alternative spelling suggestions in the spellchecker.
* The spellchecker used by this handler is the Lucene contrib SpellChecker.
* @see http://wiki.apache.org/jakarta-lucene/SpellChecker
*
* @author Otis Gospodnetic
*/
public class SpellCheckerRequestHandler extends RequestHandlerBase {
private static SpellChecker spellChecker;
/*
* From http://wiki.apache.org/jakarta-lucene/SpellChecker
* If reader and restrictToField are both not null:
* 1. The returned words are restricted only to the words presents in the field
* "restrictToField "of the Lucene Index "reader".
*
* 2. The list is also sorted with a second criterium: the popularity (the
* frequence) of the word in the user field.
*
* 3. If "onlyMorePopular" is true and the mispelled word exist in the user field,
* return only the words more frequent than this.
*
*/
private static IndexReader reader = null;
private String restrictToField = null;
private boolean onlyMorePopular = false;
private String spellcheckerIndexDir;
public void init(NamedList args) {
super.init( args );
spellcheckerIndexDir = invariants.get("spellcheckerIndexDir");
try {
spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
} catch (IOException e) {
throw new RuntimeException("Cannot open SpellChecker index", e);
}
}
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
throws Exception {
SolrParams p = req.getParams();
String words = p.get("q");
System.out.println(getDescription());
int numSug = 5;
String[] suggestions = spellChecker.suggestSimilar(words, numSug,
reader, restrictToField, onlyMorePopular);
rsp.add("suggestions", Arrays.asList(suggestions));
}
//////////////////////// SolrInfoMBeans methods //////////////////////
public String getVersion() {
return SolrCore.version;
}
public String getDescription() {
return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
}
public String getSourceId() {
return "$Id$";
}
public String getSource() {
return "$URL$";
}
public URL[] getDocs() {
return null;
}
}