mirror of https://github.com/apache/lucene.git
- SOLR-81: SpellCheckerRequentHandler
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@519107 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a1b021eb60
commit
f1c149f8fb
|
@ -120,7 +120,11 @@ New Features
|
|||
allows for specifying the amount of default slop to use when parsing
|
||||
explicit phrase queries from the user.
|
||||
(Adam Hiatt via hossman)
|
||||
|
||||
|
||||
16. SOLR-81: SpellCheckerRequestHandler that uses the SpellChecker from
|
||||
the Lucene contrib.
|
||||
(Otis Gospodnetic and Adam Hiatt)
|
||||
|
||||
Changes in runtime behavior
|
||||
1. Highlighting using DisMax will only pick up terms from the main
|
||||
user query, not boost or filter queries (klaas).
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
<add>
|
||||
<doc>
|
||||
<field name="id">1</field>
|
||||
<field name="word">ipod</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">2</field>
|
||||
<field name="word">ipod nano</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">3</field>
|
||||
<field name="word">ipod video</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">4</field>
|
||||
<field name="word">ipod shuffle</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">5</field>
|
||||
<field name="word">wii</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">6</field>
|
||||
<field name="word">blackberry</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">7</field>
|
||||
<field name="word">blackjack</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">8</field>
|
||||
<field name="word">creative</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">9</field>
|
||||
<field name="word">creative labs</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">10</field>
|
||||
<field name="word">creative zen</field>
|
||||
</doc>
|
||||
</add>
|
|
@ -341,7 +341,24 @@
|
|||
2<-1 5<-2 6<90%
|
||||
</str>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
|
||||
<!-- SpellCheckerRequestHandler takes in a word (or several words) as the
|
||||
value of the "q" parameter and returns a list of alternative spelling
|
||||
suggestions. If invoked with a ...&cmd=rebuild, it will rebuild the
|
||||
spellchecker index.
|
||||
-->
|
||||
<requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler">
|
||||
<!-- default values for query parameters -->
|
||||
<lst name="defaults">
|
||||
<int name="suggestionCount">1</int>
|
||||
<str name="version">1.0</str>
|
||||
</lst>
|
||||
<!-- main init params for handler -->
|
||||
<str name="spellcheckerIndexDir">/home/otis/dev/repos/lucene/solr/trunk/example/solr/data/index</str>
|
||||
<str name="termSourceField">word</str>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- Standard update plugin. If we put this on /update, it will get all the new goodness -->
|
||||
<requestHandler name="/update/xml" class="solr.XmlUpdateRequestHandler" >
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[d7cdc56b23f6c6d18a5af1f4226428ad869add60] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,106 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.request;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.spell.SpellChecker;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
import org.apache.solr.util.NamedList;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Takes a string (e.g. a query string) as the value of the "q" parameter
|
||||
* and looks up alternative spelling suggestions in the spellchecker.
|
||||
* The spellchecker used by this handler is the Lucene contrib SpellChecker.
|
||||
* @see http://wiki.apache.org/jakarta-lucene/SpellChecker
|
||||
*
|
||||
* @author Otis Gospodnetic
|
||||
*/
|
||||
public class SpellCheckerRequestHandler extends RequestHandlerBase {
|
||||
|
||||
private static SpellChecker spellChecker;
|
||||
|
||||
/*
|
||||
* From http://wiki.apache.org/jakarta-lucene/SpellChecker
|
||||
* If reader and restrictToField are both not null:
|
||||
* 1. The returned words are restricted only to the words presents in the field
|
||||
* "restrictToField "of the Lucene Index "reader".
|
||||
*
|
||||
* 2. The list is also sorted with a second criterium: the popularity (the
|
||||
* frequence) of the word in the user field.
|
||||
*
|
||||
* 3. If "onlyMorePopular" is true and the mispelled word exist in the user field,
|
||||
* return only the words more frequent than this.
|
||||
*
|
||||
*/
|
||||
private static IndexReader reader = null;
|
||||
private String restrictToField = null;
|
||||
private boolean onlyMorePopular = false;
|
||||
|
||||
private String spellcheckerIndexDir;
|
||||
|
||||
public void init(NamedList args) {
|
||||
super.init( args );
|
||||
spellcheckerIndexDir = invariants.get("spellcheckerIndexDir");
|
||||
try {
|
||||
spellChecker = new SpellChecker(FSDirectory.getDirectory(spellcheckerIndexDir));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Cannot open SpellChecker index", e);
|
||||
}
|
||||
}
|
||||
|
||||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
|
||||
throws Exception {
|
||||
SolrParams p = req.getParams();
|
||||
String words = p.get("q");
|
||||
|
||||
System.out.println(getDescription());
|
||||
int numSug = 5;
|
||||
String[] suggestions = spellChecker.suggestSimilar(words, numSug,
|
||||
reader, restrictToField, onlyMorePopular);
|
||||
|
||||
rsp.add("suggestions", Arrays.asList(suggestions));
|
||||
}
|
||||
|
||||
//////////////////////// SolrInfoMBeans methods //////////////////////
|
||||
|
||||
public String getVersion() {
|
||||
return SolrCore.version;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return "The SpellChecker Solr request handler for SpellChecker index: " + spellcheckerIndexDir;
|
||||
}
|
||||
|
||||
public String getSourceId() {
|
||||
return "$Id$";
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
public URL[] getDocs() {
|
||||
return null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue