SOLR-2010: added richer support for spell checking collations

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1021439 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-10-11 17:32:11 +00:00
parent 98b20d6479
commit 52b19fa5f0
15 changed files with 1299 additions and 72 deletions

View File

@ -289,6 +289,9 @@ New Features
to retrieve correction candidates directly from the term dictionary using
levenshtein automata. (rmuir)
* SOLR-2010: Added ability to verify that spell checking collations have
actual results in the index. (James Dyer via gsingers)
Optimizations
----------------------

View File

@ -81,9 +81,34 @@ public interface SpellingParams {
* Take the top suggestion for each token and create a new query from it
*/
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
/**
* <p>
* The maximum number of collations to return. Default=1. Ignored if "spellcheck.collate" is false.
* </p>
*/
public static final String SPELLCHECK_MAX_COLLATIONS = SPELLCHECK_PREFIX + "maxCollations";
/**
* <p>
* The maximum number of collations to test by querying against the index.
* When testing, the collation is substituted for the original query's "q" param. Any "qf"s are retained.
* If this is set to zero, does not test for hits before returning collations (returned collations may result in zero hits).
* Default=0. Ignored if "spellcheck.collate" is false.
* </p>
*/
public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries";
/**
* <p>
* Whether to use the Extended Results Format for collations.
* Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK. did you mean POLK?"
* Also indicates the # of hits each collation will return on re-query. Default=false, which retains 1.4-compatible output.
* </p>
*/
public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";
/**
* Certain spelling implementations may allow for an accuracy setting.
*/
public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
}

View File

@ -49,8 +49,7 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.NamedList;import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.SolrResourceLoader;
@ -151,12 +150,15 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);
SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
accuracy, customParams);
accuracy, customParams);
SpellingResult spellingResult = spellChecker.getSuggestions(options);
if (spellingResult != null) {
response.add("suggestions", toNamedList(shardRequest, spellingResult, q,
extendedResults, collate));
rb.rsp.add("spellcheck", response);
NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate);
if (collate) {
addCollationsToResponse(params, spellingResult, rb, q, suggestions);
}
response.add("suggestions", suggestions);
rb.rsp.add("spellcheck", response);
}
} else {
@ -165,6 +167,42 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
}
}
/**
 * Computes collations for the given spelling result and appends them to
 * {@code response} under the "collation" key.
 * <p>
 * With "spellcheck.collateExtendedResults" enabled, each collation is emitted as a
 * NamedList holding "collationQuery", "hits" and "misspellingsAndCorrections";
 * otherwise only the collation query string is emitted. The internal rank is
 * added as "collationInternalRank" only when this is a shard request and
 * "spellcheck.maxCollationTries" is greater than zero.
 * </p>
 *
 * @param params request parameters the collation settings are read from
 * @param spellingResult suggestions the collations are built from
 * @param rb response builder of the current request, handed to the collator
 * @param q the original query string being corrected
 * @param response list the collation entries are appended to
 */
@SuppressWarnings("unchecked")
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
    NamedList response) {
  int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
  int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
  boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
  boolean shard = params.getBool(ShardParams.IS_SHARD, false);
  boolean includeInternalRank = maxCollationTries > 0 && shard;

  List<SpellCheckCollation> ranked =
      new SpellCheckCollator().collate(spellingResult, q, rb, maxCollations, maxCollationTries);
  // Sorting here guarantees that a non-distributed request returns its results in
  // the same order a distributed request would, even when internal ranks are equal.
  Collections.sort(ranked);

  for (SpellCheckCollation candidate : ranked) {
    if (!collationExtendedResults) {
      response.add("collation", candidate.getCollationQuery());
      if (includeInternalRank) {
        response.add("collationInternalRank", candidate.getInternalRank());
      }
      continue;
    }
    NamedList details = new NamedList();
    details.add("collationQuery", candidate.getCollationQuery());
    details.add("hits", candidate.getHits());
    details.add("misspellingsAndCorrections", candidate.getMisspellingsAndCorrections());
    if (includeInternalRank) {
      details.add("collationInternalRank", candidate.getInternalRank());
    }
    response.add("collation", details);
  }
}
/**
* For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY, add
@ -215,6 +253,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
String origQuery = params.get(SPELLCHECK_Q);
if (origQuery == null) {
@ -255,6 +296,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
// alternative string -> corresponding SuggestWord object
Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
int totalNumberShardResponses = 0;
for (ShardRequest sreq : rb.finished) {
@ -304,6 +346,51 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
}
}
NamedList suggestions = (NamedList) nl.get("suggestions");
if(suggestions != null) {
List<Object> collationList = suggestions.getAll("collation");
List<Object> collationRankList = suggestions.getAll("collationInternalRank");
int i=0;
if(collationList != null) {
for(Object o : collationList)
{
if(o instanceof String)
{
SpellCheckCollation coll = new SpellCheckCollation();
coll.setCollationQuery((String) o);
if(collationRankList!= null && collationRankList.size()>0)
{
coll.setInternalRank((Integer) collationRankList.get(i));
i++;
}
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
if(priorColl != null)
{
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
}
collations.put(coll.getCollationQuery(), coll);
} else
{
NamedList expandedCollation = (NamedList) o;
SpellCheckCollation coll = new SpellCheckCollation();
coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
coll.setHits((Integer) expandedCollation.get("hits"));
if(maxCollationTries>0)
{
coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
}
coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
if(priorColl != null)
{
coll.setHits(coll.getHits() + priorColl.getHits());
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
}
collations.put(coll.getCollationQuery(), coll);
}
}
}
}
}
}
}
@ -359,7 +446,28 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
NamedList response = new SimpleOrderedMap();
response.add("suggestions", toNamedList(false, result, origQuery, extendedResults, collate));
NamedList suggestions = toNamedList(false, result, origQuery, extendedResults, collate);
if (collate) {
SpellCheckCollation[] sortedCollations = collations.values().toArray(new SpellCheckCollation[collations.size()]);
Arrays.sort(sortedCollations);
int i = 0;
while (i < maxCollations && i < sortedCollations.length) {
SpellCheckCollation collation = sortedCollations[i];
i++;
if (collationExtendedResults) {
NamedList extendedResult = new NamedList();
extendedResult.add("collationQuery", collation.getCollationQuery());
extendedResult.add("hits", collation.getHits());
extendedResult.add("misspellingsAndCorrections", collation
.getMisspellingsAndCorrections());
suggestions.add("collation", extendedResult);
} else {
suggestions.add("collation", collation.getCollationQuery());
}
}
}
response.add("suggestions", suggestions);
rb.rsp.add("spellcheck", response);
}
@ -412,10 +520,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
Map<Token, LinkedHashMap<String, Integer>> suggestions = spellingResult.getSuggestions();
boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo();
boolean isCorrectlySpelled = false;
Map<Token, String> best = null;
if (collate == true){
best = new LinkedHashMap<Token, String>(suggestions.size());
}
int numSuggestions = 0;
for(LinkedHashMap<String, Integer> theSuggestion : suggestions.values())
@ -424,7 +528,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
{
numSuggestions++;
}
}
}
// will be flipped to false if any of the suggestions are not in the index and hasFreqInfo is true
if(numSuggestions > 0) {
isCorrectlySpelled = true;
@ -462,9 +567,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
suggestionList.add("suggestion", theSuggestions.keySet());
}
if (collate == true && theSuggestions.size()>0){//set aside the best suggestion for this token
best.put(inputToken, theSuggestions.keySet().iterator().next());
}
if (hasFreqInfo) {
isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
}
@ -476,24 +578,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
} else if(extendedResults && suggestions.size() == 0) { // if the word is misspelled, its added to suggestions with freqinfo
result.add("correctlySpelled", true);
}
if (collate == true){
StringBuilder collation = new StringBuilder(origQuery);
int offset = 0;
for (Iterator<Map.Entry<Token, String>> bestIter = best.entrySet().iterator(); bestIter.hasNext();) {
Map.Entry<Token, String> entry = bestIter.next();
Token tok = entry.getKey();
// we are replacing the query in order, but injected terms might cause illegal offsets due to previous replacements.
if (tok.getPositionIncrement() == 0) continue;
collation.replace(tok.startOffset() + offset,
tok.endOffset() + offset, entry.getValue());
offset += entry.getValue().length() - (tok.endOffset() - tok.startOffset());
}
String collVal = collation.toString();
if (collVal.equals(origQuery) == false) {
LOG.debug("Collation:" + collation);
result.add("collation", collVal);
}
}
return result;
}

View File

@ -0,0 +1,167 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.lucene.analysis.Token;
/**
* <p>
* Given a list of possible Spelling Corrections for multiple mis-spelled words
* in a query, This iterator returns Possible Correction combinations ordered by
* reasonable probability that such a combination will return actual hits if
* re-queried. This implementation simply ranks the Possible Combinations by the
* sum of their component ranks.
* </p>
*
*/
public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
// One entry per token of the suggestions map, holding that token's candidate
// corrections in the iteration order of its inner map.
private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
// Every combination produced by internalNext(); sorted in the constructor.
private List<RankedSpellPossibility> rankedPossibilityList = new ArrayList<RankedSpellPossibility>();
// Iterator over rankedPossibilityList; hasNext() and next() delegate to it.
private Iterator<RankedSpellPossibility> rankedPossibilityIterator;
// correctionIndex[i] is the position inside possibilityList.get(i) that the
// next generated combination will use.
private int correctionIndex[];
// True once internalNext() has no further combination to produce.
private boolean done = false;
/**
 * Not usable: always throws an AssertionError. Use the constructor that takes
 * the suggestions map instead.
 */
@SuppressWarnings("unused")
private PossibilityIterator() {
throw new AssertionError("You shan't go here.");
}
/**
* <p>
* Eagerly generates every combination of one correction per token, sorts the
* combinations, and prepares the iterator served by hasNext()/next().
* </p>
* <p>
* We assume here that the passed-in inner LinkedHashMaps are already sorted
* in order of "Best Possible Correction".
* </p>
* <p>
* If the map is empty, or any token has no suggested corrections, no
* combinations are generated and the iterator is empty.
* </p>
*
* @param suggestions for each token, the suggested corrections mapped to the
*        value stored as each correction's number of occurrences
*/
public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions) {
// Flatten the map into possibilityList: one list of corrections per token.
for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
Token token = entry.getKey();
List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
SpellCheckCorrection correction = new SpellCheckCorrection();
correction.setOriginal(token);
correction.setCorrection(entry1.getKey());
correction.setNumberOfOccurences(entry1.getValue());
possibleCorrections.add(correction);
}
possibilityList.add(possibleCorrections);
}
int wrapSize = possibilityList.size();
if (wrapSize == 0) {
done = true;
} else {
correctionIndex = new int[wrapSize];
for (int i = 0; i < wrapSize; i++) {
int suggestSize = possibilityList.get(i).size();
// A token without any correction makes every combination impossible.
if (suggestSize == 0) {
done = true;
break;
}
correctionIndex[i] = 0;
}
}
// Materialize all combinations up front so they can be sorted by rank.
while (internalHasNext()) {
rankedPossibilityList.add(internalNext());
}
Collections.sort(rankedPossibilityList);
rankedPossibilityIterator = rankedPossibilityList.iterator();
}
// True while internalNext() can still produce another combination.
private boolean internalHasNext() {
return !done;
}
/**
* <p>
* This method is converting the independent LinkHashMaps containing various
* (silo'ed) suggestions for each mis-spelled word into individual
* "holistic query corrections", aka. "Spell Check Possibility"
* </p>
* <p>
* Rank here is the sum of each selected term's position in its respective
* LinkedHashMap.
* </p>
* <p>
* After the current combination is read, correctionIndex is advanced: the last
* token's index is incremented first and, when it wraps back to zero, the
* increment is carried towards the first token.
* </p>
*
* @return the combination selected by correctionIndex on entry, with its rank
* @throws NoSuchElementException if all combinations were already produced
*/
private RankedSpellPossibility internalNext() {
if (done) {
throw new NoSuchElementException();
}
List<SpellCheckCorrection> possibleCorrection = new ArrayList<SpellCheckCorrection>();
int rank = 0;
for (int i = 0; i < correctionIndex.length; i++) {
List<SpellCheckCorrection> singleWordPossibilities = possibilityList.get(i);
SpellCheckCorrection singleWordPossibility = singleWordPossibilities.get(correctionIndex[i]);
rank += correctionIndex[i];
// Only on the last token: advance the indexes for the following call. The
// earlier positions were already read above, so changing them is safe here.
if (i == correctionIndex.length - 1) {
correctionIndex[i]++;
if (correctionIndex[i] == singleWordPossibilities.size()) {
correctionIndex[i] = 0;
// With a single token, wrapping its index means everything was produced.
if (correctionIndex.length == 1) {
done = true;
}
// Carry into the preceding positions. Position 0 is never reset, so that
// its overflow can be detected after the loop.
for (int ii = i - 1; ii >= 0; ii--) {
correctionIndex[ii]++;
if (correctionIndex[ii] >= possibilityList.get(ii).size() && ii > 0) {
correctionIndex[ii] = 0;
} else {
break;
}
}
}
}
possibleCorrection.add(singleWordPossibility);
}
// The first token's index running past its list marks the end of generation.
if(correctionIndex[0] == possibilityList.get(0).size())
{
done = true;
}
RankedSpellPossibility rsl = new RankedSpellPossibility();
rsl.setCorrections(possibleCorrection);
rsl.setRank(rank);
return rsl;
}
/** Returns whether another pre-computed, rank-sorted combination remains. */
public boolean hasNext() {
return rankedPossibilityIterator.hasNext();
}
/** Returns the next combination from the rank-sorted list. */
public RankedSpellPossibility next() {
return rankedPossibilityIterator.next();
}
/** Not supported; always throws UnsupportedOperationException. */
public void remove() {
throw new UnsupportedOperationException();
}
}

View File

@ -0,0 +1,44 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
/**
 * A combination of corrections together with the rank assigned to it.
 * Instances order themselves by ascending rank.
 */
public class RankedSpellPossibility implements Comparable<RankedSpellPossibility> {
  private int rank;
  private List<SpellCheckCorrection> corrections;

  /**
   * Orders by rank only, smaller rank first.
   *
   * @param rcl the possibility to compare against
   * @return -1, 0 or 1 when this rank is lower than, equal to or higher than the other's
   */
  public int compareTo(RankedSpellPossibility rcl) {
    int otherRank = rcl.rank;
    if (rank < otherRank) {
      return -1;
    }
    return rank == otherRank ? 0 : 1;
  }

  /** @return the corrections making up this possibility */
  public List<SpellCheckCorrection> getCorrections() {
    return corrections;
  }

  /** @param corrections the corrections making up this possibility */
  public void setCorrections(List<SpellCheckCorrection> corrections) {
    this.corrections = corrections;
  }

  /** @return the rank used for ordering */
  public int getRank() {
    return rank;
  }

  /** @param rank the rank used for ordering */
  public void setRank(int rank) {
    this.rank = rank;
  }
}

View File

@ -0,0 +1,68 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.common.util.NamedList;
/**
 * One collation: a rewritten query string together with its hit count, its
 * internal rank and the misspelling/correction pairs it was built from.
 * Instances order themselves by internal rank, then by collation query.
 */
public class SpellCheckCollation implements Comparable<SpellCheckCollation> {
  private String collationQuery;
  private int internalRank;
  private int hits;
  private NamedList<String> misspellingsAndCorrections;

  /**
   * Orders by ascending internal rank; equal ranks fall back to the natural
   * ordering of the collation query strings.
   *
   * @param scc the collation to compare against
   * @return negative, zero or positive per the Comparable contract
   */
  public int compareTo(SpellCheckCollation scc) {
    if (internalRank != scc.internalRank) {
      return internalRank < scc.internalRank ? -1 : 1;
    }
    return collationQuery.compareTo(scc.collationQuery);
  }

  /** @return the misspelling/correction pairs of this collation */
  public NamedList<String> getMisspellingsAndCorrections() {
    return misspellingsAndCorrections;
  }

  /** @param misspellingsAndCorrections the misspelling/correction pairs of this collation */
  public void setMisspellingsAndCorrections(
      NamedList<String> misspellingsAndCorrections) {
    this.misspellingsAndCorrections = misspellingsAndCorrections;
  }

  /** @return the hit count recorded for this collation */
  public int getHits() {
    return hits;
  }

  /** @param hits the hit count recorded for this collation */
  public void setHits(int hits) {
    this.hits = hits;
  }

  /** @return the rewritten query string */
  public String getCollationQuery() {
    return collationQuery;
  }

  /** @param collationQuery the rewritten query string */
  public void setCollationQuery(String collationQuery) {
    this.collationQuery = collationQuery;
  }

  /** @return the rank used as the primary sort key */
  public int getInternalRank() {
    return internalRank;
  }

  /** @param internalRank the rank used as the primary sort key */
  public void setInternalRank(int internalRank) {
    this.internalRank = internalRank;
  }
}

View File

@ -0,0 +1,142 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Builds collations, i.e. copies of the original query in which tokens are
 * replaced by suggested corrections, and optionally re-queries each candidate
 * through the request's QueryComponent so that only candidates reporting hits
 * are returned.
 */
public class SpellCheckCollator {
  private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);

  /**
   * Produces up to {@code maxCollations} collations for {@code originalQuery}.
   * <p>
   * When {@code maxTries} is at least 1 and a QueryComponent is found among the
   * components of {@code ultimateResponse}, each candidate is executed (with
   * rows=0) and kept only if it reports more than zero hits; at most
   * {@code maxTries} candidates are tested. Otherwise candidates are accepted
   * without verification and their hit count is left at 0.
   * </p>
   *
   * @param result spelling suggestions the candidates are generated from
   * @param originalQuery the query string in which tokens are replaced
   * @param ultimateResponse response builder of the request being served; supplies
   *        the query parser, filters, request params and core used for re-querying
   * @param maxCollations maximum number of collations to return
   * @param maxTries maximum number of candidates to test against the index;
   *        values below 1 disable testing
   * @return the accepted collations, in the order the candidates were generated
   */
  public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
      int maxCollations, int maxTries) {
    List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();

    // Locate the QueryComponent of the current request; it executes the test queries.
    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
      for (SearchComponent sc : ultimateResponse.components) {
        if (sc instanceof QueryComponent) {
          queryComponent = (QueryComponent) sc;
          break;
        }
      }
    }

    boolean verifyCandidateWithQuery = true;
    if (maxTries < 1) {
      // Verification is off; maxTries only has to let the loop below run.
      maxTries = 1;
      verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
      LOG.warn("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
      maxTries = 1;
      verifyCandidateWithQuery = false;
    }

    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions());
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
      RankedSpellPossibility possibility = possibilityIter.next();
      String collationQueryStr = getCollation(originalQuery, possibility.getCorrections());
      int hits = 0;

      if (verifyCandidateWithQuery) {
        tryNo++;

        ResponseBuilder checkResponse = new ResponseBuilder();
        checkResponse.setQparser(ultimateResponse.getQparser());
        checkResponse.setFilters(ultimateResponse.getFilters());
        checkResponse.setQueryString(collationQueryStr);
        checkResponse.components = Arrays.asList(new SearchComponent[] { queryComponent });

        // Reuse the original request params, swapping in the candidate as "q"
        // and asking for zero rows since only the hit count is needed.
        ModifiableSolrParams params = new ModifiableSolrParams(ultimateResponse.req.getParams());
        params.remove(CommonParams.Q);
        params.add(CommonParams.Q, collationQueryStr);
        params.remove(CommonParams.START);
        params.remove(CommonParams.ROWS);
        params.add(CommonParams.FL, "id");
        params.add(CommonParams.ROWS, "0");

        //Would rather have found a concrete class to use...
        checkResponse.req = new SolrQueryRequestBase(ultimateResponse.req.getCore(), params) { };
        checkResponse.rsp = new SolrQueryResponse();

        try {
          queryComponent.prepare(checkResponse);
          queryComponent.process(checkResponse);
          hits = (Integer) checkResponse.rsp.getToLog().get("hits");
        } catch (Exception e) {
          // Best effort: a failing test query leaves hits at 0, so the candidate is
          // skipped. Logged through this class's SLF4J logger rather than the
          // servlet container's logging facade.
          LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
        }
      }

      if (hits > 0 || !verifyCandidateWithQuery) {
        collNo++;
        SpellCheckCollation collation = new SpellCheckCollation();
        collation.setCollationQuery(collationQueryStr);
        collation.setHits(hits);
        collation.setInternalRank(possibility.getRank());

        NamedList<String> misspellingsAndCorrections = new NamedList<String>();
        for (SpellCheckCorrection corr : possibility.getCorrections()) {
          misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
        }
        collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
        collations.add(collation);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Collation: " + collationQueryStr + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
      }
    }
    return collations;
  }

  /**
   * Returns a copy of {@code origQuery} in which the text at each correction's
   * token offsets is replaced by the corrected term.
   *
   * @param origQuery the query string to rewrite
   * @param corrections the corrections to apply, in the order given
   * @return the rewritten query string
   */
  private String getCollation(String origQuery,
      List<SpellCheckCorrection> corrections) {
    StringBuilder collation = new StringBuilder(origQuery);
    // Running difference in length between the rewritten and the original text,
    // used to shift the offsets of later tokens.
    int offset = 0;
    for (SpellCheckCorrection correction : corrections) {
      Token tok = correction.getOriginal();
      // we are replacing the query in order, but injected terms might cause
      // illegal offsets due to previous replacements.
      if (tok.getPositionIncrement() == 0)
        continue;
      collation.replace(tok.startOffset() + offset, tok.endOffset() + offset,
          correction.getCorrection());
      offset += correction.getCorrection().length()
          - (tok.endOffset() - tok.startOffset());
    }
    return collation.toString();
  }
}

View File

@ -0,0 +1,50 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
/**
 * Pairs an original token with one suggested correction and the number of
 * occurrences recorded for that correction.
 */
public class SpellCheckCorrection {
  private Token original;
  private String correction;
  private int numberOfOccurences;

  /** @return the token being corrected */
  public Token getOriginal() {
    return this.original;
  }

  /** @param original the token being corrected */
  public void setOriginal(Token original) {
    this.original = original;
  }

  /** @return the suggested replacement text */
  public String getCorrection() {
    return this.correction;
  }

  /** @param correction the suggested replacement text */
  public void setCorrection(String correction) {
    this.correction = correction;
  }

  /** @return the number of occurrences recorded for the correction */
  public int getNumberOfOccurences() {
    return this.numberOfOccurences;
  }

  /** @param numberOfOccurences the number of occurrences recorded for the correction */
  public void setNumberOfOccurences(int numberOfOccurences) {
    this.numberOfOccurences = numberOfOccurences;
  }
}

View File

@ -31,7 +31,7 @@ import java.util.Map;
*/
public class SpellCheckResponse {
private boolean correctlySpelled;
private String collation;
private List<Collation> collations;
private List<Suggestion> suggestions = new ArrayList<Suggestion>();
Map<String, Suggestion> suggestionMap = new LinkedHashMap<String, Suggestion>();
@ -45,8 +45,39 @@ public class SpellCheckResponse {
String n = sugg.getName(i);
if ("correctlySpelled".equals(n)) {
correctlySpelled = (Boolean) sugg.getVal(i);
} else if ("collation".equals(n)) {
collation = (String) sugg.getVal(i);
} else if ("collationInternalRank".equals(n)){
//continue;
} else if ("collation".equals(n)) {
List<Object> collationInfo = sugg.getAll(n);
collations = new ArrayList<Collation>(collationInfo.size());
// Convert every "collation" entry into a Collation: a plain String carries only
// the query, a NamedList carries the extended form (query, hits, corrections).
for (Object o : collationInfo) {
  if (o instanceof String) {
    // Read the element being visited. Using sugg.getVal(i) here would repeat the
    // entry at the outer index i once for every String collation in the list.
    collations.add(new Collation()
        .setCollationQueryString((String) o));
  } else if (o instanceof NamedList) {
    NamedList expandedCollation = (NamedList) o;
    String collationQuery = (String) expandedCollation
        .get("collationQuery");
    int hits = (Integer) expandedCollation.get("hits");
    NamedList<String> misspellingsAndCorrections = (NamedList<String>) expandedCollation
        .get("misspellingsAndCorrections");
    Collation collation = new Collation();
    collation.setCollationQueryString(collationQuery);
    collation.setNumberOfHits(hits);
    // Each name/value pair is one misspelling and its correction.
    for (int ii = 0; ii < misspellingsAndCorrections.size(); ii++) {
      String misspelling = misspellingsAndCorrections.getName(ii);
      String correction = misspellingsAndCorrections.getVal(ii);
      collation.addMisspellingsAndCorrection(new Correction(
          misspelling, correction));
    }
    collations.add(collation);
  } else {
    throw new AssertionError(
        "Should get Lists of Strings or List of NamedLists here.");
  }
}
} else {
Suggestion s = new Suggestion(n, (NamedList<Object>) sugg.getVal(i));
suggestionMap.put(n, s);
@ -77,8 +108,25 @@ public class SpellCheckResponse {
return s.getAlternatives().get(0);
}
/**
* <p>
* Return the first collated query string. For convenience and backwards-compatibility. Use getCollatedResults() for full data.
* </p>
* @return
*/
public String getCollatedResult() {
return collation;
return collations==null || collations.size()==0 ? null : collations.get(0).collationQueryString;
}
/**
* <p>
* Return all collations.
* Will include # of hits and misspelling-to-correction details if "spellcheck.collateExtendedResults" was true.
* </p>
* @return
*/
public List<Collation> getCollatedResults() {
return collations;
}
public static class Suggestion {
@ -162,4 +210,63 @@ public class SpellCheckResponse {
}
}
/**
 * A single collation from the spell check response: the collated query string,
 * its number of hits, and the misspelling/correction pairs attached to it.
 */
public class Collation {
  private String collationQueryString;
  private long numberOfHits;
  private List<Correction> misspellingsAndCorrections = new ArrayList<Correction>();

  /** @return the collated query string */
  public String getCollationQueryString() {
    return this.collationQueryString;
  }

  /**
   * @param collationQueryString the collated query string
   * @return this instance, to allow chaining
   */
  public Collation setCollationQueryString(String collationQueryString) {
    this.collationQueryString = collationQueryString;
    return this;
  }

  /** @return the number of hits stored for this collation */
  public long getNumberOfHits() {
    return this.numberOfHits;
  }

  /** @param numberOfHits the number of hits stored for this collation */
  public void setNumberOfHits(long numberOfHits) {
    this.numberOfHits = numberOfHits;
  }

  /** @return the list of corrections added so far (the internal list, not a copy) */
  public List<Correction> getMisspellingsAndCorrections() {
    return this.misspellingsAndCorrections;
  }

  /**
   * Appends one misspelling/correction pair.
   *
   * @param correction the pair to append
   * @return this instance, to allow chaining
   */
  public Collation addMisspellingsAndCorrection(Correction correction) {
    misspellingsAndCorrections.add(correction);
    return this;
  }
}
/**
 * A misspelled term paired with the term suggested in its place.
 */
public class Correction {
  private String original;
  private String correction;

  /**
   * @param original the misspelled term
   * @param correction the suggested replacement
   */
  public Correction(String original, String correction) {
    this.correction = correction;
    this.original = original;
  }

  /** @return the misspelled term */
  public String getOriginal() {
    return this.original;
  }

  /** @param original the misspelled term */
  public void setOriginal(String original) {
    this.original = original;
  }

  /** @return the suggested replacement */
  public String getCorrection() {
    return this.correction;
  }

  /** @param correction the suggested replacement */
  public void setCorrection(String correction) {
    this.correction = correction;
  }
}
}

View File

@ -20,15 +20,28 @@ import junit.framework.Assert;
import org.apache.solr.client.solrj.SolrJettyTestBase;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Collation;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Correction;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.List;
import static org.junit.Assert.fail;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
@ -103,4 +116,79 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
// Hmmm... the API for SpellCheckResponse could be nicer:
response.getSuggestions().get(0).getAlternatives().get(0);
}
/**
 * Indexes five small documents, then checks the collation part of the
 * spell-check response twice: first with only "collate" enabled (single
 * collation string), then with extended collation results, collation tries
 * and a maximum of two collations enabled.
 */
@Test
public void testSpellCheckCollationResponse() throws Exception {
  getSolrServer();

  // Document ids are the array indexes "0".."4".
  String[] names = {
      "faith hope and love",
      "faith hope and loaves",
      "fat hops and loaves",
      "faith of homer",
      "fat of homer"
  };
  for (int i = 0; i < names.length; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.setField("id", Integer.toString(i));
    doc.setField("name", names[i]);
    server.add(doc);
  }
  server.commit(true, true);

  // Test backwards compatibility: only "collate" is set, so a single
  // collation string is expected.
  SolrQuery query = new SolrQuery("name:(+fauth +home +loane)");
  query.set(CommonParams.QT, "/spell");
  query.set("spellcheck", true);
  query.set(SpellingParams.SPELLCHECK_BUILD, true);
  query.set(SpellingParams.SPELLCHECK_COUNT, 10);
  query.set(SpellingParams.SPELLCHECK_COLLATE, true);
  QueryRequest request = new QueryRequest(query);
  // NOTE(review): the request is deliberately left issued twice, exactly as
  // before; presumably the first call only serves to build the spelling
  // index -- confirm whether the second call is really needed.
  SpellCheckResponse response = request.process(server).getSpellCheckResponse();
  response = request.process(server).getSpellCheckResponse();
  assertEquals("name:(+faith +homer +loaves)", response.getCollatedResult());

  // Test expanded collation results.
  query.set(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, true);
  query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 5);
  query.set(SpellingParams.SPELLCHECK_MAX_COLLATIONS, 2);
  request = new QueryRequest(query);
  response = request.process(server).getSpellCheckResponse();

  final String loveCollation = "name:(+faith +hope +love)";
  final String loavesCollation = "name:(+faith +hope +loaves)";

  String collated = response.getCollatedResult();
  assertTrue("Unexpected collated result: " + collated,
      loveCollation.equals(collated) || loavesCollation.equals(collated));

  List<Collation> collations = response.getCollatedResults();
  assertEquals(2, collations.size());
  for (Collation collation : collations) {
    String collationQuery = collation.getCollationQueryString();
    assertTrue("Unexpected collation query: " + collationQuery,
        loveCollation.equals(collationQuery) || loavesCollation.equals(collationQuery));
    assertTrue("Expected exactly 1 hit for '" + collationQuery + "' but got "
        + collation.getNumberOfHits(), collation.getNumberOfHits() == 1);

    List<Correction> misspellingsAndCorrections = collation.getMisspellingsAndCorrections();
    assertEquals(3, misspellingsAndCorrections.size());
    for (Correction correction : misspellingsAndCorrections) {
      String original = correction.getOriginal();
      String corrected = correction.getCorrection();
      if ("fauth".equals(original)) {
        assertEquals("faith", corrected);
      } else if ("home".equals(original)) {
        assertEquals("hope", corrected);
      } else if ("loane".equals(original)) {
        assertTrue("Unexpected correction for loane: " + corrected,
            "love".equals(corrected) || "loaves".equals(corrected));
      } else {
        fail("Original Word Should have been either fauth, home or loane.");
      }
    }
  }
}
}

View File

@ -0,0 +1,87 @@
package org.apache.solr.handler.component;
import java.io.File;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.util.AbstractSolrTestCase;
/**
 * Distributed-search test that issues spell-check collation requests with
 * different combinations of the maxCollationTries, maxCollations and
 * collateExtendedResults parameters.
 */
public class DistributedSpellCollatorTest extends BaseDistributedSearchTestCase {

  /** Value of the "solr.directoryFactory" system property before {@link #setUp()} changed it. */
  private String saveProp;

  @Override
  public void setUp() throws Exception {
    // this test requires FSDir
    saveProp = System.getProperty("solr.directoryFactory");
    System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    try {
      super.tearDown();
    } finally {
      // Restore the property even when super.tearDown() throws, so that a
      // failure here cannot leak the setting into later tests.
      if (saveProp == null) {
        System.clearProperty("solr.directoryFactory");
      } else {
        System.setProperty("solr.directoryFactory", saveProp);
      }
    }
  }

  /**
   * Sends the given key/value pairs as a query to the control client and then,
   * with the "shards" parameter added, to one randomly chosen client. Both
   * responses are discarded.
   *
   * @param q alternating parameter names and values
   */
  private void q(Object... q) throws Exception {
    final ModifiableSolrParams params = new ModifiableSolrParams();
    for (int i = 0; i < q.length; i += 2) {
      params.add(q[i].toString(), q[i + 1].toString());
    }
    controlClient.query(params);

    // query a random server
    params.set("shards", shards);
    int which = r.nextInt(clients.size());
    SolrServer client = clients.get(which);
    client.query(params);
  }

  /**
   * Issues the collation query used by {@link #doTest()}; only the three
   * collation settings vary between calls.
   */
  private void collationQuery(String maxCollationTries, String maxCollations,
                              String collateExtendedResults) throws Exception {
    query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true",
        "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COUNT, "10",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, maxCollationTries,
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, maxCollations,
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, collateExtendedResults);
  }

  @Override
  public void doTest() throws Exception {
    index(id, "1", "lowerfilt", "The quick red fox jumped over the lazy brown dogs.");
    index(id, "2", "lowerfilt", "The quack rex fox jumped over the lazy brown dogs.");
    index(id, "3", "lowerfilt", "The quote rex fox jumped over the lazy brown dogs.");
    // Documents 4 through 15 all carry the same text.
    for (int i = 4; i <= 15; i++) {
      index(id, Integer.toString(i), "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
    }
    commit();

    handle.clear();
    handle.put("QTime", SKIPVAL);
    handle.put("timestamp", SKIPVAL);
    handle.put("maxScore", SKIPVAL);
    // we care only about the spellcheck results
    handle.put("response", SKIP);

    q("q", "*:*", SpellCheckComponent.SPELLCHECK_BUILD, "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");

    collationQuery("10", "10", "true");
    collationQuery("10", "10", "false");
    collationQuery("0", "1", "false");

    // Ensure that each iteration of test uses a fresh Jetty data directory.
    // Otherwise we get incorrect # hits
    // This probably should be fixed in BaseDistributedSearch in its own issue,
    // but I needed this test to pass now...
    AbstractSolrTestCase.recurseDelete(testDir);
    testDir = new File(System.getProperty("java.io.tmpdir")
        + System.getProperty("file.separator") + getClass().getName() + "-"
        + System.currentTimeMillis());
    testDir.mkdirs();
  }
}

View File

@ -0,0 +1,226 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests the collation section of the spell-check response: collations tested
 * against a filter query, against different request handlers, and in both the
 * plain-string and the extended response format.
 */
public class SpellCheckCollatorTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml", "schema.xml");
    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love")));
    assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves")));
    assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves")));
    assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
    assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
    assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
    assertNull(h.validateUpdate(commit()));
  }

  /**
   * Runs {@code params} through the named request handler and returns the
   * "suggestions" entry found under "spellcheck" in the response values.
   *
   * @param core        the core to run the request against
   * @param handlerName name of the request handler to look up on {@code core}
   * @param params      the request parameters
   * @return the "suggestions" list of the spell-check response
   */
  private static NamedList spellcheckSuggestions(SolrCore core, String handlerName,
                                                 ModifiableSolrParams params) {
    SolrRequestHandler handler = core.getRequestHandler(handlerName);
    SolrQueryResponse rsp = new SolrQueryResponse();
    rsp.add("responseHeader", new SimpleOrderedMap());
    LocalSolrQueryRequest req = new LocalSolrQueryRequest(core, params);
    try {
      handler.handleRequest(req, rsp);
    } finally {
      // Close the request once handled so that whatever it acquired while
      // being processed is released; only the response is read below.
      req.close();
    }
    NamedList spellCheck = (NamedList) rsp.getValues().get("spellcheck");
    return (NamedList) spellCheck.get("suggestions");
  }

  @Test
  public void testCollateWithFilter() throws Exception {
    SolrCore core = h.getCore();
    SearchComponent speller = core.getSearchComponent("spellcheck");
    assertTrue("speller is null and it shouldn't be", speller != null);

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(SpellCheckComponent.COMPONENT_NAME, "true");
    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
    params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
    params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
    params.add(CommonParams.FQ, "NOT(id:1)");

    // Because a FilterQuery is applied which removes doc id#1 from possible hits, we would
    // not want the collations to return us "lowerfilt:(+faith +hope +loaves)" as this only matches doc id#1.
    NamedList suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    List<String> collations = suggestions.getAll("collation");
    assertEquals(1, collations.size());
    assertEquals("lowerfilt:(+faith +hope +love)", collations.get(0));
  }

  @Test
  public void testCollateWithMultipleRequestHandlers() throws Exception {
    SolrCore core = h.getCore();
    SearchComponent speller = core.getSearchComponent("spellcheck");
    assertTrue("speller is null and it shouldn't be", speller != null);

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(SpellCheckComponent.COMPONENT_NAME, "true");
    params.add(SpellCheckComponent.SPELLCHECK_DICT, "multipleFields");
    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
    params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "1");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
    params.add(CommonParams.Q, "peac");

    // spellCheckCompRH has no "qf" defined. It will not find "peace" from "peac" despite it being in the dictionary
    // because requerying against this Request Handler results in 0 hits.
    NamedList suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    String singleCollation = (String) suggestions.get("collation");
    assertNull(singleCollation);

    // spellCheckCompRH1 has "lowerfilt1" defined in the "qf" param. It will find "peace" from "peac" because
    // requerying field "lowerfilt1" returns the hit.
    params.remove(SpellCheckComponent.SPELLCHECK_BUILD);
    suggestions = spellcheckSuggestions(core, "spellCheckCompRH1", params);
    singleCollation = (String) suggestions.get("collation");
    // Expected value goes first so that a failure message reads correctly.
    assertEquals("peace", singleCollation);
  }

  @Test
  public void testExtendedCollate() throws Exception {
    final String loveCollation = "lowerfilt:(+faith +hope +love)";
    final String loavesCollation = "lowerfilt:(+faith +hope +loaves)";

    SolrCore core = h.getCore();
    SearchComponent speller = core.getSearchComponent("spellcheck");
    assertTrue("speller is null and it shouldn't be", speller != null);

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(CommonParams.QT, "spellCheckCompRH");
    params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
    params.add(SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
    params.add(SpellCheckComponent.COMPONENT_NAME, "true");
    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
    params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");

    // Testing backwards-compatible behavior.
    // Returns 1 collation as a single string.
    // All words are "correct" per the dictionary, but this collation would
    // return no results if tried.
    NamedList suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    String singleCollation = (String) suggestions.get("collation");
    assertEquals("lowerfilt:(+faith +homer +loaves)", singleCollation);

    // Testing backwards-compatible response format but will only return a
    // collation that would return results.
    params.remove(SpellCheckComponent.SPELLCHECK_BUILD);
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
    suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    singleCollation = (String) suggestions.get("collation");
    assertEquals(loavesCollation, singleCollation);

    // Testing returning multiple collations if more than one valid
    // combination exists.
    params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES);
    params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS);
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
    params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
    suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    List<String> collations = suggestions.getAll("collation");
    assertEquals(2, collations.size());
    for (String multipleCollation : collations) {
      assertTrue("Unexpected collation: " + multipleCollation,
          loveCollation.equals(multipleCollation) || loavesCollation.equals(multipleCollation));
    }

    // Testing return multiple collations with expanded collation response
    // format.
    params.add(SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
    suggestions = spellcheckSuggestions(core, "spellCheckCompRH", params);
    List<NamedList> expandedCollationList = suggestions.getAll("collation");
    Set<String> usedcollations = new HashSet<String>();
    assertEquals(2, expandedCollationList.size());
    for (NamedList expandedCollation : expandedCollationList) {
      String multipleCollation = (String) expandedCollation.get("collationQuery");
      assertTrue("Unexpected collation: " + multipleCollation,
          loveCollation.equals(multipleCollation) || loavesCollation.equals(multipleCollation));
      // Set.add returns false for a value already present, i.e. a duplicate collation.
      assertTrue("Collation returned more than once: " + multipleCollation,
          usedcollations.add(multipleCollation));

      int hits = (Integer) expandedCollation.get("hits");
      assertEquals(1, hits);

      NamedList misspellingsAndCorrections = (NamedList) expandedCollation.get("misspellingsAndCorrections");
      assertEquals(3, misspellingsAndCorrections.size());

      String correctionForFauth = (String) misspellingsAndCorrections.get("fauth");
      String correctionForHome = (String) misspellingsAndCorrections.get("home");
      String correctionForLoane = (String) misspellingsAndCorrections.get("loane");
      assertEquals("faith", correctionForFauth);
      assertEquals("hope", correctionForHome);
      assertTrue("Unexpected correction for loane: " + correctionForLoane,
          "love".equals(correctionForLoane) || "loaves".equals(correctionForLoane));
    }
  }
}

View File

@ -0,0 +1,117 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.junit.Assert.*;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.lucene.analysis.Token;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.spelling.PossibilityIterator;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Checks that {@link PossibilityIterator} yields one element per combination
 * of the suggestions supplied for each token (8*9*10, 8*9, 8, then none).
 */
public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {

  /** Suggestions per token; the test removes entries from it as it goes. */
  private static Map<Token, LinkedHashMap<String, Integer>> suggestions =
      new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();

  @BeforeClass
  public static void beforeClass() throws Exception {
    suggestions.clear();
    // 8, 9 and 10 suggestions respectively, inserted in this order.
    suggestions.put(new Token("AYE", 0, 2),
        zeroValued("I", "II", "III", "IV", "V", "VI", "VII", "VIII"));
    suggestions.put(new Token("BEE", 0, 2),
        zeroValued("alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", "iota"));
    suggestions.put(new Token("CEE", 0, 2),
        zeroValued("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"));
  }

  /** Builds an insertion-ordered map that associates every given word with the value 0. */
  private static LinkedHashMap<String, Integer> zeroValued(String... words) {
    LinkedHashMap<String, Integer> map = new LinkedHashMap<String, Integer>();
    for (String word : words) {
      map.put(word, 0);
    }
    return map;
  }

  /** Counts how many elements a fresh iterator over the current {@link #suggestions} produces. */
  private static int countPossibilities() {
    PossibilityIterator iter = new PossibilityIterator(suggestions);
    int count = 0;
    while (iter.hasNext()) {
      iter.next();
      count++;
    }
    return count;
  }

  @Test
  public void testSpellPossibilityIterator() throws Exception {
    assertEquals("Three maps (8*9*10) should return 720 iterations", 720, countPossibilities());

    suggestions.remove(new Token("CEE", 0, 2));
    assertEquals("Two maps (8*9) should return 72 iterations", 72, countPossibilities());

    suggestions.remove(new Token("BEE", 0, 2));
    assertEquals("One map of 8 should return 8 iterations", 8, countPossibilities());

    suggestions.remove(new Token("AYE", 0, 2));
    assertEquals("No maps should return 0 iterations", 0, countPossibilities());
  }
}

View File

@ -19,8 +19,8 @@
<!-- The Solr schema file. This file should be named "schema.xml" and
should be located where the classloader for the Solr webapp can find it.
This schema is used for testing, and as such has everything and the
kitchen sink thrown in. See example/solr/conf/schema.xml for a
This schema is used for testing, and as such has everything and the
kitchen sink thrown in. See example/solr/conf/schema.xml for a
more concise example.
$Id: schema.xml 382610 2006-03-03 01:43:03Z yonik $
@ -50,7 +50,7 @@
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
@ -100,7 +100,7 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="wdf_preserve" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@ -353,7 +353,7 @@
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
</analyzer>
</fieldtype>
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
-->
@ -378,7 +378,7 @@
</fieldtype>
<fieldType name="uuid" class="solr.UUIDField" />
<!-- Try out some point types -->
<fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/>
<fieldType name="x" class="solr.PointType" dimension="1" subFieldType="double"/>
@ -444,7 +444,7 @@
<field name="test_notv" type="text" termVectors="false"/>
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
<!-- test highlit field settings -->
@ -463,6 +463,8 @@
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
<field name="lowerfilt1" type="lowerfilt" indexed="true" stored="true"/>
<field name="lowerfilt1and2" type="lowerfilt" indexed="true" stored="true"/>
<field name="patterntok" type="patterntok" indexed="true" stored="true"/>
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
@ -487,14 +489,14 @@
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
<field name="textgap" type="textgap" indexed="true" stored="true"/>
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
<field name="tlong" type="tlong" indexed="true" stored="true" />
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
@ -531,22 +533,22 @@
<dynamicField name="*_pl" type="plong" indexed="true" stored="true"/>
<dynamicField name="*_pd" type="pdouble" indexed="true" stored="true"/>
<dynamicField name="*_pdt" type="pdate" indexed="true" stored="true"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multiValued="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_p" type="xyd" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_p" type="xyd" indexed="true" stored="true" multiValued="false"/>
<!-- special fields for dynamic copyField test -->
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
<dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
<!-- for testing to ensure that longer patterns are matched first -->
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
<dynamicField name="*aaa" type="pint" indexed="false" stored="true"/>
@ -569,24 +571,26 @@
<copyField source="title" dest="title_lettertok"/>
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="*_t" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="id" dest="range_facet_si"/>
<copyField source="id" dest="range_facet_l"/>
<copyField source="id" dest="range_facet_sl"/>
<copyField source="range_facet_f" dest="range_facet_sf"/>
<copyField source="range_facet_f" dest="range_facet_d"/>
<copyField source="range_facet_f" dest="range_facet_sd"/>
<copyField source="lowerfilt1" dest="lowerfilt1and2"/>
<copyField source="lowerfilt" dest="lowerfilt1and2"/>
<copyField source="bday" dest="bday_pdt"/>
<copyField source="a_tdt" dest="a_pdt"/>
<copyField source="*_t" dest="text"/>
<copyField source="id" dest="range_facet_si"/>
<copyField source="id" dest="range_facet_l"/>
<copyField source="id" dest="range_facet_sl"/>
<copyField source="range_facet_f" dest="range_facet_sf"/>
<copyField source="range_facet_f" dest="range_facet_d"/>
<copyField source="range_facet_f" dest="range_facet_sd"/>
<copyField source="bday" dest="bday_pdt"/>
<copyField source="a_tdt" dest="a_pdt"/>
<!-- dynamic destination -->
<copyField source="*_dynamic" dest="dynamic_*"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications.

View File

@ -29,7 +29,7 @@
tests. if you need to test something esoteric please add a new
"solrconfig-your-esoteric-purpose.xml" config file.
Note in particular that this test is used by MinimalSchemaTest so
Note in particular that this test is used by MinimalSchemaTest so
Anything added to this file needs to work correctly even if there
is no uniqueKey or defaultSearch Field.
@ -115,15 +115,15 @@
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
@ -342,6 +342,12 @@
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">true</str>
</lst>
<!-- Dictionary "multipleFields": built from the lowerfilt1and2 field, which
     schema.xml fills via copyField from both lowerfilt and lowerfilt1.
     SpellCheckCollatorTest selects this dictionary by name. -->
<lst name="spellchecker">
<str name="name">multipleFields</str>
<str name="field">lowerfilt1and2</str>
<str name="spellcheckIndexDir">spellcheckerMultipleFields</str>
<str name="buildOnCommit">true</str>
</lst>
<!-- Example of using different distance measure -->
<lst name="spellchecker">
<str name="name">jarowinkler</str>
@ -411,8 +417,17 @@
<str>spellcheck</str>
</arr>
</requestHandler>
<!-- Search handler that defaults to the dismax parser with qf=lowerfilt1 and
     runs the spellcheck component last. Used by
     SpellCheckCollatorTest.testCollateWithMultipleRequestHandlers. -->
<requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="qf">lowerfilt1^1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
@ -502,5 +517,5 @@
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
</config>