mirror of https://github.com/apache/lucene.git
SOLR-2010: added richer support for spell checking collations
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1021439 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
98b20d6479
commit
52b19fa5f0
|
@ -289,6 +289,9 @@ New Features
|
|||
to retrieve correction candidates directly from the term dictionary using
|
||||
levenshtein automata. (rmuir)
|
||||
|
||||
* SOLR-2010: Added ability to verify that spell checking collations have
|
||||
actual results in the index. (James Dyer via gsingers)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -81,9 +81,34 @@ public interface SpellingParams {
|
|||
* Take the top suggestion for each token and create a new query from it
|
||||
*/
|
||||
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* The maximum number of collations to return. Default=1. Ignored if "spellcheck.collate" is false.
|
||||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_MAX_COLLATIONS = SPELLCHECK_PREFIX + "maxCollations";
|
||||
/**
|
||||
* <p>
|
||||
* The maximum number of collations to test by querying against the index.
|
||||
* When testing, the collation is substituted for the original query's "q" param. Any "qf"s are retained.
|
||||
* If this is set to zero, does not test for hits before returning collations (returned collations may result in zero hits).
|
||||
* Default=0. Ignored if "spellcheck.collate" is false.
|
||||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_MAX_COLLATION_TRIES = SPELLCHECK_PREFIX + "maxCollationTries";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Whether to use the Extended Results Format for collations.
|
||||
* Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK. did you mean POLK?"
|
||||
* Also indicates the # of hits each collation will return on re-query. Default=false, which retains 1.4-compatible output.
|
||||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";
|
||||
|
||||
/**
|
||||
* Certain spelling implementations may allow for an accuracy setting.
|
||||
*/
|
||||
public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
|
||||
|
||||
}
|
||||
|
|
|
@ -49,8 +49,7 @@ import org.apache.solr.common.params.CommonParams;
|
|||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.NamedList;import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
|
@ -151,12 +150,15 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
||||
SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);
|
||||
SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
|
||||
accuracy, customParams);
|
||||
accuracy, customParams);
|
||||
SpellingResult spellingResult = spellChecker.getSuggestions(options);
|
||||
if (spellingResult != null) {
|
||||
response.add("suggestions", toNamedList(shardRequest, spellingResult, q,
|
||||
extendedResults, collate));
|
||||
rb.rsp.add("spellcheck", response);
|
||||
NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate);
|
||||
if (collate) {
|
||||
addCollationsToResponse(params, spellingResult, rb, q, suggestions);
|
||||
}
|
||||
response.add("suggestions", suggestions);
|
||||
rb.rsp.add("spellcheck", response);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -165,6 +167,42 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
|
||||
NamedList response) {
|
||||
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
||||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
|
||||
|
||||
SpellCheckCollator collator = new SpellCheckCollator();
|
||||
List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries);
|
||||
//by sorting here we guarantee a non-distributed request returns all
|
||||
//results in the same order as a distributed request would,
|
||||
//even in cases when the internal rank is the same.
|
||||
Collections.sort(collations);
|
||||
|
||||
for (SpellCheckCollation collation : collations) {
|
||||
if (collationExtendedResults) {
|
||||
NamedList extendedResult = new NamedList();
|
||||
extendedResult.add("collationQuery", collation.getCollationQuery());
|
||||
extendedResult.add("hits", collation.getHits());
|
||||
extendedResult.add("misspellingsAndCorrections", collation.getMisspellingsAndCorrections());
|
||||
if(maxCollationTries>0 && shard)
|
||||
{
|
||||
extendedResult.add("collationInternalRank", collation.getInternalRank());
|
||||
}
|
||||
response.add("collation", extendedResult);
|
||||
} else {
|
||||
response.add("collation", collation.getCollationQuery());
|
||||
if(maxCollationTries>0 && shard)
|
||||
{
|
||||
response.add("collationInternalRank", collation.getInternalRank());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY, add
|
||||
|
@ -215,6 +253,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
|
||||
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
||||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
||||
|
||||
String origQuery = params.get(SPELLCHECK_Q);
|
||||
if (origQuery == null) {
|
||||
|
@ -255,6 +296,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
|
||||
// alternative string -> corresponding SuggestWord object
|
||||
Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
|
||||
Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
|
||||
|
||||
int totalNumberShardResponses = 0;
|
||||
for (ShardRequest sreq : rb.finished) {
|
||||
|
@ -304,6 +346,51 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
}
|
||||
NamedList suggestions = (NamedList) nl.get("suggestions");
|
||||
if(suggestions != null) {
|
||||
List<Object> collationList = suggestions.getAll("collation");
|
||||
List<Object> collationRankList = suggestions.getAll("collationInternalRank");
|
||||
int i=0;
|
||||
if(collationList != null) {
|
||||
for(Object o : collationList)
|
||||
{
|
||||
if(o instanceof String)
|
||||
{
|
||||
SpellCheckCollation coll = new SpellCheckCollation();
|
||||
coll.setCollationQuery((String) o);
|
||||
if(collationRankList!= null && collationRankList.size()>0)
|
||||
{
|
||||
coll.setInternalRank((Integer) collationRankList.get(i));
|
||||
i++;
|
||||
}
|
||||
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
|
||||
if(priorColl != null)
|
||||
{
|
||||
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
|
||||
}
|
||||
collations.put(coll.getCollationQuery(), coll);
|
||||
} else
|
||||
{
|
||||
NamedList expandedCollation = (NamedList) o;
|
||||
SpellCheckCollation coll = new SpellCheckCollation();
|
||||
coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
|
||||
coll.setHits((Integer) expandedCollation.get("hits"));
|
||||
if(maxCollationTries>0)
|
||||
{
|
||||
coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
|
||||
}
|
||||
coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
|
||||
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
|
||||
if(priorColl != null)
|
||||
{
|
||||
coll.setHits(coll.getHits() + priorColl.getHits());
|
||||
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
|
||||
}
|
||||
collations.put(coll.getCollationQuery(), coll);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -359,7 +446,28 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
|
||||
NamedList response = new SimpleOrderedMap();
|
||||
response.add("suggestions", toNamedList(false, result, origQuery, extendedResults, collate));
|
||||
NamedList suggestions = toNamedList(false, result, origQuery, extendedResults, collate);
|
||||
if (collate) {
|
||||
SpellCheckCollation[] sortedCollations = collations.values().toArray(new SpellCheckCollation[collations.size()]);
|
||||
Arrays.sort(sortedCollations);
|
||||
int i = 0;
|
||||
while (i < maxCollations && i < sortedCollations.length) {
|
||||
SpellCheckCollation collation = sortedCollations[i];
|
||||
i++;
|
||||
if (collationExtendedResults) {
|
||||
NamedList extendedResult = new NamedList();
|
||||
extendedResult.add("collationQuery", collation.getCollationQuery());
|
||||
extendedResult.add("hits", collation.getHits());
|
||||
extendedResult.add("misspellingsAndCorrections", collation
|
||||
.getMisspellingsAndCorrections());
|
||||
suggestions.add("collation", extendedResult);
|
||||
} else {
|
||||
suggestions.add("collation", collation.getCollationQuery());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response.add("suggestions", suggestions);
|
||||
rb.rsp.add("spellcheck", response);
|
||||
}
|
||||
|
||||
|
@ -412,10 +520,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
Map<Token, LinkedHashMap<String, Integer>> suggestions = spellingResult.getSuggestions();
|
||||
boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo();
|
||||
boolean isCorrectlySpelled = false;
|
||||
Map<Token, String> best = null;
|
||||
if (collate == true){
|
||||
best = new LinkedHashMap<Token, String>(suggestions.size());
|
||||
}
|
||||
|
||||
int numSuggestions = 0;
|
||||
for(LinkedHashMap<String, Integer> theSuggestion : suggestions.values())
|
||||
|
@ -424,7 +528,8 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
{
|
||||
numSuggestions++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// will be flipped to false if any of the suggestions are not in the index and hasFreqInfo is true
|
||||
if(numSuggestions > 0) {
|
||||
isCorrectlySpelled = true;
|
||||
|
@ -462,9 +567,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
suggestionList.add("suggestion", theSuggestions.keySet());
|
||||
}
|
||||
|
||||
if (collate == true && theSuggestions.size()>0){//set aside the best suggestion for this token
|
||||
best.put(inputToken, theSuggestions.keySet().iterator().next());
|
||||
}
|
||||
if (hasFreqInfo) {
|
||||
isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
|
||||
}
|
||||
|
@ -476,24 +578,6 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
} else if(extendedResults && suggestions.size() == 0) { // if the word is misspelled, its added to suggestions with freqinfo
|
||||
result.add("correctlySpelled", true);
|
||||
}
|
||||
if (collate == true){
|
||||
StringBuilder collation = new StringBuilder(origQuery);
|
||||
int offset = 0;
|
||||
for (Iterator<Map.Entry<Token, String>> bestIter = best.entrySet().iterator(); bestIter.hasNext();) {
|
||||
Map.Entry<Token, String> entry = bestIter.next();
|
||||
Token tok = entry.getKey();
|
||||
// we are replacing the query in order, but injected terms might cause illegal offsets due to previous replacements.
|
||||
if (tok.getPositionIncrement() == 0) continue;
|
||||
collation.replace(tok.startOffset() + offset,
|
||||
tok.endOffset() + offset, entry.getValue());
|
||||
offset += entry.getValue().length() - (tok.endOffset() - tok.startOffset());
|
||||
}
|
||||
String collVal = collation.toString();
|
||||
if (collVal.equals(origQuery) == false) {
|
||||
LOG.debug("Collation:" + collation);
|
||||
result.add("collation", collVal);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Given a list of possible Spelling Corrections for multiple mis-spelled words
|
||||
* in a query, This iterator returns Possible Correction combinations ordered by
|
||||
* reasonable probability that such a combination will return actual hits if
|
||||
* re-queried. This implementation simply ranks the Possible Combinations by the
|
||||
* sum of their component ranks.
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
|
||||
private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
|
||||
private List<RankedSpellPossibility> rankedPossibilityList = new ArrayList<RankedSpellPossibility>();
|
||||
private Iterator<RankedSpellPossibility> rankedPossibilityIterator;
|
||||
private int correctionIndex[];
|
||||
private boolean done = false;
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private PossibilityIterator() {
|
||||
throw new AssertionError("You shan't go here.");
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* We assume here that the passed-in inner LinkedHashMaps are already sorted
|
||||
* in order of "Best Possible Correction".
|
||||
* </p>
|
||||
*
|
||||
* @param suggestions
|
||||
*/
|
||||
public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions) {
|
||||
for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
|
||||
Token token = entry.getKey();
|
||||
List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
|
||||
for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
|
||||
SpellCheckCorrection correction = new SpellCheckCorrection();
|
||||
correction.setOriginal(token);
|
||||
correction.setCorrection(entry1.getKey());
|
||||
correction.setNumberOfOccurences(entry1.getValue());
|
||||
possibleCorrections.add(correction);
|
||||
}
|
||||
possibilityList.add(possibleCorrections);
|
||||
}
|
||||
|
||||
int wrapSize = possibilityList.size();
|
||||
if (wrapSize == 0) {
|
||||
done = true;
|
||||
} else {
|
||||
correctionIndex = new int[wrapSize];
|
||||
for (int i = 0; i < wrapSize; i++) {
|
||||
int suggestSize = possibilityList.get(i).size();
|
||||
if (suggestSize == 0) {
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
correctionIndex[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
while (internalHasNext()) {
|
||||
rankedPossibilityList.add(internalNext());
|
||||
}
|
||||
Collections.sort(rankedPossibilityList);
|
||||
rankedPossibilityIterator = rankedPossibilityList.iterator();
|
||||
}
|
||||
|
||||
private boolean internalHasNext() {
|
||||
return !done;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This method is converting the independent LinkHashMaps containing various
|
||||
* (silo'ed) suggestions for each mis-spelled word into individual
|
||||
* "holistic query corrections", aka. "Spell Check Possibility"
|
||||
* </p>
|
||||
* <p>
|
||||
* Rank here is the sum of each selected term's position in its respective
|
||||
* LinkedHashMap.
|
||||
* </p>
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private RankedSpellPossibility internalNext() {
|
||||
if (done) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
List<SpellCheckCorrection> possibleCorrection = new ArrayList<SpellCheckCorrection>();
|
||||
int rank = 0;
|
||||
for (int i = 0; i < correctionIndex.length; i++) {
|
||||
List<SpellCheckCorrection> singleWordPossibilities = possibilityList.get(i);
|
||||
SpellCheckCorrection singleWordPossibility = singleWordPossibilities.get(correctionIndex[i]);
|
||||
rank += correctionIndex[i];
|
||||
|
||||
if (i == correctionIndex.length - 1) {
|
||||
correctionIndex[i]++;
|
||||
if (correctionIndex[i] == singleWordPossibilities.size()) {
|
||||
correctionIndex[i] = 0;
|
||||
if (correctionIndex.length == 1) {
|
||||
done = true;
|
||||
}
|
||||
for (int ii = i - 1; ii >= 0; ii--) {
|
||||
correctionIndex[ii]++;
|
||||
if (correctionIndex[ii] >= possibilityList.get(ii).size() && ii > 0) {
|
||||
correctionIndex[ii] = 0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
possibleCorrection.add(singleWordPossibility);
|
||||
}
|
||||
|
||||
if(correctionIndex[0] == possibilityList.get(0).size())
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
|
||||
RankedSpellPossibility rsl = new RankedSpellPossibility();
|
||||
rsl.setCorrections(possibleCorrection);
|
||||
rsl.setRank(rank);
|
||||
return rsl;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return rankedPossibilityIterator.hasNext();
|
||||
}
|
||||
|
||||
public RankedSpellPossibility next() {
|
||||
return rankedPossibilityIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class RankedSpellPossibility implements Comparable<RankedSpellPossibility> {
|
||||
private List<SpellCheckCorrection> corrections;
|
||||
private int rank;
|
||||
|
||||
public int compareTo(RankedSpellPossibility rcl) {
|
||||
return new Integer(rank).compareTo(rcl.rank);
|
||||
}
|
||||
|
||||
public List<SpellCheckCorrection> getCorrections() {
|
||||
return corrections;
|
||||
}
|
||||
|
||||
public void setCorrections(List<SpellCheckCorrection> corrections) {
|
||||
this.corrections = corrections;
|
||||
}
|
||||
|
||||
public int getRank() {
|
||||
return rank;
|
||||
}
|
||||
|
||||
public void setRank(int rank) {
|
||||
this.rank = rank;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
package org.apache.solr.spelling;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
public class SpellCheckCollation implements Comparable<SpellCheckCollation> {
|
||||
private NamedList<String> misspellingsAndCorrections;
|
||||
private int hits;
|
||||
private int internalRank;
|
||||
private String collationQuery;
|
||||
|
||||
public int compareTo(SpellCheckCollation scc) {
|
||||
int c = new Integer(internalRank).compareTo(scc.internalRank);
|
||||
if (c == 0) {
|
||||
return collationQuery.compareTo(scc.collationQuery);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public NamedList<String> getMisspellingsAndCorrections() {
|
||||
return misspellingsAndCorrections;
|
||||
}
|
||||
|
||||
public void setMisspellingsAndCorrections(
|
||||
NamedList<String> misspellingsAndCorrections) {
|
||||
this.misspellingsAndCorrections = misspellingsAndCorrections;
|
||||
}
|
||||
|
||||
public int getHits() {
|
||||
return hits;
|
||||
}
|
||||
|
||||
public void setHits(int hits) {
|
||||
this.hits = hits;
|
||||
}
|
||||
|
||||
public String getCollationQuery() {
|
||||
return collationQuery;
|
||||
}
|
||||
|
||||
public void setCollationQuery(String collationQuery) {
|
||||
this.collationQuery = collationQuery;
|
||||
}
|
||||
|
||||
public int getInternalRank() {
|
||||
return internalRank;
|
||||
}
|
||||
|
||||
public void setInternalRank(int internalRank) {
|
||||
this.internalRank = internalRank;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.QueryComponent;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SearchHandler;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.mortbay.log.Log;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class SpellCheckCollator {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
|
||||
|
||||
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
|
||||
int maxCollations, int maxTries) {
|
||||
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
|
||||
|
||||
QueryComponent queryComponent = null;
|
||||
if (ultimateResponse.components != null) {
|
||||
for (SearchComponent sc : ultimateResponse.components) {
|
||||
if (sc instanceof QueryComponent) {
|
||||
queryComponent = (QueryComponent) sc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean verifyCandidateWithQuery = true;
|
||||
if (maxTries < 1) {
|
||||
maxTries = 1;
|
||||
verifyCandidateWithQuery = false;
|
||||
}
|
||||
if (queryComponent == null && verifyCandidateWithQuery) {
|
||||
LOG.warn("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
|
||||
maxTries = 1;
|
||||
verifyCandidateWithQuery = false;
|
||||
}
|
||||
|
||||
int tryNo = 0;
|
||||
int collNo = 0;
|
||||
PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions());
|
||||
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
|
||||
|
||||
RankedSpellPossibility possibility = possibilityIter.next();
|
||||
String collationQueryStr = getCollation(originalQuery, possibility.getCorrections());
|
||||
int hits = 0;
|
||||
|
||||
if (verifyCandidateWithQuery) {
|
||||
tryNo++;
|
||||
|
||||
ResponseBuilder checkResponse = new ResponseBuilder();
|
||||
checkResponse.setQparser(ultimateResponse.getQparser());
|
||||
checkResponse.setFilters(ultimateResponse.getFilters());
|
||||
checkResponse.setQueryString(collationQueryStr);
|
||||
checkResponse.components = Arrays.asList(new SearchComponent[] { queryComponent });
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams(ultimateResponse.req.getParams());
|
||||
params.remove(CommonParams.Q);
|
||||
params.add(CommonParams.Q, collationQueryStr);
|
||||
params.remove(CommonParams.START);
|
||||
params.remove(CommonParams.ROWS);
|
||||
params.add(CommonParams.FL, "id");
|
||||
params.add(CommonParams.ROWS, "0");
|
||||
//Would rather have found a concrete class to use...
|
||||
checkResponse.req = new SolrQueryRequestBase(ultimateResponse.req.getCore(), params) { };
|
||||
checkResponse.rsp = new SolrQueryResponse();
|
||||
|
||||
try {
|
||||
queryComponent.prepare(checkResponse);
|
||||
queryComponent.process(checkResponse);
|
||||
hits = (Integer) checkResponse.rsp.getToLog().get("hits");
|
||||
} catch (Exception e) {
|
||||
Log.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
|
||||
}
|
||||
}
|
||||
if (hits > 0 || !verifyCandidateWithQuery) {
|
||||
collNo++;
|
||||
SpellCheckCollation collation = new SpellCheckCollation();
|
||||
collation.setCollationQuery(collationQueryStr);
|
||||
collation.setHits(hits);
|
||||
collation.setInternalRank(possibility.getRank());
|
||||
|
||||
NamedList<String> misspellingsAndCorrections = new NamedList<String>();
|
||||
for (SpellCheckCorrection corr : possibility.getCorrections()) {
|
||||
misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
|
||||
}
|
||||
collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
|
||||
collations.add(collation);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Collation: " + collationQueryStr + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
|
||||
}
|
||||
}
|
||||
return collations;
|
||||
}
|
||||
|
||||
private String getCollation(String origQuery,
|
||||
List<SpellCheckCorrection> corrections) {
|
||||
StringBuilder collation = new StringBuilder(origQuery);
|
||||
int offset = 0;
|
||||
for (SpellCheckCorrection correction : corrections) {
|
||||
Token tok = correction.getOriginal();
|
||||
// we are replacing the query in order, but injected terms might cause
|
||||
// illegal offsets due to previous replacements.
|
||||
if (tok.getPositionIncrement() == 0)
|
||||
continue;
|
||||
collation.replace(tok.startOffset() + offset, tok.endOffset() + offset,
|
||||
correction.getCorrection());
|
||||
offset += correction.getCorrection().length()
|
||||
- (tok.endOffset() - tok.startOffset());
|
||||
}
|
||||
return collation.toString();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
|
||||
public class SpellCheckCorrection {
|
||||
private Token original;
|
||||
private String correction;
|
||||
private int numberOfOccurences;
|
||||
|
||||
public Token getOriginal() {
|
||||
return original;
|
||||
}
|
||||
|
||||
public void setOriginal(Token original) {
|
||||
this.original = original;
|
||||
}
|
||||
|
||||
public String getCorrection() {
|
||||
return correction;
|
||||
}
|
||||
|
||||
public void setCorrection(String correction) {
|
||||
this.correction = correction;
|
||||
}
|
||||
|
||||
public int getNumberOfOccurences() {
|
||||
return numberOfOccurences;
|
||||
}
|
||||
|
||||
public void setNumberOfOccurences(int numberOfOccurences) {
|
||||
this.numberOfOccurences = numberOfOccurences;
|
||||
}
|
||||
|
||||
}
|
|
@ -31,7 +31,7 @@ import java.util.Map;
|
|||
*/
|
||||
public class SpellCheckResponse {
|
||||
private boolean correctlySpelled;
|
||||
private String collation;
|
||||
private List<Collation> collations;
|
||||
private List<Suggestion> suggestions = new ArrayList<Suggestion>();
|
||||
Map<String, Suggestion> suggestionMap = new LinkedHashMap<String, Suggestion>();
|
||||
|
||||
|
@ -45,8 +45,39 @@ public class SpellCheckResponse {
|
|||
String n = sugg.getName(i);
|
||||
if ("correctlySpelled".equals(n)) {
|
||||
correctlySpelled = (Boolean) sugg.getVal(i);
|
||||
} else if ("collation".equals(n)) {
|
||||
collation = (String) sugg.getVal(i);
|
||||
} else if ("collationInternalRank".equals(n)){
|
||||
//continue;
|
||||
} else if ("collation".equals(n)) {
|
||||
List<Object> collationInfo = sugg.getAll(n);
|
||||
collations = new ArrayList<Collation>(collationInfo.size());
|
||||
for (Object o : collationInfo) {
|
||||
if (o instanceof String) {
|
||||
collations.add(new Collation()
|
||||
.setCollationQueryString((String) sugg.getVal(i)));
|
||||
} else if (o instanceof NamedList) {
|
||||
NamedList expandedCollation = (NamedList) o;
|
||||
String collationQuery = (String) expandedCollation
|
||||
.get("collationQuery");
|
||||
int hits = (Integer) expandedCollation.get("hits");
|
||||
NamedList<String> misspellingsAndCorrections = (NamedList<String>) expandedCollation
|
||||
.get("misspellingsAndCorrections");
|
||||
|
||||
Collation collation = new Collation();
|
||||
collation.setCollationQueryString(collationQuery);
|
||||
collation.setNumberOfHits(hits);
|
||||
|
||||
for (int ii = 0; ii < misspellingsAndCorrections.size(); ii++) {
|
||||
String misspelling = misspellingsAndCorrections.getName(ii);
|
||||
String correction = misspellingsAndCorrections.getVal(ii);
|
||||
collation.addMisspellingsAndCorrection(new Correction(
|
||||
misspelling, correction));
|
||||
}
|
||||
collations.add(collation);
|
||||
} else {
|
||||
throw new AssertionError(
|
||||
"Should get Lists of Strings or List of NamedLists here.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Suggestion s = new Suggestion(n, (NamedList<Object>) sugg.getVal(i));
|
||||
suggestionMap.put(n, s);
|
||||
|
@ -77,8 +108,25 @@ public class SpellCheckResponse {
|
|||
return s.getAlternatives().get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Return the first collated query string. For convenience and backwards-compatibility. Use getCollatedResults() for full data.
|
||||
* </p>
|
||||
* @return
|
||||
*/
|
||||
public String getCollatedResult() {
|
||||
return collation;
|
||||
return collations==null || collations.size()==0 ? null : collations.get(0).collationQueryString;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Return all collations.
|
||||
* Will include # of hits and misspelling-to-correction details if "spellcheck.collateExtendedResults was true.
|
||||
* </p>
|
||||
* @return
|
||||
*/
|
||||
public List<Collation> getCollatedResults() {
|
||||
return collations;
|
||||
}
|
||||
|
||||
public static class Suggestion {
|
||||
|
@ -162,4 +210,63 @@ public class SpellCheckResponse {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
public class Collation {
|
||||
private String collationQueryString;
|
||||
private List<Correction> misspellingsAndCorrections = new ArrayList<Correction>();
|
||||
private long numberOfHits;
|
||||
|
||||
public long getNumberOfHits() {
|
||||
return numberOfHits;
|
||||
}
|
||||
|
||||
public void setNumberOfHits(long numberOfHits) {
|
||||
this.numberOfHits = numberOfHits;
|
||||
}
|
||||
|
||||
public String getCollationQueryString() {
|
||||
return collationQueryString;
|
||||
}
|
||||
|
||||
public Collation setCollationQueryString(String collationQueryString) {
|
||||
this.collationQueryString = collationQueryString;
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<Correction> getMisspellingsAndCorrections() {
|
||||
return misspellingsAndCorrections;
|
||||
}
|
||||
|
||||
public Collation addMisspellingsAndCorrection(Correction correction) {
|
||||
this.misspellingsAndCorrections.add(correction);
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * A pair of strings: an original term and its correction. Both values are
 * stored as given (no null checks) and can be replaced through the setters.
 */
public class Correction {
  private String original;
  private String correction;

  /**
   * @param original the original term
   * @param correction the corrected term
   */
  public Correction(String original, String correction) {
    this.original = original;
    this.correction = correction;
  }

  /** @return the original term */
  public String getOriginal() {
    return this.original;
  }

  /** @param original the original term to store */
  public void setOriginal(String original) {
    this.original = original;
  }

  /** @return the corrected term */
  public String getCorrection() {
    return this.correction;
  }

  /** @param correction the corrected term to store */
  public void setCorrection(String correction) {
    this.correction = correction;
  }
}
|
||||
}
|
||||
|
|
|
@ -20,15 +20,28 @@ import junit.framework.Assert;
|
|||
import org.apache.solr.client.solrj.SolrJettyTestBase;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.SpellCheckResponse.Collation;
|
||||
import org.apache.solr.client.solrj.response.SpellCheckResponse.Correction;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SpellCheckComponent;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
|
@ -103,4 +116,79 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
|
|||
// Hmmm... the API for SpellCheckResponse could be nicer:
|
||||
response.getSuggestions().get(0).getAlternatives().get(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpellCheckCollationResponse() throws Exception {
|
||||
getSolrServer();
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField("id", "0");
|
||||
doc.setField("name", "faith hope and love");
|
||||
server.add(doc);
|
||||
doc = new SolrInputDocument();
|
||||
doc.setField("id", "1");
|
||||
doc.setField("name", "faith hope and loaves");
|
||||
server.add(doc);
|
||||
doc = new SolrInputDocument();
|
||||
doc.setField("id", "2");
|
||||
doc.setField("name", "fat hops and loaves");
|
||||
server.add(doc);
|
||||
doc = new SolrInputDocument();
|
||||
doc.setField("id", "3");
|
||||
doc.setField("name", "faith of homer");
|
||||
server.add(doc);
|
||||
doc = new SolrInputDocument();
|
||||
doc.setField("id", "4");
|
||||
doc.setField("name", "fat of homer");
|
||||
server.add(doc);
|
||||
server.commit(true, true);
|
||||
|
||||
//Test Backwards Compatibility
|
||||
SolrQuery query = new SolrQuery("name:(+fauth +home +loane)");
|
||||
query.set(CommonParams.QT, "/spell");
|
||||
query.set("spellcheck", true);
|
||||
query.set(SpellingParams.SPELLCHECK_BUILD, true);
|
||||
query.set(SpellingParams.SPELLCHECK_COUNT, 10);
|
||||
query.set(SpellingParams.SPELLCHECK_COLLATE, true);
|
||||
QueryRequest request = new QueryRequest(query);
|
||||
SpellCheckResponse response = request.process(server).getSpellCheckResponse();
|
||||
response = request.process(server).getSpellCheckResponse();
|
||||
assertTrue("name:(+faith +homer +loaves)".equals(response.getCollatedResult()));
|
||||
|
||||
//Test Expanded Collation Results
|
||||
query.set(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, true);
|
||||
query.set(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, 5);
|
||||
query.set(SpellingParams.SPELLCHECK_MAX_COLLATIONS, 2);
|
||||
request = new QueryRequest(query);
|
||||
response = request.process(server).getSpellCheckResponse();
|
||||
assertTrue("name:(+faith +hope +love)".equals(response.getCollatedResult()) || "name:(+faith +hope +loaves)".equals(response.getCollatedResult()));
|
||||
|
||||
List<Collation> collations = response.getCollatedResults();
|
||||
assertTrue(collations.size()==2);
|
||||
for(Collation collation : collations)
|
||||
{
|
||||
assertTrue("name:(+faith +hope +love)".equals(collation.getCollationQueryString()) || "name:(+faith +hope +loaves)".equals(collation.getCollationQueryString()));
|
||||
assertTrue(collation.getNumberOfHits()==1);
|
||||
|
||||
List<Correction> misspellingsAndCorrections = collation.getMisspellingsAndCorrections();
|
||||
assertTrue(misspellingsAndCorrections.size()==3);
|
||||
for(Correction correction : misspellingsAndCorrections)
|
||||
{
|
||||
if("fauth".equals(correction.getOriginal()))
|
||||
{
|
||||
assertTrue("faith".equals(correction.getCorrection()));
|
||||
} else if("home".equals(correction.getOriginal()))
|
||||
{
|
||||
assertTrue("hope".equals(correction.getCorrection()));
|
||||
} else if("loane".equals(correction.getOriginal()))
|
||||
{
|
||||
assertTrue("love".equals(correction.getCorrection()) || "loaves".equals(correction.getCorrection()));
|
||||
} else
|
||||
{
|
||||
fail("Original Word Should have been either fauth, home or loane.");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
||||
public class DistributedSpellCollatorTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
private String saveProp;
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
// this test requires FSDir
|
||||
saveProp = System.getProperty("solr.directoryFactory");
|
||||
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
if (saveProp == null)
|
||||
System.clearProperty("solr.directoryFactory");
|
||||
else
|
||||
System.setProperty("solr.directoryFactory", saveProp);
|
||||
}
|
||||
|
||||
private void q(Object... q) throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
||||
for (int i = 0; i < q.length; i += 2) {
|
||||
params.add(q[i].toString(), q[i + 1].toString());
|
||||
}
|
||||
|
||||
controlClient.query(params);
|
||||
|
||||
// query a random server
|
||||
params.set("shards", shards);
|
||||
int which = r.nextInt(clients.size());
|
||||
SolrServer client = clients.get(which);
|
||||
client.query(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
index(id, "1", "lowerfilt", "The quick red fox jumped over the lazy brown dogs.");
|
||||
index(id, "2" , "lowerfilt", "The quack rex fox jumped over the lazy brown dogs.");
|
||||
index(id, "3" , "lowerfilt", "The quote rex fox jumped over the lazy brown dogs.");
|
||||
index(id, "4" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "5" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "6" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "7" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "8" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "9" , "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "10", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "11", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "12", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "13", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "14", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
index(id, "15", "lowerfilt", "The quote redo fox jumped over the lazy brown dogs.");
|
||||
commit();
|
||||
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
// we care only about the spellcheck results
|
||||
handle.put("response", SKIP);
|
||||
q("q", "*:*", SpellCheckComponent.SPELLCHECK_BUILD, "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH");
|
||||
|
||||
query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
|
||||
query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
|
||||
query("q", "lowerfilt:(+quock +reb)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "false");
|
||||
|
||||
// Ensure that each iteration of test uses a fresh Jetty data directory.
|
||||
// Otherwise we get incorrect # hits
|
||||
// This probably should be fixed in BaseDistributedSearch in its own issue,
|
||||
// but I needed this test to pass now...
|
||||
AbstractSolrTestCase.recurseDelete(testDir);
|
||||
testDir = new File(System.getProperty("java.io.tmpdir")
|
||||
+ System.getProperty("file.separator") + getClass().getName() + "-"
|
||||
+ System.currentTimeMillis());
|
||||
testDir.mkdirs();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,226 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SpellCheckComponent;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema.xml");
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love")));
|
||||
assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves")));
|
||||
assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
|
||||
assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
|
||||
assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollateWithFilter() throws Exception
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
SearchComponent speller = core.getSearchComponent("spellcheck");
|
||||
assertTrue("speller is null and it shouldn't be", speller != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
|
||||
params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
|
||||
params.add(CommonParams.FQ, "NOT(id:1)");
|
||||
|
||||
//Because a FilterQuery is applied which removes doc id#1 from possible hits, we would
|
||||
//not want the collations to return us "lowerfilt:(+faith +hope +loaves)" as this only matches doc id#1.
|
||||
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
List<String> collations = suggestions.getAll("collation");
|
||||
assertTrue(collations.size() == 1);
|
||||
assertTrue(collations.get(0).equals("lowerfilt:(+faith +hope +love)"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollateWithMultipleRequestHandlers() throws Exception
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
SearchComponent speller = core.getSearchComponent("spellcheck");
|
||||
assertTrue("speller is null and it shouldn't be", speller != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_DICT, "multipleFields");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "1");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
|
||||
params.add(CommonParams.Q, "peac");
|
||||
|
||||
//SpellCheckCompRH has no "qf" defined. It will not find "peace" from "peac" despite it being in the dictionary
|
||||
//because requrying against this Request Handler results in 0 hits.
|
||||
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
String singleCollation = (String) suggestions.get("collation");
|
||||
assertNull(singleCollation);
|
||||
|
||||
//SpellCheckCompRH1 has "lowerfilt1" defined in the "qf" param. It will find "peace" from "peac" because
|
||||
//requrying field "lowerfilt1" returns the hit.
|
||||
params.remove(SpellCheckComponent.SPELLCHECK_BUILD);
|
||||
handler = core.getRequestHandler("spellCheckCompRH1");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
spellCheck = (NamedList) values.get("spellcheck");
|
||||
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
singleCollation = (String) suggestions.get("collation");
|
||||
assertEquals(singleCollation, "peace");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtendedCollate() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
SearchComponent speller = core.getSearchComponent("spellcheck");
|
||||
assertTrue("speller is null and it shouldn't be", speller != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CommonParams.QT, "spellCheckCompRH");
|
||||
params.add(CommonParams.Q, "lowerfilt:(+fauth +home +loane)");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true");
|
||||
params.add(SpellCheckComponent.COMPONENT_NAME, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
|
||||
|
||||
// Testing backwards-compatible behavior.
|
||||
// Returns 1 collation as a single string.
|
||||
// All words are "correct" per the dictionary, but this collation would
|
||||
// return no results if tried.
|
||||
SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
NamedList spellCheck = (NamedList) values.get("spellcheck");
|
||||
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
String singleCollation = (String) suggestions.get("collation");
|
||||
assertEquals("lowerfilt:(+faith +homer +loaves)", singleCollation);
|
||||
|
||||
// Testing backwards-compatible response format but will only return a
|
||||
// collation that would return results.
|
||||
params.remove(SpellCheckComponent.SPELLCHECK_BUILD);
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1");
|
||||
handler = core.getRequestHandler("spellCheckCompRH");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
spellCheck = (NamedList) values.get("spellcheck");
|
||||
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
singleCollation = (String) suggestions.get("collation");
|
||||
assertEquals("lowerfilt:(+faith +hope +loaves)", singleCollation);
|
||||
|
||||
// Testing returning multiple collations if more than one valid
|
||||
// combination exists.
|
||||
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES);
|
||||
params.remove(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS);
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "5");
|
||||
params.add(SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "2");
|
||||
handler = core.getRequestHandler("spellCheckCompRH");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
spellCheck = (NamedList) values.get("spellcheck");
|
||||
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
List<String> collations = suggestions.getAll("collation");
|
||||
assertTrue(collations.size() == 2);
|
||||
for (String multipleCollation : collations) {
|
||||
assertTrue(multipleCollation.equals("lowerfilt:(+faith +hope +love)")
|
||||
|| multipleCollation.equals("lowerfilt:(+faith +hope +loaves)"));
|
||||
}
|
||||
|
||||
// Testing return multiple collations with expanded collation response
|
||||
// format.
|
||||
params.add(SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
|
||||
handler = core.getRequestHandler("spellCheckCompRH");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
spellCheck = (NamedList) values.get("spellcheck");
|
||||
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
List<NamedList> expandedCollationList = suggestions.getAll("collation");
|
||||
Set<String> usedcollations = new HashSet<String>();
|
||||
assertTrue(expandedCollationList.size() == 2);
|
||||
for (NamedList expandedCollation : expandedCollationList) {
|
||||
String multipleCollation = (String) expandedCollation.get("collationQuery");
|
||||
assertTrue(multipleCollation.equals("lowerfilt:(+faith +hope +love)")
|
||||
|| multipleCollation.equals("lowerfilt:(+faith +hope +loaves)"));
|
||||
assertTrue(!usedcollations.contains(multipleCollation));
|
||||
usedcollations.add(multipleCollation);
|
||||
|
||||
int hits = (Integer) expandedCollation.get("hits");
|
||||
assertTrue(hits == 1);
|
||||
|
||||
NamedList misspellingsAndCorrections = (NamedList) expandedCollation.get("misspellingsAndCorrections");
|
||||
assertTrue(misspellingsAndCorrections.size() == 3);
|
||||
|
||||
String correctionForFauth = (String) misspellingsAndCorrections.get("fauth");
|
||||
String correctionForHome = (String) misspellingsAndCorrections.get("home");
|
||||
String correctionForLoane = (String) misspellingsAndCorrections.get("loane");
|
||||
assertTrue(correctionForFauth.equals("faith"));
|
||||
assertTrue(correctionForHome.equals("hope"));
|
||||
assertTrue(correctionForLoane.equals("love") || correctionForLoane.equals("loaves"));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.spelling.PossibilityIterator;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
|
||||
|
||||
private static Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
|
||||
suggestions.clear();
|
||||
|
||||
LinkedHashMap<String, Integer> AYE = new LinkedHashMap<String, Integer>();
|
||||
AYE.put("I", 0);
|
||||
AYE.put("II", 0);
|
||||
AYE.put("III", 0);
|
||||
AYE.put("IV", 0);
|
||||
AYE.put("V", 0);
|
||||
AYE.put("VI", 0);
|
||||
AYE.put("VII", 0);
|
||||
AYE.put("VIII", 0);
|
||||
|
||||
LinkedHashMap<String, Integer> BEE = new LinkedHashMap<String, Integer>();
|
||||
BEE.put("alpha", 0);
|
||||
BEE.put("beta", 0);
|
||||
BEE.put("gamma", 0);
|
||||
BEE.put("delta", 0);
|
||||
BEE.put("epsilon", 0);
|
||||
BEE.put("zeta", 0);
|
||||
BEE.put("eta", 0);
|
||||
BEE.put("theta", 0);
|
||||
BEE.put("iota", 0);
|
||||
|
||||
|
||||
LinkedHashMap<String, Integer> CEE = new LinkedHashMap<String, Integer>();
|
||||
CEE.put("one", 0);
|
||||
CEE.put("two", 0);
|
||||
CEE.put("three", 0);
|
||||
CEE.put("four", 0);
|
||||
CEE.put("five", 0);
|
||||
CEE.put("six", 0);
|
||||
CEE.put("seven", 0);
|
||||
CEE.put("eight", 0);
|
||||
CEE.put("nine", 0);
|
||||
CEE.put("ten", 0);
|
||||
|
||||
suggestions.put(new Token("AYE", 0, 2), AYE);
|
||||
suggestions.put(new Token("BEE", 0, 2), BEE);
|
||||
suggestions.put(new Token("CEE", 0, 2), CEE);
|
||||
}
|
||||
|
||||
/**
 * Counts the combinations produced by PossibilityIterator for three, two,
 * one and zero suggestion maps, removing one token from the shared map
 * between rounds (CEE, then BEE, then AYE).
 */
@Test
public void testSpellPossibilityIterator() throws Exception {
  final String[] removedBeforeRound = {null, "CEE", "BEE", "AYE"};
  final int[] expectedCounts = {720, 72, 8, 0};
  final String[] failurePrefixes = {
    "Three maps (8*9*10) should return 720 iterations but instead returned ",
    "Two maps (8*9) should return 72 iterations but instead returned ",
    "One map of 8 should return 8 iterations but instead returned ",
    "No maps should return 0 iterations but instead returned "
  };

  for (int round = 0; round < expectedCounts.length; round++) {
    // The first round runs on the full map; later rounds drop one token first.
    if (removedBeforeRound[round] != null) {
      suggestions.remove(new Token(removedBeforeRound[round], 0, 2));
    }
    PossibilityIterator iter = new PossibilityIterator(suggestions);
    int count = 0;
    while (iter.hasNext()) {
      iter.next();
      count++;
    }
    assertTrue((failurePrefixes[round] + count), count == expectedCounts[round]);
  }
}
|
||||
}
|
|
@ -19,8 +19,8 @@
|
|||
<!-- The Solr schema file. This file should be named "schema.xml" and
|
||||
should be located where the classloader for the Solr webapp can find it.
|
||||
|
||||
This schema is used for testing, and as such has everything and the
|
||||
kitchen sink thrown in. See example/solr/conf/schema.xml for a
|
||||
This schema is used for testing, and as such has everything and the
|
||||
kitchen sink thrown in. See example/solr/conf/schema.xml for a
|
||||
more concise example.
|
||||
|
||||
$Id: schema.xml 382610 2006-03-03 01:43:03Z yonik $
|
||||
|
@ -50,7 +50,7 @@
|
|||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
@ -100,7 +100,7 @@
|
|||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -353,7 +353,7 @@
|
|||
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
|
||||
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
|
||||
synonyms "better"
|
||||
-->
|
||||
|
@ -378,7 +378,7 @@
|
|||
</fieldtype>
|
||||
|
||||
<fieldType name="uuid" class="solr.UUIDField" />
|
||||
|
||||
|
||||
<!-- Try out some point types -->
|
||||
<fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/>
|
||||
<fieldType name="x" class="solr.PointType" dimension="1" subFieldType="double"/>
|
||||
|
@ -444,7 +444,7 @@
|
|||
<field name="test_notv" type="text" termVectors="false"/>
|
||||
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
|
||||
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
|
||||
<field name="test_posofftv" type="text" termVectors="true"
|
||||
<field name="test_posofftv" type="text" termVectors="true"
|
||||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- test highlit field settings -->
|
||||
|
@ -463,6 +463,8 @@
|
|||
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
|
||||
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
|
||||
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
|
||||
<field name="lowerfilt1" type="lowerfilt" indexed="true" stored="true"/>
|
||||
<field name="lowerfilt1and2" type="lowerfilt" indexed="true" stored="true"/>
|
||||
<field name="patterntok" type="patterntok" indexed="true" stored="true"/>
|
||||
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
|
||||
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
|
||||
|
@ -487,14 +489,14 @@
|
|||
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
|
||||
|
||||
<field name="textgap" type="textgap" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
|
||||
<field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
|
||||
|
||||
|
||||
|
||||
<field name="tlong" type="tlong" indexed="true" stored="true" />
|
||||
|
||||
|
||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||
will be used if the name matches any of the patterns.
|
||||
RESTRICTION: the glob-like pattern in the name attribute must have
|
||||
|
@ -531,22 +533,22 @@
|
|||
<dynamicField name="*_pl" type="plong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pd" type="pdouble" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pdt" type="pdate" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
|
||||
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
|
||||
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
<dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multiValued="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
|
||||
<dynamicField name="*_p" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
||||
<dynamicField name="*_p" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
||||
|
||||
<!-- special fields for dynamic copyField test -->
|
||||
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<!-- for testing to ensure that longer patterns are matched first -->
|
||||
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*aaa" type="pint" indexed="false" stored="true"/>
|
||||
|
@ -569,24 +571,26 @@
|
|||
<copyField source="title" dest="title_lettertok"/>
|
||||
|
||||
<copyField source="title" dest="text"/>
|
||||
<copyField source="subject" dest="text"/>
|
||||
|
||||
<copyField source="*_t" dest="text"/>
|
||||
<copyField source="subject" dest="text"/>
|
||||
|
||||
<copyField source="id" dest="range_facet_si"/>
|
||||
<copyField source="id" dest="range_facet_l"/>
|
||||
<copyField source="id" dest="range_facet_sl"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_sf"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_d"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_sd"/>
|
||||
<copyField source="lowerfilt1" dest="lowerfilt1and2"/>
|
||||
<copyField source="lowerfilt" dest="lowerfilt1and2"/>
|
||||
|
||||
<copyField source="bday" dest="bday_pdt"/>
|
||||
<copyField source="a_tdt" dest="a_pdt"/>
|
||||
|
||||
<copyField source="*_t" dest="text"/>
|
||||
|
||||
<copyField source="id" dest="range_facet_si"/>
|
||||
<copyField source="id" dest="range_facet_l"/>
|
||||
<copyField source="id" dest="range_facet_sl"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_sf"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_d"/>
|
||||
<copyField source="range_facet_f" dest="range_facet_sd"/>
|
||||
|
||||
<copyField source="bday" dest="bday_pdt"/>
|
||||
<copyField source="a_tdt" dest="a_pdt"/>
|
||||
|
||||
<!-- dynamic destination -->
|
||||
<copyField source="*_dynamic" dest="dynamic_*"/>
|
||||
|
||||
|
||||
<!-- Similarity is the scoring routine for each document vs a query.
|
||||
A custom similarity may be specified here, but the default is fine
|
||||
for most applications.
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
tests. if you need to test something esoteric please add a new
|
||||
"solrconfig-your-esoteric-purpose.xml" config file.
|
||||
|
||||
Note in particular that this test is used by MinimalSchemaTest so
|
||||
Note in particular that this test is used by MinimalSchemaTest so
|
||||
Anything added to this file needs to work correctly even if there
|
||||
is no uniqueKey or defaultSearch Field.
|
||||
|
||||
|
@ -115,15 +115,15 @@
|
|||
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
|
||||
<!-- autocommit pending docs if certain criteria are met
|
||||
<autoCommit>
|
||||
<!-- autocommit pending docs if certain criteria are met
|
||||
<autoCommit>
|
||||
<maxDocs>10000</maxDocs>
|
||||
<maxTime>3600000</maxTime>
|
||||
<maxTime>3600000</maxTime>
|
||||
</autoCommit>
|
||||
-->
|
||||
<!-- represents a lower bound on the frequency that commits may
|
||||
occur (in seconds). NOTE: not yet implemented
|
||||
|
||||
|
||||
<commitIntervalLowerBound>0</commitIntervalLowerBound>
|
||||
-->
|
||||
|
||||
|
@ -342,6 +342,12 @@
|
|||
<str name="spellcheckIndexDir">spellchecker1</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
<!-- Spellchecker named "multipleFields": uses the lowerfilt1and2 field
     (the test schema copies both lowerfilt and lowerfilt1 into it), keeps
     its index in the spellcheckerMultipleFields directory, and has
     buildOnCommit enabled. -->
<lst name="spellchecker">
|
||||
<str name="name">multipleFields</str>
|
||||
<str name="field">lowerfilt1and2</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerMultipleFields</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
<!-- Example of using different distance measure -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">jarowinkler</str>
|
||||
|
@ -411,8 +417,17 @@
|
|||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
<!-- Search handler "spellCheckCompRH1": defaults to the dismax query parser
     with qf=lowerfilt1^1 and runs the "spellcheck" component as a last
     component. -->
<requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="defType">dismax</str>
|
||||
<str name="qf">lowerfilt1^1</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
|
||||
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
|
||||
|
||||
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
|
||||
|
@ -502,5 +517,5 @@
|
|||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
|
||||
</config>
|
||||
|
|
Loading…
Reference in New Issue