mirror of https://github.com/apache/lucene.git
SOLR-5378: A new SuggestComponent that fully utilizes the Lucene suggester module and adds pluggable dictionaries, payloads and better distributed support
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1544793 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2f4c2380cd
commit
7763793c55
|
@ -98,6 +98,11 @@ New Features
|
|||
set of distinct values and their count. This can also be specified per field
|
||||
e.g. 'f.field.stats.calcdistinct'. (Elran Dvir via shalin)
|
||||
|
||||
* SOLR-5378: A new SuggestComponent that fully utilizes the Lucene suggester
|
||||
module and adds pluggable dictionaries, payloads and better distributed support.
|
||||
This is intended to eventually replace the Suggester support through the
|
||||
SpellCheckComponent. (Areek Zillur, Varun Thacker via shalin)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -92,6 +92,8 @@
|
|||
<pathelement location="${memory.jar}"/>
|
||||
<pathelement location="${misc.jar}"/>
|
||||
<pathelement location="${spatial.jar}"/>
|
||||
<fileset dir="${common.dir}/expressions/lib"/>
|
||||
<pathelement location="${expressions.jar}"/>
|
||||
<pathelement location="${suggest.jar}"/>
|
||||
<pathelement location="${grouping.jar}"/>
|
||||
<pathelement location="${queries.jar}"/>
|
||||
|
@ -155,7 +157,7 @@
|
|||
</target>
|
||||
|
||||
<target name="prep-lucene-jars"
|
||||
depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-codecs, jar-suggest, jar-highlighter, jar-memory,
|
||||
depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
|
||||
jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser, jar-join">
|
||||
<property name="solr.deps.compiled" value="true"/>
|
||||
</target>
|
||||
|
@ -228,7 +230,7 @@
|
|||
<property name="lucenedocs" location="${common.dir}/build/docs"/>
|
||||
|
||||
<!-- dependency to ensure all lucene javadocs are present -->
|
||||
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-codecs,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>
|
||||
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>
|
||||
|
||||
<!-- create javadocs for the current module -->
|
||||
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core">
|
||||
|
@ -295,6 +297,7 @@
|
|||
<link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}analyzers-uima" packagelistloc="${lucenedocs}/analyzers-uima"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}suggest" packagelistloc="${lucenedocs}/suggest"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}grouping" packagelistloc="${lucenedocs}/grouping"/>
|
||||
<link offline="true" href="${lucene.javadoc.url}queries" packagelistloc="${lucenedocs}/queries"/>
|
||||
|
|
|
@ -79,7 +79,7 @@ public class SolrResourceLoader implements ResourceLoader,Closeable
|
|||
|
||||
static final String project = "solr";
|
||||
static final String base = "org.apache" + "." + project;
|
||||
static final String[] packages = {"","analysis.","schema.","handler.","search.","update.","core.","response.","request.","update.processor.","util.", "spelling.", "handler.component.", "handler.dataimport." };
|
||||
static final String[] packages = {"","analysis.","schema.","handler.","search.","update.","core.","response.","request.","update.processor.","util.", "spelling.", "handler.component.", "handler.dataimport.", "spelling.suggest.", "spelling.suggest.fst." };
|
||||
|
||||
protected URLClassLoader classLoader;
|
||||
private final String instanceDir;
|
||||
|
|
|
@ -0,0 +1,442 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.spelling.suggest.SolrSuggester;
|
||||
import org.apache.solr.spelling.suggest.SuggesterOptions;
|
||||
import org.apache.solr.spelling.suggest.SuggesterParams;
|
||||
import org.apache.solr.spelling.suggest.SuggesterResult;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* SuggestComponent: interacts with multiple {@link SolrSuggester} to serve up suggestions
|
||||
* Responsible for routing commands and queries to the appropriate {@link SolrSuggester}
|
||||
* and for initializing them as specified by SolrConfig
|
||||
*/
|
||||
public class SuggestComponent extends SearchComponent implements SolrCoreAware, SuggesterParams {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SuggestComponent.class);
|
||||
|
||||
/** Name used to identify whether the user query concerns this component */
|
||||
public static final String COMPONENT_NAME = "suggest";
|
||||
|
||||
/** Name assigned to an unnamed suggester (at most one suggester can be unnamed) */
|
||||
private static final String DEFAULT_DICT_NAME = SolrSuggester.DEFAULT_DICT_NAME;
|
||||
|
||||
/** SolrConfig label to identify Config time settings */
|
||||
private static final String CONFIG_PARAM_LABEL = "suggester";
|
||||
|
||||
/** SolrConfig label to identify boolean value to build suggesters on commit */
|
||||
private static final String BUILD_ON_COMMIT_LABEL = "buildOnCommit";
|
||||
|
||||
/** SolrConfig label to identify boolean value to build suggesters on optimize */
|
||||
private static final String BUILD_ON_OPTIMIZE_LABEL = "buildOnOptimize";
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected NamedList initParams;
|
||||
|
||||
/**
|
||||
* Key is the dictionary name used in SolrConfig, value is the corresponding {@link SolrSuggester}
|
||||
*/
|
||||
protected Map<String, SolrSuggester> suggesters = new ConcurrentHashMap<String, SolrSuggester>();
|
||||
|
||||
/** Container for various labels used in the responses generated by this component */
|
||||
private static class SuggesterResultLabels {
|
||||
static final String SUGGEST = "suggest";
|
||||
static final String SUGGESTIONS = "suggestions";
|
||||
static final String SUGGESTION = "suggestion";
|
||||
static final String SUGGESTION_NUM_FOUND = "numFound";
|
||||
static final String SUGGESTION_TERM = "term";
|
||||
static final String SUGGESTION_WEIGHT = "weight";
|
||||
static final String SUGGESTION_PAYLOAD = "payload";
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void init(NamedList args) {
|
||||
super.init(args);
|
||||
this.initParams = args;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(SolrCore core) {
|
||||
if (initParams != null) {
|
||||
LOG.info("Initializing SuggesterComponent");
|
||||
boolean hasDefault = false;
|
||||
for (int i = 0; i < initParams.size(); i++) {
|
||||
if (initParams.getName(i).equals(CONFIG_PARAM_LABEL)) {
|
||||
NamedList suggesterParams = (NamedList) initParams.getVal(i);
|
||||
SolrSuggester suggester = new SolrSuggester();
|
||||
String dictionary = suggester.init(suggesterParams, core);
|
||||
if (dictionary != null) {
|
||||
boolean isDefault = dictionary.equals(DEFAULT_DICT_NAME);
|
||||
if (isDefault && !hasDefault) {
|
||||
hasDefault = true;
|
||||
} else if (isDefault){
|
||||
throw new RuntimeException("More than one dictionary is missing name.");
|
||||
}
|
||||
suggesters.put(dictionary, suggester);
|
||||
} else {
|
||||
if (!hasDefault){
|
||||
suggesters.put(DEFAULT_DICT_NAME, suggester);
|
||||
hasDefault = true;
|
||||
} else {
|
||||
throw new RuntimeException("More than one dictionary is missing name.");
|
||||
}
|
||||
}
|
||||
|
||||
// Register event listeners for this Suggester
|
||||
core.registerFirstSearcherListener(new SuggesterListener(core, suggester, false, false));
|
||||
boolean buildOnCommit = Boolean.parseBoolean((String) suggesterParams.get(BUILD_ON_COMMIT_LABEL));
|
||||
boolean buildOnOptimize = Boolean.parseBoolean((String) suggesterParams.get(BUILD_ON_OPTIMIZE_LABEL));
|
||||
if (buildOnCommit || buildOnOptimize) {
|
||||
LOG.info("Registering newSearcher listener for suggester: " + suggester.getName());
|
||||
core.registerNewSearcherListener(new SuggesterListener(core, suggester, buildOnCommit, buildOnOptimize));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Responsible for issuing build and rebload command to the specified {@link SolrSuggester} */
|
||||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
LOG.info("Suggester prepare with : " + params);
|
||||
if (!params.getBool(COMPONENT_NAME, false)) {
|
||||
return;
|
||||
}
|
||||
|
||||
SolrSuggester suggester = getSuggester(params);
|
||||
if (suggester == null) {
|
||||
throw new IllegalArgumentException("Error in configuration, no suggester found");
|
||||
}
|
||||
if (params.getBool(SUGGEST_BUILD, false)) {
|
||||
suggester.build(rb.req.getCore(), rb.req.getSearcher());
|
||||
rb.rsp.add("command", "build");
|
||||
} else if (params.getBool(SUGGEST_RELOAD, false)) {
|
||||
suggester.reload(rb.req.getCore(), rb.req.getSearcher());
|
||||
rb.rsp.add("command", "reload");
|
||||
}
|
||||
}
|
||||
|
||||
/** Dispatch shard request in <code>STAGE_EXECUTE_QUERY</code> stage */
|
||||
@Override
|
||||
public int distributedProcess(ResponseBuilder rb) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
LOG.info("Suggester distributedProcess with : " + params);
|
||||
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY)
|
||||
return ResponseBuilder.STAGE_EXECUTE_QUERY;
|
||||
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;
|
||||
sreq.params = new ModifiableSolrParams(rb.req.getParams());
|
||||
sreq.params.remove(ShardParams.SHARDS);
|
||||
rb.addRequest(this, sreq);
|
||||
return ResponseBuilder.STAGE_GET_FIELDS;
|
||||
}
|
||||
|
||||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Responsible for using the specified suggester to get the suggestions
|
||||
* for the query and write the results
|
||||
* */
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
LOG.info("Suggester process with : " + params);
|
||||
if (!params.getBool(COMPONENT_NAME, false) || suggesters.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
SolrSuggester suggester = getSuggester(params);
|
||||
String query = params.get(SUGGEST_Q);
|
||||
if (query == null) {
|
||||
query = rb.getQueryString();
|
||||
if (query == null) {
|
||||
query = params.get(CommonParams.Q);
|
||||
}
|
||||
}
|
||||
|
||||
if (query != null) {
|
||||
int count = params.getInt(SUGGEST_COUNT, 1);
|
||||
SuggesterOptions options = new SuggesterOptions(new CharsRef(query), count);
|
||||
SuggesterResult suggesterResult = suggester.getSuggestions(options);
|
||||
|
||||
NamedList response = new SimpleOrderedMap();
|
||||
NamedList<NamedList> namedListResult = toNamedList(suggesterResult);
|
||||
response.add(SuggesterResultLabels.SUGGESTIONS, namedListResult);
|
||||
rb.rsp.add(SuggesterResultLabels.SUGGEST, response);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Used in Distributed Search, merges the suggestion results from every shard
|
||||
* */
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
LOG.info("Suggester finishStage with : " + params);
|
||||
if (!params.getBool(COMPONENT_NAME, false) || rb.stage != ResponseBuilder.STAGE_GET_FIELDS)
|
||||
return;
|
||||
int count = params.getInt(SUGGEST_COUNT, 1);
|
||||
|
||||
List<SuggesterResult> suggesterResults = new ArrayList<>();
|
||||
NamedList response = new SimpleOrderedMap();
|
||||
NamedList<NamedList> namedListResult = null;
|
||||
|
||||
// Collect Shard responses
|
||||
for (ShardRequest sreq : rb.finished) {
|
||||
for (ShardResponse srsp : sreq.responses) {
|
||||
NamedList<NamedList> namedList =
|
||||
(NamedList<NamedList>) srsp.getSolrResponse().getResponse().get(SuggesterResultLabels.SUGGEST);
|
||||
LOG.info(srsp.getShard() + " : " + namedList);
|
||||
suggesterResults.add(toSuggesterResult(namedList));
|
||||
}
|
||||
}
|
||||
|
||||
// Merge Shard responses
|
||||
SuggesterResult suggesterResult = merge(suggesterResults, count);
|
||||
namedListResult = toNamedList(suggesterResult);
|
||||
|
||||
response.add(SuggesterResultLabels.SUGGESTIONS, namedListResult);
|
||||
rb.rsp.add(SuggesterResultLabels.SUGGEST, response);
|
||||
};
|
||||
|
||||
/**
|
||||
* Given a list of {@link SuggesterResult} and <code>count</code>
|
||||
* returns a {@link SuggesterResult} containing <code>count</code>
|
||||
* number of {@link LookupResult}, sorted by their associated
|
||||
* weights
|
||||
* */
|
||||
private static SuggesterResult merge(List<SuggesterResult> suggesterResults, int count) {
|
||||
SuggesterResult result = new SuggesterResult();
|
||||
Set<String> allTokens = new HashSet<>();
|
||||
|
||||
// collect all tokens
|
||||
for (SuggesterResult shardResult : suggesterResults) {
|
||||
allTokens.addAll(shardResult.getTokens());
|
||||
}
|
||||
|
||||
// Get Top N for every token in every shard (using weights)
|
||||
for (String token : allTokens) {
|
||||
Lookup.LookupPriorityQueue resultQueue = new Lookup.LookupPriorityQueue(
|
||||
count);
|
||||
for (SuggesterResult shardResult : suggesterResults) {
|
||||
List<LookupResult> suggests = shardResult.getLookupResult(token);
|
||||
if (suggests == null) {
|
||||
continue;
|
||||
}
|
||||
for (LookupResult res : suggests) {
|
||||
resultQueue.insertWithOverflow(res);
|
||||
}
|
||||
}
|
||||
List<LookupResult> sortedSuggests = new LinkedList<>();
|
||||
Collections.addAll(sortedSuggests, resultQueue.getResults());
|
||||
result.add(token, sortedSuggests);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Suggester component";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public NamedList getStatistics() {
|
||||
NamedList<String> stats = new SimpleOrderedMap<String>();
|
||||
stats.add("totalSizeInBytes", String.valueOf(sizeInBytes()));
|
||||
for (Map.Entry<String, SolrSuggester> entry : suggesters.entrySet()) {
|
||||
SolrSuggester suggester = entry.getValue();
|
||||
stats.add(entry.getKey(), suggester.toString());
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
private long sizeInBytes() {
|
||||
long sizeInBytes = 0;
|
||||
for (SolrSuggester suggester : suggesters.values()) {
|
||||
sizeInBytes += suggester.sizeInBytes();
|
||||
}
|
||||
return sizeInBytes;
|
||||
}
|
||||
|
||||
private SolrSuggester getSuggester(SolrParams params) {
|
||||
return suggesters.get(getSuggesterName(params));
|
||||
|
||||
}
|
||||
|
||||
private String getSuggesterName(SolrParams params){
|
||||
return (params.get(SUGGEST_DICT) != null) ?
|
||||
(String)params.get(SUGGEST_DICT)
|
||||
: DEFAULT_DICT_NAME;
|
||||
|
||||
}
|
||||
|
||||
/** Convert {@link SuggesterResult} to NamedList for constructing responses */
|
||||
private NamedList<NamedList> toNamedList(SuggesterResult suggesterResult) {
|
||||
NamedList<NamedList> results = new NamedList<NamedList>();
|
||||
for (String token : suggesterResult.getTokens()) {
|
||||
SimpleOrderedMap suggestionBody = new SimpleOrderedMap();
|
||||
List<LookupResult> lookupResults = suggesterResult.getLookupResult(token);
|
||||
suggestionBody.add(SuggesterResultLabels.SUGGESTION_NUM_FOUND, lookupResults.size());
|
||||
|
||||
for (LookupResult lookupResult : lookupResults) {
|
||||
String suggestionString = lookupResult.key.toString();
|
||||
long weight = lookupResult.value;
|
||||
String payload = (lookupResult.payload != null) ?
|
||||
lookupResult.payload.utf8ToString()
|
||||
: "";
|
||||
|
||||
SimpleOrderedMap suggestEntryNamedList = new SimpleOrderedMap();
|
||||
suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_TERM, suggestionString);
|
||||
suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_WEIGHT, weight);
|
||||
suggestEntryNamedList.add(SuggesterResultLabels.SUGGESTION_PAYLOAD, payload);
|
||||
|
||||
suggestionBody.add(SuggesterResultLabels.SUGGESTION, suggestEntryNamedList);
|
||||
}
|
||||
results.add(token, suggestionBody);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/** Convert NamedList (suggester response) to {@link SuggesterResult} */
|
||||
private SuggesterResult toSuggesterResult(NamedList<NamedList> suggesterRespNamedList) {
|
||||
SuggesterResult result = new SuggesterResult();
|
||||
if (suggesterRespNamedList == null) {
|
||||
return result;
|
||||
}
|
||||
NamedList suggestions = (NamedList) suggesterRespNamedList.get(SuggesterResultLabels.SUGGESTIONS);
|
||||
if (suggestions != null) {
|
||||
// for each token
|
||||
for(int i = 0; i < suggestions.size() ; i++) {
|
||||
String tokenString = suggestions.getName(i);
|
||||
List<LookupResult> lookupResults = new ArrayList<>();
|
||||
NamedList suggestion = (NamedList) suggestions.getVal(i);
|
||||
// for each suggestion
|
||||
for (int j = 0; j < suggestion.size(); j++) {
|
||||
String property = suggestion.getName(j);
|
||||
if (property.equals(SuggesterResultLabels.SUGGESTION)) {
|
||||
NamedList suggestionEntry = (NamedList) suggestion.getVal(j);
|
||||
String term = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_TERM);
|
||||
long weight = (long) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_WEIGHT);
|
||||
String payload = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_PAYLOAD);
|
||||
LookupResult res = new LookupResult(new CharsRef(term), weight, new BytesRef(payload));
|
||||
lookupResults.add(res);
|
||||
}
|
||||
result.add(tokenString, lookupResults);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Listener to build or reload the maintained {@link SolrSuggester} by this component */
|
||||
private static class SuggesterListener implements SolrEventListener {
|
||||
private final SolrCore core;
|
||||
private final SolrSuggester suggester;
|
||||
private final boolean buildOnCommit;
|
||||
private final boolean buildOnOptimize;
|
||||
|
||||
public SuggesterListener(SolrCore core, SolrSuggester checker, boolean buildOnCommit, boolean buildOnOptimize) {
|
||||
this.core = core;
|
||||
this.suggester = checker;
|
||||
this.buildOnCommit = buildOnCommit;
|
||||
this.buildOnOptimize = buildOnOptimize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void newSearcher(SolrIndexSearcher newSearcher,
|
||||
SolrIndexSearcher currentSearcher) {
|
||||
if (currentSearcher == null) {
|
||||
// firstSearcher event
|
||||
try {
|
||||
LOG.info("Loading suggester index for: " + suggester.getName());
|
||||
suggester.reload(core, newSearcher);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception in reloading suggester index for: " + suggester.getName(), e);
|
||||
}
|
||||
} else {
|
||||
// newSearcher event
|
||||
if (buildOnCommit) {
|
||||
buildSuggesterIndex(newSearcher);
|
||||
} else if (buildOnOptimize) {
|
||||
if (newSearcher.getIndexReader().leaves().size() == 1) {
|
||||
buildSuggesterIndex(newSearcher);
|
||||
} else {
|
||||
LOG.info("Index is not optimized therefore skipping building suggester index for: "
|
||||
+ suggester.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void buildSuggesterIndex(SolrIndexSearcher newSearcher) {
|
||||
try {
|
||||
LOG.info("Building suggester index for: " + suggester.getName());
|
||||
suggester.build(core, newSearcher);
|
||||
} catch (Exception e) {
|
||||
log.error("Exception in building suggester index for: " + suggester.getName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postCommit() {}
|
||||
|
||||
@Override
|
||||
public void postSoftCommit() {}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Encapsulates shared fields for all types of dictionaryFactory classes
|
||||
*/
|
||||
public abstract class DictionaryFactory {
|
||||
|
||||
/** Default dictionary implementation to use for FileBasedDictionaries */
|
||||
public static String DEFAULT_FILE_BASED_DICT = FileDictionaryFactory.class.getName();
|
||||
|
||||
/** Default dictionary implementation to use for IndexBasedDictionaries */
|
||||
public static String DEFAULT_INDEX_BASED_DICT = HighFrequencyDictionaryFactory.class.getName();
|
||||
|
||||
protected NamedList params;
|
||||
|
||||
/** Sets the parameters available to SolrSuggester for use in Dictionary creation */
|
||||
public void setParams(NamedList params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Dictionary using options in <code>core</code> and optionally
|
||||
* uses <code>searcher</code>, in case of index based dictionaries
|
||||
*/
|
||||
public abstract Dictionary create(SolrCore core, SolrIndexSearcher searcher);
|
||||
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.suggest.DocumentDictionary;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Factory for {@link DocumentDictionary}
|
||||
*/
|
||||
public class DocumentDictionaryFactory extends DictionaryFactory {
|
||||
|
||||
public static final String FIELD = "field";
|
||||
|
||||
public static final String WEIGHT_FIELD = "weightField";
|
||||
|
||||
public static final String PAYLOAD_FIELD = "payloadField";
|
||||
|
||||
@Override
|
||||
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
|
||||
if(params == null) {
|
||||
// should not happen; implies setParams was not called
|
||||
throw new IllegalStateException("Value of params not set");
|
||||
}
|
||||
String field = (String) params.get(FIELD);
|
||||
String weightField = (String) params.get(WEIGHT_FIELD);
|
||||
String payloadField = (String) params.get(PAYLOAD_FIELD);
|
||||
|
||||
if (field == null) {
|
||||
throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
|
||||
}
|
||||
if (weightField == null) {
|
||||
throw new IllegalArgumentException(WEIGHT_FIELD + " is a mandatory parameter");
|
||||
}
|
||||
|
||||
return new DocumentDictionary(searcher.getIndexReader(), field, weightField, payloadField);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.DoubleField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.FloatField;
|
||||
import org.apache.solr.schema.IntField;
|
||||
import org.apache.solr.schema.LongField;
|
||||
import org.apache.solr.schema.TrieDoubleField;
|
||||
import org.apache.solr.schema.TrieFloatField;
|
||||
import org.apache.solr.schema.TrieIntField;
|
||||
import org.apache.solr.schema.TrieLongField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Factory for {@link DocumentExpressionDictionary}
|
||||
*/
|
||||
public class DocumentExpressionDictionaryFactory extends DictionaryFactory {
|
||||
|
||||
/** Label for defining field to use for terms */
|
||||
public static final String FIELD = "field";
|
||||
|
||||
/** Label for defining payloadField to use for terms (optional) */
|
||||
public static final String PAYLOAD_FIELD = "payloadField";
|
||||
|
||||
/** Label for defining expression to evaluate the weight for the terms */
|
||||
public static final String WEIGHT_EXPRESSION = "weightExpression";
|
||||
|
||||
/** Label used to define the name of the
|
||||
* sortField used in the {@link #WEIGHT_EXPRESSION} */
|
||||
public static final String SORT_FIELD = "sortField";
|
||||
|
||||
@Override
|
||||
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
|
||||
if(params == null) {
|
||||
// should not happen; implies setParams was not called
|
||||
throw new IllegalStateException("Value of params not set");
|
||||
}
|
||||
|
||||
String field = (String) params.get(FIELD);
|
||||
String payloadField = (String) params.get(PAYLOAD_FIELD);
|
||||
String weightExpression = (String) params.get(WEIGHT_EXPRESSION);
|
||||
Set<SortField> sortFields = new HashSet<>();
|
||||
|
||||
if (field == null) {
|
||||
throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
|
||||
}
|
||||
|
||||
if (weightExpression == null) {
|
||||
throw new IllegalArgumentException(WEIGHT_EXPRESSION + " is a mandatory parameter");
|
||||
}
|
||||
|
||||
for(int i = 0; i < params.size(); i++) {
|
||||
if (params.getName(i).equals(SORT_FIELD)) {
|
||||
String sortFieldName = (String) params.getVal(i);
|
||||
|
||||
SortField.Type sortFieldType = getSortFieldType(core, sortFieldName);
|
||||
|
||||
if (sortFieldType == null) {
|
||||
throw new IllegalArgumentException(sortFieldName + " could not be mapped to any appropriate type"
|
||||
+ " [long, int, float, double]");
|
||||
}
|
||||
|
||||
SortField sortField = new SortField(sortFieldName, sortFieldType);
|
||||
sortFields.add(sortField);
|
||||
}
|
||||
}
|
||||
|
||||
return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression,
|
||||
sortFields, payloadField);
|
||||
}
|
||||
|
||||
private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) {
|
||||
SortField.Type type = null;
|
||||
String fieldTypeName = core.getLatestSchema().getField(sortFieldName).getType().getTypeName();
|
||||
FieldType ft = core.getLatestSchema().getFieldTypes().get(fieldTypeName);
|
||||
if (ft instanceof FloatField || ft instanceof TrieFloatField) {
|
||||
type = SortField.Type.FLOAT;
|
||||
} else if (ft instanceof IntField || ft instanceof TrieIntField) {
|
||||
type = SortField.Type.INT;
|
||||
} else if (ft instanceof LongField || ft instanceof TrieLongField) {
|
||||
type = SortField.Type.LONG;
|
||||
} else if (ft instanceof DoubleField || ft instanceof TrieDoubleField) {
|
||||
type = SortField.Type.DOUBLE;
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.suggest.FileDictionary;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Factory for {@link FileDictionary}
|
||||
*/
|
||||
public class FileDictionaryFactory extends DictionaryFactory {
|
||||
|
||||
/** Label for defining fieldDelimiter to be used */
|
||||
public static final String FIELD_DELIMITER = "fieldDelimiter";
|
||||
|
||||
@Override
|
||||
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
|
||||
if (params == null) {
|
||||
// should not happen; implies setParams was not called
|
||||
throw new IllegalStateException("Value of params not set");
|
||||
}
|
||||
|
||||
String sourceLocation = (String)params.get(Suggester.LOCATION);
|
||||
|
||||
if (sourceLocation == null) {
|
||||
throw new IllegalArgumentException(Suggester.LOCATION + " parameter is mandatory for using FileDictionary");
|
||||
}
|
||||
|
||||
String fieldDelimiter = (params.get(FIELD_DELIMITER) != null)
|
||||
? (String) params.get(FIELD_DELIMITER) :
|
||||
FileDictionary.DEFAULT_FIELD_DELIMITER;
|
||||
|
||||
try {
|
||||
return new FileDictionary(new InputStreamReader(
|
||||
core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8), fieldDelimiter);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.spelling.SolrSpellChecker;
|
||||
|
||||
/**
|
||||
* Factory for {@link HighFrequencyDictionary}
|
||||
*/
|
||||
public class HighFrequencyDictionaryFactory extends DictionaryFactory {
|
||||
/**
|
||||
* Minimum frequency of terms to consider when building the dictionary.
|
||||
*/
|
||||
public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold";
|
||||
|
||||
@Override
|
||||
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
|
||||
if(params == null) {
|
||||
// should not happen; implies setParams was not called
|
||||
throw new IllegalStateException("Value of params not set");
|
||||
}
|
||||
String field = (String)params.get(SolrSpellChecker.FIELD);
|
||||
|
||||
if (field == null) {
|
||||
throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter");
|
||||
}
|
||||
|
||||
float threshold = params.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f
|
||||
: (Float)params.get(THRESHOLD_TOKEN_FREQUENCY);
|
||||
|
||||
return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold);
|
||||
}
|
||||
|
||||
}
|
|
@ -20,11 +20,25 @@ package org.apache.solr.spelling.suggest;
|
|||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
|
||||
|
||||
/**
|
||||
* Suggester factory for creating {@link Lookup} instances.
|
||||
*/
|
||||
public abstract class LookupFactory {
|
||||
|
||||
/** Default lookup implementation to use for SolrSuggester */
|
||||
public static String DEFAULT_FILE_BASED_DICT = JaspellLookupFactory.class.getName();
|
||||
|
||||
/**
|
||||
* Create a Lookup using config options in <code>params</code> and
|
||||
* current <code>core</code>
|
||||
*/
|
||||
public abstract Lookup create(NamedList params, SolrCore core);
|
||||
|
||||
/**
|
||||
* <p>Returns the filename in which the in-memory data structure is stored </p>
|
||||
* <b>NOTE:</b> not all {@link Lookup} implementations store in-memory data structures
|
||||
* */
|
||||
public abstract String storeFileName();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,206 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.Dictionary;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Responsible for loading the lookup and dictionary Implementations specified by
|
||||
* the SolrConfig.
|
||||
* Interacts (query/build/reload) with Lucene Suggesters through {@link Lookup} and
|
||||
* {@link Dictionary}
|
||||
* */
|
||||
public class SolrSuggester {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SolrSuggester.class);
|
||||
|
||||
/** Name used when an unnamed suggester config is passed */
|
||||
public static final String DEFAULT_DICT_NAME = "default";
|
||||
|
||||
/** Label to identify the name of the suggester */
|
||||
public static final String NAME = "name";
|
||||
|
||||
/** Location of the source data - either a path to a file, or null for the
|
||||
* current IndexReader.
|
||||
* */
|
||||
public static final String LOCATION = "sourceLocation";
|
||||
|
||||
/** Fully-qualified class of the {@link Lookup} implementation. */
|
||||
public static final String LOOKUP_IMPL = "lookupImpl";
|
||||
|
||||
/** Fully-qualified class of the {@link Dictionary} implementation */
|
||||
public static final String DICTIONARY_IMPL = "dictionaryImpl";
|
||||
|
||||
/**
|
||||
* Name of the location where to persist the dictionary. If this location
|
||||
* is relative then the data will be stored under the core's dataDir. If this
|
||||
* is null the storing will be disabled.
|
||||
*/
|
||||
public static final String STORE_DIR = "storeDir";
|
||||
|
||||
static SuggesterResult EMPTY_RESULT = new SuggesterResult();
|
||||
|
||||
private String sourceLocation;
|
||||
private File storeDir;
|
||||
private Dictionary dictionary;
|
||||
private Lookup lookup;
|
||||
private String lookupImpl;
|
||||
private String dictionaryImpl;
|
||||
private String name;
|
||||
|
||||
private LookupFactory factory;
|
||||
private DictionaryFactory dictionaryFactory;
|
||||
|
||||
/**
|
||||
* Uses the <code>config</code> and the <code>core</code> to initialize the underlying
|
||||
* Lucene suggester
|
||||
* */
|
||||
public String init(NamedList<?> config, SolrCore core) {
|
||||
LOG.info("init: " + config);
|
||||
|
||||
// read the config
|
||||
name = config.get(NAME) != null ? (String) config.get(NAME)
|
||||
: DEFAULT_DICT_NAME;
|
||||
sourceLocation = (String) config.get(LOCATION);
|
||||
lookupImpl = (String) config.get(LOOKUP_IMPL);
|
||||
dictionaryImpl = (String) config.get(DICTIONARY_IMPL);
|
||||
String store = (String)config.get(STORE_DIR);
|
||||
|
||||
if (lookupImpl == null) {
|
||||
lookupImpl = LookupFactory.DEFAULT_FILE_BASED_DICT;
|
||||
LOG.info("No " + LOOKUP_IMPL + " parameter was provided falling back to " + lookupImpl);
|
||||
}
|
||||
// initialize appropriate lookup instance
|
||||
factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class);
|
||||
lookup = factory.create(config, core);
|
||||
|
||||
// if store directory is provided make it or load up the lookup with its content
|
||||
if (store != null) {
|
||||
storeDir = new File(store);
|
||||
if (!storeDir.isAbsolute()) {
|
||||
storeDir = new File(core.getDataDir() + File.separator + storeDir);
|
||||
}
|
||||
if (!storeDir.exists()) {
|
||||
storeDir.mkdirs();
|
||||
} else {
|
||||
// attempt reload of the stored lookup
|
||||
try {
|
||||
lookup.load(new FileInputStream(new File(storeDir, factory.storeFileName())));
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Loading stored lookup data failed, possibly not cached yet");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// dictionary configuration
|
||||
if (dictionaryImpl == null) {
|
||||
dictionaryImpl = (sourceLocation == null) ? DictionaryFactory.DEFAULT_INDEX_BASED_DICT :
|
||||
DictionaryFactory.DEFAULT_FILE_BASED_DICT;
|
||||
LOG.info("No " + DICTIONARY_IMPL + " parameter was provided falling back to " + dictionaryImpl);
|
||||
}
|
||||
|
||||
dictionaryFactory = core.getResourceLoader().newInstance(dictionaryImpl, DictionaryFactory.class);
|
||||
dictionaryFactory.setParams(config);
|
||||
LOG.info("Dictionary loaded with params: " + config);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
/** Build the underlying Lucene Suggester */
|
||||
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
|
||||
LOG.info("build()");
|
||||
|
||||
dictionary = dictionaryFactory.create(core, searcher);
|
||||
lookup.build(dictionary);
|
||||
if (storeDir != null) {
|
||||
File target = new File(storeDir, factory.storeFileName());
|
||||
if(!lookup.store(new FileOutputStream(target))) {
|
||||
LOG.error("Store Lookup build failed");
|
||||
} else {
|
||||
LOG.info("Stored suggest data to: " + target.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Reloads the underlying Lucene Suggester */
|
||||
public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
|
||||
LOG.info("reload()");
|
||||
if (dictionary == null && storeDir != null) {
|
||||
// this may be a firstSearcher event, try loading it
|
||||
FileInputStream is = new FileInputStream(new File(storeDir, factory.storeFileName()));
|
||||
try {
|
||||
if (lookup.load(is)) {
|
||||
return; // loaded ok
|
||||
}
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(is);
|
||||
}
|
||||
LOG.debug("load failed, need to build Lookup again");
|
||||
}
|
||||
// loading was unsuccessful - build it again
|
||||
build(core, searcher);
|
||||
}
|
||||
|
||||
/** Returns suggestions based on the {@link SuggesterOptions} passed */
|
||||
public SuggesterResult getSuggestions(SuggesterOptions options) throws IOException {
|
||||
LOG.debug("getSuggestions: " + options.token);
|
||||
if (lookup == null) {
|
||||
LOG.info("Lookup is null - invoke suggest.build first");
|
||||
return EMPTY_RESULT;
|
||||
}
|
||||
|
||||
SuggesterResult res = new SuggesterResult();
|
||||
List<LookupResult> suggestions = lookup.lookup(options.token, false, options.count);
|
||||
res.add(options.token.toString(), suggestions);
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Returns the unique name of the suggester */
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/** Returns the size of the in-memory data structure used by the underlying lookup implementation */
|
||||
public long sizeInBytes() {
|
||||
return lookup.sizeInBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SolrSuggester [ name=" + name + ", "
|
||||
+ "sourceLocation=" + sourceLocation + ", "
|
||||
+ "storeDir=" + ((storeDir == null) ? "" : storeDir.getAbsoluteFile()) + ", "
|
||||
+ "lookupImpl=" + lookupImpl + ", "
|
||||
+ "dictionaryImpl=" + dictionaryImpl + ", "
|
||||
+ "sizeInBytes=" + ((lookup!=null) ? String.valueOf(sizeInBytes()) : "0") + " ]";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
||||
/**
|
||||
* Encapsulates the inputs required to be passed on to
|
||||
* the underlying suggester in {@link SolrSuggester}
|
||||
**/
|
||||
public class SuggesterOptions {
|
||||
|
||||
/** The token to lookup */
|
||||
CharsRef token;
|
||||
|
||||
/** Number of suggestions requested */
|
||||
int count;
|
||||
|
||||
public SuggesterOptions(CharsRef token, int count) {
|
||||
this.token = token;
|
||||
this.count = count;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Names of the request parameters understood by the suggest component. All of
 * them share the {@link #SUGGEST_PREFIX} prefix.
 */
public interface SuggesterParams {

  /** Common prefix of every suggest request parameter. */
  String SUGGEST_PREFIX = "suggest.";

  /**
   * The name of the dictionary to be used for giving the suggestion for a
   * request. The value for this parameter is configured in solrconfig.xml
   */
  String SUGGEST_DICT = SUGGEST_PREFIX + "dictionary";

  /**
   * The number of suggestions to return for the requested token.
   * <p>
   * The value applied when the parameter is absent is chosen by the component
   * reading it.
   */
  String SUGGEST_COUNT = SUGGEST_PREFIX + "count";

  /**
   * Use the value for this parameter as the query to get suggestions for.
   * <p>
   * This parameter is <b>optional</b>. The consuming component is expected to
   * fall back to the q parameter when it is absent.
   */
  String SUGGEST_Q = SUGGEST_PREFIX + "q";

  /**
   * Whether to build the suggester or not. Optional; documented as false by default.
   */
  String SUGGEST_BUILD = SUGGEST_PREFIX + "build";

  /**
   * Whether to reload the suggester. Optional; documented as false by default.
   */
  String SUGGEST_RELOAD = SUGGEST_PREFIX + "reload";

}
|
|
@ -0,0 +1,62 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
|
||||
/**
|
||||
* Encapsulates the results returned by the suggester in {@link SolrSuggester}
|
||||
* */
|
||||
public class SuggesterResult {
|
||||
|
||||
public SuggesterResult() {}
|
||||
|
||||
/** token -> lookup results mapping*/
|
||||
private Map<String, List<LookupResult>> suggestions = new HashMap<String, List<LookupResult>>();
|
||||
|
||||
/** Add suggestion results for <code>token</code> */
|
||||
public void add(String token, List<LookupResult> results) {
|
||||
List<LookupResult> res = this.suggestions.get(token);
|
||||
if (res == null) {
|
||||
res = results;
|
||||
this.suggestions.put(token, res);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of lookup result for a given <code>token</code>
|
||||
* null can be returned, if there are no lookup results
|
||||
* for the <code>token</code>
|
||||
* */
|
||||
public List<LookupResult> getLookupResult(String token) {
|
||||
return this.suggestions.get(token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the set of tokens that are present in the
|
||||
* instance
|
||||
*/
|
||||
public Set<String> getTokens() {
|
||||
return this.suggestions.keySet();
|
||||
}
|
||||
}
|
|
@ -69,6 +69,9 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
|
|||
throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
|
||||
}
|
||||
FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
|
||||
if (ft == null) {
|
||||
throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
|
||||
}
|
||||
Analyzer indexAnalyzer = ft.getAnalyzer();
|
||||
Analyzer queryAnalyzer = ft.getQueryAnalyzer();
|
||||
|
||||
|
|
|
@ -83,6 +83,10 @@ public class AnalyzingLookupFactory extends LookupFactory {
|
|||
throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
|
||||
}
|
||||
FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
|
||||
if (ft == null) {
|
||||
throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
|
||||
}
|
||||
|
||||
Analyzer indexAnalyzer = ft.getAnalyzer();
|
||||
Analyzer queryAnalyzer = ft.getQueryAnalyzer();
|
||||
|
||||
|
|
|
@ -78,6 +78,9 @@ public class FuzzyLookupFactory extends LookupFactory {
|
|||
}
|
||||
// retrieve index and query analyzers for the field
|
||||
FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
|
||||
if (ft == null) {
|
||||
throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
|
||||
}
|
||||
Analyzer indexAnalyzer = ft.getAnalyzer();
|
||||
Analyzer queryAnalyzer = ft.getQueryAnalyzer();
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
<fields>
|
||||
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="text" type="text" indexed="true" stored="false"/>
|
||||
<field name="stext" type="text" indexed="true" stored="true"/>
|
||||
</fields>
|
||||
|
||||
<defaultSearchField>text</defaultSearchField>
|
||||
|
|
|
@ -83,6 +83,42 @@
|
|||
<str name="queryAnalyzerFieldType">phrase_suggest</str>
|
||||
</searchComponent>
|
||||
|
||||
|
||||
<!-- FuzzyLookup suggest component with HighFrequencyDictionary -->
|
||||
<searchComponent class="solr.SuggestComponent" name="fuzzy_suggest_analyzing_with_high_freq_dict">
|
||||
<lst name="suggester">
|
||||
<str name="name">fuzzy_suggest_analyzing_with_high_freq_dict</str>
|
||||
<str name="lookupImpl">org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory</str>
|
||||
<str name="dictionaryImpl">org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory</str>
|
||||
<str name="storeDir">fuzzy_suggest_analyzing</str>
|
||||
<str name="buildOnCommit">false</str>
|
||||
|
||||
<!-- Suggester properties -->
|
||||
<bool name="exactMatchFirst">true</bool>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<bool name="preserveSep">false</bool>
|
||||
<str name="field">stext</str>
|
||||
</lst>
|
||||
|
||||
</searchComponent>
|
||||
|
||||
<!-- FuzzyLookup suggest component with FileDictionaryFactory -->
|
||||
<searchComponent class="solr.SuggestComponent" name="fuzzy_suggest_analyzing_with_file_dict">
|
||||
<lst name="suggester">
|
||||
<str name="name">fuzzy_suggest_analyzing_with_file_dict</str>
|
||||
<str name="lookupImpl">org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory</str>
|
||||
<str name="dictionaryImpl">org.apache.solr.spelling.suggest.FileDictionaryFactory</str>
|
||||
<str name="storeDir">fuzzy_suggest_analyzing</str>
|
||||
<str name="buildOnCommit">false</str>
|
||||
|
||||
<!-- Suggester properties -->
|
||||
<bool name="exactMatchFirst">true</bool>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<bool name="preserveSep">false</bool>
|
||||
|
||||
<str name="sourceLocation">fuzzysuggest.txt</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
<!-- FuzzyLookup suggest component (default)-->
|
||||
<searchComponent class="solr.SpellCheckComponent" name="fuzzy_suggest_analyzing">
|
||||
<lst name="spellchecker">
|
||||
|
@ -215,6 +251,26 @@
|
|||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing_with_file_dict">
|
||||
<lst name="defaults">
|
||||
<str name="suggest">true</str>
|
||||
<str name="suggest.dictionary">fuzzy_suggest_analyzing_with_file_dict</str>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>fuzzy_suggest_analyzing_with_file_dict</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing_with_high_freq_dict">
|
||||
<lst name="defaults">
|
||||
<str name="suggest">true</str>
|
||||
<str name="suggest.dictionary">fuzzy_suggest_analyzing_with_high_freq_dict</str>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>fuzzy_suggest_analyzing_with_high_freq_dict</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing">
|
||||
<lst name="defaults">
|
||||
<str name="spellcheck">true</str>
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<config>
|
||||
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||
<!-- The DirectoryFactory to use for indexes.
|
||||
solr.StandardDirectoryFactory, the default, is filesystem based.
|
||||
solr.RAMDirectoryFactory is memory based and not persistent. -->
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
|
||||
<updateHandler class="solr.DirectUpdateHandler2"/>
|
||||
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler" />
|
||||
|
||||
<searchComponent class="solr.SuggestComponent" name="suggest">
|
||||
|
||||
<!-- Suggest component (default index based dictionary) -->
|
||||
<lst name="suggester">
|
||||
<str name="name">suggest_fuzzy_with_high_freq_dict</str>
|
||||
<str name="lookupImpl">FuzzyLookupFactory</str>
|
||||
<str name="field">cat</str>
|
||||
<str name="storeDir">suggest_fuzzy_with_high_freq_dict</str>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
|
||||
<!-- Suggester properties -->
|
||||
<float name="threshold">0.0</float>
|
||||
</lst>
|
||||
|
||||
<!-- Suggest component (default file based dictionary) -->
|
||||
<lst name="suggester">
|
||||
<str name="name">suggest_fuzzy_file_based</str>
|
||||
<str name="lookupImpl">FuzzyLookupFactory</str>
|
||||
<str name="sourceLocation">fuzzysuggest.txt</str>
|
||||
<str name="storeDir">suggest_fuzzy_file_based</str>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
|
||||
<!-- Suggest component (Document Dictionary) -->
|
||||
<lst name="suggester">
|
||||
<str name="name">suggest_fuzzy_doc_dict</str>
|
||||
<str name="lookupImpl">FuzzyLookupFactory</str>
|
||||
<str name="dictionaryImpl">DocumentDictionaryFactory</str>
|
||||
<str name="field">cat</str>
|
||||
<str name="weightField">price</str>
|
||||
<str name="storeDir">suggest_fuzzy_doc_dict_payload</str>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
|
||||
<!-- Suggest component (Document Expression Dictionary) -->
|
||||
<lst name="suggester">
|
||||
<str name="name">suggest_fuzzy_doc_expr_dict</str>
|
||||
<str name="dictionaryImpl">DocumentExpressionDictionaryFactory</str>
|
||||
<str name="lookupImpl">FuzzyLookupFactory</str>
|
||||
<str name="field">cat</str>
|
||||
<str name="weightExpression">((price * 2) + weight)</str>
|
||||
<str name="sortField">weight</str>
|
||||
<str name="sortField">price</str>
|
||||
<str name="storeDir">suggest_fuzzy_doc_expr_dict</str>
|
||||
<str name="suggestAnalyzerFieldType">text</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<requestHandler name="/suggest" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="suggest">true</str>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>suggest</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
|
||||
|
||||
</config>
|
|
@ -340,6 +340,8 @@
|
|||
<str>termsComp</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!--
|
||||
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
|
||||
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
|
||||
|
|
|
@ -0,0 +1,141 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.spelling.suggest.SuggesterParams;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Test for SuggestComponent's distributed querying
|
||||
*
|
||||
* @see org.apache.solr.handler.component.SuggestComponent
|
||||
*/
|
||||
@Slow
|
||||
public class DistributedSuggesterComponentTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
public DistributedSuggesterComponentTest() {
|
||||
//Helpful for debugging
|
||||
//fixShardCount=true;
|
||||
//shardCount=2;
|
||||
//stress=0;
|
||||
//deadServers=null;
|
||||
configString = "solrconfig-suggestercomponent.xml";
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
useFactory(null); // need an FS factory
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void validateControlData(QueryResponse control) throws Exception
|
||||
{
|
||||
NamedList<Object> nl = control.getResponse();
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Object> sc = (NamedList<Object>) nl.get("suggest");
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Object> sug = (NamedList<Object>) sc.get("suggestions");
|
||||
if(sug.size()==0) {
|
||||
Assert.fail("Control data did not return any suggestions.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
del("*:*");
|
||||
index(id, "1", "cat", "This is another title", "price", "10", "weight", "10");
|
||||
index(id, "2", "cat", "Yet another", "price", "15", "weight", "10");
|
||||
index(id, "3", "cat", "Yet another title", "price", "20", "weight", "20");
|
||||
index(id, "4", "cat", "suggestions for suggest", "price", "25", "weight", "20");
|
||||
index(id, "5", "cat", "Red fox", "price", "30", "weight", "20");
|
||||
index(id, "6", "cat", "Rad fox", "price", "35", "weight", "30");
|
||||
index(id, "7", "cat", "example data", "price", "40", "weight", "30");
|
||||
index(id, "8", "cat", "example inputdata", "price", "45", "weight", "30");
|
||||
index(id, "9", "cat", "blah in blah", "price", "50", "weight", "40");
|
||||
index(id, "10", "cat", "another blah in blah", "price", "55", "weight", "40");
|
||||
commit();
|
||||
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
handle.put("response", SKIP);
|
||||
|
||||
String requestHandlerName = "/suggest";
|
||||
String docDictName = "suggest_fuzzy_doc_dict";
|
||||
String docExprDictName = "suggest_fuzzy_doc_expr_dict";
|
||||
|
||||
//Shortcut names
|
||||
String build = SuggesterParams.SUGGEST_BUILD;
|
||||
String count = SuggesterParams.SUGGEST_COUNT;
|
||||
String dictionaryName = SuggesterParams.SUGGEST_DICT;
|
||||
|
||||
//Build the suggest dictionary
|
||||
query(buildRequest("", true, requestHandlerName, build, "true", dictionaryName, docDictName));
|
||||
query(buildRequest("", true, requestHandlerName, build, "true", dictionaryName, docExprDictName));
|
||||
|
||||
//Test Basic Functionality
|
||||
query(buildRequest("exampel", false, requestHandlerName, dictionaryName, docDictName, count, "2"));
|
||||
query(buildRequest("Yet", false, requestHandlerName, dictionaryName, docExprDictName, count, "2"));
|
||||
query(buildRequest("blah", true, requestHandlerName, dictionaryName, docExprDictName, count, "2"));
|
||||
query(buildRequest("blah", true, requestHandlerName, dictionaryName, docDictName, count, "2"));
|
||||
|
||||
}
|
||||
private Object[] buildRequest(String q, boolean useSuggestQ, String handlerName, String... addlParams) {
|
||||
List<Object> params = new ArrayList<Object>();
|
||||
|
||||
if(useSuggestQ) {
|
||||
params.add("suggest.q");
|
||||
} else {
|
||||
params.add("q");
|
||||
}
|
||||
params.add(q);
|
||||
|
||||
params.add("qt");
|
||||
params.add(handlerName);
|
||||
|
||||
params.add("shards.qt");
|
||||
params.add(handlerName);
|
||||
|
||||
if(addlParams!=null) {
|
||||
params.addAll(Arrays.asList(addlParams));
|
||||
}
|
||||
return params.toArray(new Object[params.size()]);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.spelling.suggest.SuggesterParams;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class SuggestComponentTest extends SolrTestCaseJ4 {
|
||||
static String rh = "/suggest";
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-suggestercomponent.xml","schema.xml");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
// id, cat, price, weight
|
||||
assertU(adoc("id", "0", "cat", "This is a title", "price", "5", "weight", "10"));
|
||||
assertU(adoc("id", "1", "cat", "This is another title", "price", "10", "weight", "10"));
|
||||
assertU(adoc("id", "2", "cat", "Yet another", "price", "15", "weight", "10"));
|
||||
assertU(adoc("id", "3", "cat", "Yet another title", "price", "20", "weight", "20"));
|
||||
assertU(adoc("id", "4", "cat", "suggestions for suggest", "price", "25", "weight", "20"));
|
||||
assertU(adoc("id", "5", "cat", "Red fox", "price", "30", "weight", "20"));
|
||||
assertU(adoc("id", "6", "cat", "Rad fox", "price", "35", "weight", "30"));
|
||||
assertU(adoc("id", "7", "cat", "example data", "price", "40", "weight", "30"));
|
||||
assertU(adoc("id", "8", "cat", "example inputdata", "price", "45", "weight", "30"));
|
||||
assertU(adoc("id", "9", "cat", "blah in blah", "price", "50", "weight", "40"));
|
||||
assertU(adoc("id", "10", "cat", "another blah in blah", "price", "55", "weight", "40"));
|
||||
assertU((commit()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
assertU(delQ("*:*"));
|
||||
optimize();
|
||||
assertU((commit()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDocumentBased() throws Exception {
|
||||
assertQ(req("qt", rh,
|
||||
SuggesterParams.SUGGEST_DICT, "suggest_fuzzy_doc_dict",
|
||||
SuggesterParams.SUGGEST_BUILD, "true",
|
||||
SuggesterParams.SUGGEST_Q, "exampel",
|
||||
SuggesterParams.SUGGEST_COUNT, "2"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][1]/str[@name='term'][.='example inputdata']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][1]/long[@name='weight'][.='45']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][2]/str[@name='term'][.='example data']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][2]/long[@name='weight'][.='40']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", rh,
|
||||
SuggesterParams.SUGGEST_DICT, "suggest_fuzzy_doc_dict",
|
||||
SuggesterParams.SUGGEST_BUILD, "true",
|
||||
SuggesterParams.SUGGEST_Q, "Rad",
|
||||
SuggesterParams.SUGGEST_COUNT, "2"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='Rad']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='Rad']/lst[@name='suggestion'][1]/str[@name='term'][.='Rad fox']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='Rad']/lst[@name='suggestion'][1]/long[@name='weight'][.='35']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='Rad']/lst[@name='suggestion'][2]/str[@name='term'][.='Red fox']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='Rad']/lst[@name='suggestion'][2]/long[@name='weight'][.='30']"
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpressionBased() throws Exception {
|
||||
assertQ(req("qt", rh,
|
||||
SuggesterParams.SUGGEST_DICT, "suggest_fuzzy_doc_expr_dict",
|
||||
SuggesterParams.SUGGEST_BUILD, "true",
|
||||
SuggesterParams.SUGGEST_Q, "exampel",
|
||||
SuggesterParams.SUGGEST_COUNT, "2"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][1]/str[@name='term'][.='example inputdata']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][1]/long[@name='weight'][.='120']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][2]/str[@name='term'][.='example data']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='exampel']/lst[@name='suggestion'][2]/long[@name='weight'][.='110']"
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFileBased() throws Exception {
|
||||
assertQ(req("qt", rh,
|
||||
SuggesterParams.SUGGEST_DICT, "suggest_fuzzy_file_based",
|
||||
SuggesterParams.SUGGEST_BUILD, "true",
|
||||
SuggesterParams.SUGGEST_Q, "chn",
|
||||
SuggesterParams.SUGGEST_COUNT, "2"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chn']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chn']/lst[@name='suggestion'][1]/str[@name='term'][.='chance']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chn']/lst[@name='suggestion'][1]/long[@name='weight'][.='1']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chn']/lst[@name='suggestion'][2]/str[@name='term'][.='change']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chn']/lst[@name='suggestion'][2]/long[@name='weight'][.='1']"
|
||||
);
|
||||
}
|
||||
}
|
|
@ -116,6 +116,7 @@ public class SuggesterTest extends SolrTestCaseJ4 {
|
|||
public void testAnalyzer() throws Exception {
|
||||
Suggester suggester = new Suggester();
|
||||
NamedList params = new NamedList();
|
||||
params.add("field", "test_field");
|
||||
params.add("lookupImpl", "org.apache.solr.spelling.suggest.tst.TSTLookupFactory");
|
||||
suggester.init(params, h.getCore());
|
||||
assertTrue(suggester.getQueryAnalyzer() != null);
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestFileDictionaryLookup extends SolrTestCaseJ4 {
|
||||
static final String REQUEST_URI = "/fuzzy_suggest_analyzing_with_file_dict";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
|
||||
assertQ(req("qt", REQUEST_URI, "q", "", SuggesterParams.SUGGEST_BUILD, "true"));
|
||||
}
|
||||
|
||||
public void testDefault() throws Exception {
|
||||
|
||||
// tests to demonstrate default maxEdit parameter (value: 1), control for testWithMaxEdit2
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chagn", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/lst[@name='suggestion'][1]/str[@name='term'][.='chance']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/lst[@name='suggestion'][2]/str[@name='term'][.='change']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chacn", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/lst[@name='suggestion'][1]/str[@name='term'][.='chance']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/lst[@name='suggestion'][2]/str[@name='term'][.='change']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chagr", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagr']/int[@name='numFound'][.='1']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagr']/lst[@name='suggestion'][1]/str[@name='term'][.='charge']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chanr", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chanr']/int[@name='numFound'][.='3']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "cyhnce", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='cyhnce']/int[@name='numFound'][.='0']"
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestHighFrequencyDictionaryFactory extends SolrTestCaseJ4 {
|
||||
|
||||
static final String REQUEST_URI = "/fuzzy_suggest_analyzing_with_high_freq_dict";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
|
||||
// Suggestions text include : change, charge, chance
|
||||
assertU(adoc("id", "9999991",
|
||||
"text", "true",
|
||||
"stext", "change"));
|
||||
assertU(adoc("id", "9999992",
|
||||
"text", "true",
|
||||
"stext", "charge"));
|
||||
assertU(adoc("id", "9999992",
|
||||
"text", "true",
|
||||
"stext", "chance"));
|
||||
|
||||
assertU(commit());
|
||||
assertQ(req("qt", REQUEST_URI, "q", "", SuggesterParams.SUGGEST_BUILD, "true"));
|
||||
}
|
||||
|
||||
|
||||
public void testDefault() throws Exception {
|
||||
|
||||
// tests to demonstrate default maxEdit parameter (value: 1), control for testWithMaxEdit2
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chagn", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/lst[@name='suggestion'][1]/str[@name='term'][.='chance']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagn']/lst[@name='suggestion'][2]/str[@name='term'][.='change']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chacn", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/lst[@name='suggestion'][1]/str[@name='term'][.='chance']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chacn']/lst[@name='suggestion'][2]/str[@name='term'][.='change']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chagr", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagr']/int[@name='numFound'][.='1']",
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chagr']/lst[@name='suggestion'][1]/str[@name='term'][.='charge']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "chanr", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='chanr']/int[@name='numFound'][.='3']"
|
||||
);
|
||||
|
||||
assertQ(req("qt", REQUEST_URI, "q", "cyhnce", SuggesterParams.SUGGEST_COUNT, "3"),
|
||||
"//lst[@name='suggest']/lst[@name='suggestions']/lst[@name='cyhnce']/int[@name='numFound'][.='0']"
|
||||
);
|
||||
}
|
||||
}
|
|
@ -21,8 +21,7 @@
|
|||
<field name="manu">Dell, Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">dell</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">monitor</field>
|
||||
<field name="cat">electronics and computer1</field>
|
||||
<field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
|
||||
<field name="includes">USB cable</field>
|
||||
<field name="weight">401.6</field>
|
||||
|
|
|
@ -21,8 +21,7 @@
|
|||
<field name="manu">ViewSonic Corp.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">viewsonic</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">monitor</field>
|
||||
<field name="cat">electronics and stuff2</field>
|
||||
<field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
|
||||
<field name="weight">190.4</field>
|
||||
<field name="price">279.95</field>
|
||||
|
|
|
@ -1358,6 +1358,26 @@
|
|||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- "suggest" search component: declares one suggester, "mySuggester", reading the "cat" field
     with "price" as the weight field and "string" as the suggest analyzer field type. -->
<searchComponent name="suggest" class="solr.SuggestComponent">
|
||||
<lst name="suggester">
|
||||
<str name="name">mySuggester</str>
|
||||
<str name="lookupImpl">FuzzyLookupFactory</str> <!-- org.apache.solr.spelling.suggest.fst -->
|
||||
<str name="dictionaryImpl">DocumentDictionaryFactory</str> <!-- org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory -->
|
||||
<str name="field">cat</str>
|
||||
<str name="weightField">price</str>
|
||||
<str name="suggestAnalyzerFieldType">string</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<!-- "/suggest" handler (startup="lazy"): defaults suggest=true and suggest.count=10, and lists
     the "suggest" search component. -->
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="suggest">true</str>
|
||||
<str name="suggest.count">10</str>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>suggest</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
<!-- Term Vector Component
|
||||
|
||||
http://wiki.apache.org/solr/TermVectorComponent
|
||||
|
|
Loading…
Reference in New Issue