mirror of https://github.com/apache/lucene.git
SOLR-2282: add distributed support to search results clustering
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1051715 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1d9135f0e4
commit
2c859ba49b
|
@ -6,7 +6,7 @@ See http://wiki.apache.org/solr/ClusteringComponent
|
|||
|
||||
CHANGES
|
||||
|
||||
$Id:$
|
||||
$Id$
|
||||
|
||||
================== Release XXXX ==================
|
||||
|
||||
|
@ -17,6 +17,9 @@ $Id:$
|
|||
* SOLR-1804: Re-enabled clustering on trunk, updated to latest version of Carrot2. No more LGPL run-time dependencies.
|
||||
This release of C2 also does not have a specific Lucene dependency. (Stanislaw Osinski, gsingers)
|
||||
|
||||
* SOLR-2282: Add distributed search support for search result clustering.
|
||||
(Brad Giaccio, koji)
|
||||
|
||||
================== Release 1.4.0 ==================
|
||||
|
||||
Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent
|
||||
|
|
|
@ -44,6 +44,10 @@
|
|||
<pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
|
||||
<pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
|
||||
<path refid="common.classpath"/>
|
||||
<!-- DistributedClusteringComponentTest uses Jetty -->
|
||||
<fileset dir="${solr-path}/example/lib">
|
||||
<include name="**/*.jar" />
|
||||
</fileset>
|
||||
</path>
|
||||
|
||||
<target name="clean">
|
||||
|
|
|
@ -16,14 +16,22 @@ package org.apache.solr.handler.clustering;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine;
|
||||
import org.apache.solr.handler.clustering.carrot2.CarrotParams;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.ShardRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.DocListAndSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -31,7 +39,9 @@ import org.slf4j.LoggerFactory;
|
|||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -53,7 +63,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
public static final String COMPONENT_NAME = "clustering";
|
||||
private NamedList initParams;
|
||||
|
||||
|
||||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
if (!params.getBool(COMPONENT_NAME, false)) {
|
||||
|
@ -61,18 +71,21 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
if (!params.getBool(COMPONENT_NAME, false)) {
|
||||
return;
|
||||
}
|
||||
String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
|
||||
String name = getClusteringEngineName(rb);
|
||||
boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
|
||||
if (useResults == true) {
|
||||
SearchClusteringEngine engine = searchClusteringEngines.get(name);
|
||||
SearchClusteringEngine engine = getSearchClusteringEngine(rb);
|
||||
if (engine != null) {
|
||||
DocListAndSet results = rb.getResults();
|
||||
Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
|
||||
SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
|
||||
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
|
||||
rb.rsp.add("clusters", clusters);
|
||||
} else {
|
||||
log.warn("No engine for: " + name);
|
||||
|
@ -97,6 +110,72 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
|
||||
return searchClusteringEngines.get(getClusteringEngineName(rb));
|
||||
}
|
||||
|
||||
private String getClusteringEngineName(ResponseBuilder rb){
|
||||
return rb.req.getParams().get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
|
||||
return;
|
||||
}
|
||||
sreq.params.remove(COMPONENT_NAME);
|
||||
if( ( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0 ){
|
||||
String fl = sreq.params.get(CommonParams.FL,"*");
|
||||
// if fl=* then we don't need check
|
||||
if( fl.indexOf( '*' ) >= 0 ) return;
|
||||
Set<String> fields = getSearchClusteringEngine(rb).getFieldsToLoad(rb.req);
|
||||
if( fields == null || fields.size() == 0 ) return;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String[] flparams = fl.split( "[,\\s]+" );
|
||||
Set<String> flParamSet = new HashSet<String>(flparams.length);
|
||||
for( String flparam : flparams ){
|
||||
// no need trim() because of split() by \s+
|
||||
flParamSet.add(flparam);
|
||||
}
|
||||
for( String aFieldToLoad : fields ){
|
||||
if( !flParamSet.contains( aFieldToLoad ) ){
|
||||
sb.append( ',' ).append( aFieldToLoad );
|
||||
}
|
||||
}
|
||||
if( sb.length() > 0 ){
|
||||
sreq.params.set( CommonParams.FL, fl + sb.toString() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
|
||||
return;
|
||||
}
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
SearchClusteringEngine engine = getSearchClusteringEngine(rb);
|
||||
if (engine != null) {
|
||||
SolrDocumentList solrDocList = (SolrDocumentList)rb.rsp.getValues().get("response");
|
||||
// TODO: Currently, docIds is set to null in distributed environment.
|
||||
// This causes CarrotParams.PRODUCE_SUMMARY doesn't work.
|
||||
// To work CarrotParams.PRODUCE_SUMMARY under distributed mode, we can choose either one of:
|
||||
// (a) In each shard, ClusteringComponent produces summary and finishStage()
|
||||
// merges these summaries.
|
||||
// (b) Adding doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
|
||||
// making SolrHighlighter uses "external text" rather than stored values to produce snippets.
|
||||
Map<SolrDocument,Integer> docIds = null;
|
||||
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
|
||||
rb.rsp.add("clusters", clusters);
|
||||
} else {
|
||||
String name = getClusteringEngineName(rb);
|
||||
log.warn("No engine for: " + name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -174,17 +253,17 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
@Override
|
||||
public String getVersion() {
|
||||
return "$Revision:$";
|
||||
return "$Revision$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSourceId() {
|
||||
return "$Id:$";
|
||||
return "$Id$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL:$";
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,12 +16,16 @@ package org.apache.solr.handler.clustering;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -30,8 +34,27 @@ import org.apache.lucene.search.Query;
|
|||
**/
|
||||
public abstract class SearchClusteringEngine extends ClusteringEngine {
|
||||
|
||||
|
||||
@Deprecated
|
||||
public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);
|
||||
|
||||
// TODO: need DocList, too?
|
||||
public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
|
||||
Map<SolrDocument,Integer> docIds, SolrQueryRequest sreq);
|
||||
|
||||
/**
|
||||
* Returns the set of field names to load.
|
||||
* Concrete classes can override this method if needed.
|
||||
* Default implementation returns null, that is, all stored fields are loaded.
|
||||
* @param sreq
|
||||
* @return set of field names to load
|
||||
*/
|
||||
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
|
||||
return null;
|
||||
}
|
||||
|
||||
public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
|
||||
Map<SolrDocument, Integer> docIds) throws IOException{
|
||||
return SolrPluginUtils.docListToSolrDocumentList(
|
||||
docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,25 +18,38 @@ package org.apache.solr.handler.clustering.carrot2;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.document.SetBasedFieldSelector;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.clustering.SearchClusteringEngine;
|
||||
import org.apache.solr.highlight.SolrHighlighter;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.*;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.carrot2.core.*;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocSlice;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.carrot2.core.Cluster;
|
||||
import org.carrot2.core.Controller;
|
||||
import org.carrot2.core.ControllerFactory;
|
||||
import org.carrot2.core.Document;
|
||||
import org.carrot2.core.IClusteringAlgorithm;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -63,11 +76,25 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
private String idFieldName;
|
||||
|
||||
@Deprecated
|
||||
public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
|
||||
SolrIndexSearcher searcher = sreq.getSearcher();
|
||||
SolrDocumentList solrDocList;
|
||||
try {
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
|
||||
solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
|
||||
return cluster(query, solrDocList, docIds, sreq);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Object cluster(Query query, SolrDocumentList solrDocList,
|
||||
Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
|
||||
try {
|
||||
// Prepare attributes for Carrot2 clustering call
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
List<Document> documents = getDocuments(docList, query, sreq);
|
||||
List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
|
||||
attributes.put(AttributeNames.DOCUMENTS, documents);
|
||||
attributes.put(AttributeNames.QUERY, query.toString());
|
||||
|
||||
|
@ -79,7 +106,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
clusteringAlgorithmClass).getClusters(), sreq.getParams());
|
||||
} catch (Exception e) {
|
||||
log.error("Carrot2 clustering failed", e);
|
||||
throw new RuntimeException(e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,31 +141,36 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
|
||||
SolrParams solrParams = sreq.getParams();
|
||||
|
||||
// Names of fields to deliver content for clustering
|
||||
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
|
||||
String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
|
||||
if (StringUtils.isBlank(snippetField)) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
|
||||
+ " must not be blank.");
|
||||
}
|
||||
return Sets.newHashSet(urlField, titleField, snippetField, idFieldName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepares Carrot2 documents for clustering.
|
||||
*/
|
||||
private List<Document> getDocuments(DocList docList,
|
||||
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds,
|
||||
Query query, final SolrQueryRequest sreq) throws IOException {
|
||||
SolrHighlighter highlighter = null;
|
||||
SolrParams solrParams = sreq.getParams();
|
||||
SolrCore core = sreq.getCore();
|
||||
|
||||
// Names of fields to deliver content for clustering
|
||||
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
|
||||
String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
|
||||
titleField);
|
||||
if (StringUtils.isBlank(snippetField)) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
|
||||
+ " must not be blank.");
|
||||
}
|
||||
Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
|
||||
snippetField, idFieldName);
|
||||
|
||||
String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
|
||||
|
||||
// Get the documents
|
||||
DocIterator docsIter = docList.iterator();
|
||||
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
|
||||
false);
|
||||
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
|
||||
|
||||
SolrQueryRequest req = null;
|
||||
String[] snippetFieldAry = null;
|
||||
|
@ -164,20 +196,20 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
}
|
||||
}
|
||||
|
||||
SolrIndexSearcher searcher = sreq.getSearcher();
|
||||
List<Document> result = new ArrayList<Document>(docList.size());
|
||||
Iterator<SolrDocument> docsIter = solrDocList.iterator();
|
||||
List<Document> result = new ArrayList<Document>(solrDocList.size());
|
||||
|
||||
float[] scores = {1.0f};
|
||||
int[] docsHolder = new int[1];
|
||||
Query theQuery = query;
|
||||
|
||||
while (docsIter.hasNext()) {
|
||||
Integer id = docsIter.next();
|
||||
org.apache.lucene.document.Document doc = searcher.doc(id,
|
||||
fieldsToLoad);
|
||||
String snippet = getValue(doc, snippetField);
|
||||
if (produceSummary == true) {
|
||||
docsHolder[0] = id.intValue();
|
||||
SolrDocument sdoc = docsIter.next();
|
||||
String snippet = getValue(sdoc, snippetField);
|
||||
// TODO: docIds will be null when running distributed search.
|
||||
// See comment in ClusteringComponent#finishStage().
|
||||
if (produceSummary && docIds != null) {
|
||||
docsHolder[0] = docIds.get(sdoc).intValue();
|
||||
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
|
||||
NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
|
||||
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
|
||||
|
@ -189,15 +221,16 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
}
|
||||
}
|
||||
}
|
||||
Document carrotDocument = new Document(getValue(doc, titleField),
|
||||
snippet, doc.get(urlField));
|
||||
carrotDocument.setField("solrId", doc.get(idFieldName));
|
||||
Document carrotDocument = new Document(getValue(sdoc, titleField),
|
||||
snippet, (String)sdoc.getFieldValue(urlField));
|
||||
carrotDocument.setField("solrId", sdoc.getFieldValue(idFieldName));
|
||||
result.add(carrotDocument);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
protected String getValue(org.apache.lucene.document.Document doc,
|
||||
String field) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
|
@ -211,6 +244,20 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
return result.toString().trim();
|
||||
}
|
||||
|
||||
protected String getValue(SolrDocument sdoc, String field) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
Collection<Object> vals = sdoc.getFieldValues(field);
|
||||
if(vals == null) return "";
|
||||
Iterator<Object> ite = vals.iterator();
|
||||
while(ite.hasNext()){
|
||||
// Join multiple values with a period so that Carrot2 does not pick up
|
||||
// phrases that cross field value boundaries (in most cases it would
|
||||
// create useless phrases).
|
||||
result.append((String)ite.next()).append(" . ");
|
||||
}
|
||||
return result.toString().trim();
|
||||
}
|
||||
|
||||
private List clustersToNamedList(List<Cluster> carrotClusters,
|
||||
SolrParams solrParams) {
|
||||
List result = new ArrayList();
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package org.apache.solr.handler.clustering;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
||||
public class DistributedClusteringComponentTest extends
|
||||
BaseDistributedSearchTestCase {
|
||||
|
||||
@Override
|
||||
public void doTest() throws Exception {
|
||||
del("*:*");
|
||||
int numberOfDocs = 0;
|
||||
for (String[] doc : AbstractClusteringTestCase.DOCUMENTS) {
|
||||
index(id, Integer.toString(numberOfDocs++), "url", doc[0], "title", doc[1], "snippet", doc[2]);
|
||||
}
|
||||
commit();
|
||||
handle.clear();
|
||||
// Only really care about the clusters for this test case, so drop the header and response
|
||||
handle.put("responseHeader", SKIP);
|
||||
handle.put("response", SKIP);
|
||||
query(
|
||||
ClusteringComponent.COMPONENT_NAME, "true",
|
||||
CommonParams.Q, "*:*",
|
||||
CommonParams.SORT, id + " desc",
|
||||
ClusteringParams.USE_SEARCH_RESULTS, "true");
|
||||
// destroy is not needed because tearDown method of base class does it.
|
||||
//destroyServers();
|
||||
}
|
||||
|
||||
}
|
|
@ -22,6 +22,8 @@ import org.apache.lucene.search.MatchAllDocsQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -31,11 +33,14 @@ import org.apache.solr.request.LocalSolrQueryRequest;
|
|||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.carrot2.util.attribute.AttributeUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
|
@ -133,21 +138,23 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
|
||||
numberOfDocs);
|
||||
assertEquals("docList size", expectedNumDocs, docList.matches());
|
||||
|
||||
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||
solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
|
||||
solrParams.add(clusteringParams);
|
||||
|
||||
// Perform clustering
|
||||
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
|
||||
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, engine.getFieldsToLoad(req), docIds );
|
||||
List results = (List)engine.cluster(query, solrDocList, docIds, req);
|
||||
req.close();
|
||||
assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
|
||||
checkClusters(results, false);
|
||||
return results;
|
||||
} finally {
|
||||
ref.decref();
|
||||
}
|
||||
|
||||
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||
solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
|
||||
solrParams.add(clusteringParams);
|
||||
|
||||
// Perform clustering
|
||||
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
|
||||
List results = (List) engine.cluster(query, docList, req);
|
||||
req.close();
|
||||
assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
|
||||
checkClusters(results, false);
|
||||
return results;
|
||||
}
|
||||
|
||||
private void checkClusters(List results, int expectedDocCount,
|
||||
|
|
Loading…
Reference in New Issue