SOLR-1692: fix produceSummary issue with Carrot2 clustering

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@895640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-01-04 14:28:59 +00:00
parent e9e86f2a47
commit f6ac4ff2bb
4 changed files with 57 additions and 23 deletions

View File

@ -12,6 +12,8 @@ $Id:$
* SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document cache (gsingers)
* SOLR-1692: Fix bug relating to carrot.produceSummary option (gsingers)
================== Release 1.4.0 ==================
Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent

View File

@ -113,7 +113,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
*/
private List<Document> getDocuments(DocList docList,
Query query, final SolrQueryRequest sreq) throws IOException {
SolrHighlighter highligher = null;
SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
@ -137,17 +137,25 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary == true) {
highligher = core.getHighlighter();
Map args = new HashMap();
snippetFieldAry = new String[]{snippetField};
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
highlighter = core.getHighlighter();
if (highlighter != null){
Map args = new HashMap();
snippetFieldAry = new String[]{snippetField};
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
} else {
log.warn("No highlighter configured, cannot produce summary");
produceSummary = false;
}
}
SolrIndexSearcher searcher = sreq.getSearcher();
@ -165,11 +173,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
if (produceSummary == true) {
docsHolder[0] = id.intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
//should only be one document with one field
NamedList tmp = (NamedList) highlights.getVal(0);
String [] highlt = (String[]) tmp.get(snippetField);
if (highlt != null && highlt.length == 1) {
snippet = highlt[0];
}
}
}
Document carrotDocument = new Document(getValue(doc, titleField),
snippet, doc.get(urlField));
carrotDocument.addField("solrId", doc.get(idFieldName));
carrotDocument.setField("solrId", doc.get(idFieldName));
result.add(carrotDocument);
}

View File

@ -3,6 +3,7 @@ package org.apache.solr.handler.clustering.carrot2;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
import org.apache.solr.common.params.HighlightParams;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -33,8 +34,9 @@ public interface CarrotParams {
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragzise";
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS, SUMMARY_FRAGSIZE);
}

View File

@ -17,9 +17,11 @@ package org.apache.solr.handler.clustering.carrot2;
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@ -43,6 +45,13 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
checkEngine(getClusteringEngine("default"), 10);
}
public void testProduceSummary() throws Exception {
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we validate this?
checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two don't have mining in the snippet*/, 16, new TermQuery(new Term("snippet", "mine")), solrParams);
}
public void testCarrotStc() throws Exception {
checkEngine(getClusteringEngine("stc"), 1);
}
@ -55,8 +64,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
public void testWithSubclusters() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
params), 1, 1, 2);
checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs), 1, 1, 2);
}
public void testNumDescriptions() throws Exception {
@ -87,21 +95,27 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
}
private List checkEngine(CarrotClusteringEngine engine,
int expectedNumClusters) throws IOException {
return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
int expectedNumClusters) throws IOException {
return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), new ModifiableSolrParams());
}
private List checkEngine(CarrotClusteringEngine engine,
int expectedNumClusters, SolrParams clusteringParams) throws IOException {
int expectedNumClusters, SolrParams clusteringParams) throws IOException {
return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), clusteringParams);
}
private List checkEngine(CarrotClusteringEngine engine, int expectedNumDocs,
int expectedNumClusters, Query query, SolrParams clusteringParams) throws IOException {
// Get all documents to cluster
RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
MatchAllDocsQuery query = new MatchAllDocsQuery();
DocList docList;
try {
SolrIndexSearcher searcher = ref.get();
docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
numberOfDocs);
assertEquals("docList size", this.numberOfDocs, docList.matches());
assertEquals("docList size", expectedNumDocs, docList.matches());
} finally {
ref.decref();
}
@ -114,7 +128,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
List results = (List) engine.cluster(query, docList, req);
req.close();
assertEquals("number of clusters", expectedNumClusters, results.size());
assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
checkClusters(results, false);
return results;
}