mirror of https://github.com/apache/lucene.git
SOLR-1692: fix produceSummary issue with Carrot2 clustering
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@895640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e9e86f2a47
commit
f6ac4ff2bb
|
@ -12,6 +12,8 @@ $Id:$
|
|||
|
||||
* SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document cache (gsingers)
|
||||
|
||||
* SOLR-1692: Fix bug relating to carrot.produceSummary option (gsingers)
|
||||
|
||||
================== Release 1.4.0 ==================
|
||||
|
||||
Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent
|
||||
|
|
|
@ -113,7 +113,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
*/
|
||||
private List<Document> getDocuments(DocList docList,
|
||||
Query query, final SolrQueryRequest sreq) throws IOException {
|
||||
SolrHighlighter highligher = null;
|
||||
SolrHighlighter highlighter = null;
|
||||
SolrParams solrParams = sreq.getParams();
|
||||
SolrCore core = sreq.getCore();
|
||||
|
||||
|
@ -137,17 +137,25 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
SolrQueryRequest req = null;
|
||||
String[] snippetFieldAry = null;
|
||||
if (produceSummary == true) {
|
||||
highligher = core.getHighlighter();
|
||||
Map args = new HashMap();
|
||||
snippetFieldAry = new String[]{snippetField};
|
||||
args.put(HighlightParams.FIELDS, snippetFieldAry);
|
||||
args.put(HighlightParams.HIGHLIGHT, "true");
|
||||
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
|
||||
@Override
|
||||
public SolrIndexSearcher getSearcher() {
|
||||
return sreq.getSearcher();
|
||||
}
|
||||
};
|
||||
highlighter = core.getHighlighter();
|
||||
if (highlighter != null){
|
||||
Map args = new HashMap();
|
||||
snippetFieldAry = new String[]{snippetField};
|
||||
args.put(HighlightParams.FIELDS, snippetFieldAry);
|
||||
args.put(HighlightParams.HIGHLIGHT, "true");
|
||||
args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
|
||||
args.put(HighlightParams.SIMPLE_POST, "");
|
||||
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
|
||||
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
|
||||
@Override
|
||||
public SolrIndexSearcher getSearcher() {
|
||||
return sreq.getSearcher();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
log.warn("No highlighter configured, cannot produce summary");
|
||||
produceSummary = false;
|
||||
}
|
||||
}
|
||||
|
||||
SolrIndexSearcher searcher = sreq.getSearcher();
|
||||
|
@ -165,11 +173,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
if (produceSummary == true) {
|
||||
docsHolder[0] = id.intValue();
|
||||
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
|
||||
highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
|
||||
NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
|
||||
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
|
||||
//should only be one document with one field
|
||||
NamedList tmp = (NamedList) highlights.getVal(0);
|
||||
String [] highlt = (String[]) tmp.get(snippetField);
|
||||
if (highlt != null && highlt.length == 1) {
|
||||
snippet = highlt[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
Document carrotDocument = new Document(getValue(doc, titleField),
|
||||
snippet, doc.get(urlField));
|
||||
carrotDocument.addField("solrId", doc.get(idFieldName));
|
||||
carrotDocument.setField("solrId", doc.get(idFieldName));
|
||||
result.add(carrotDocument);
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.solr.handler.clustering.carrot2;
|
|||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -33,8 +34,9 @@ public interface CarrotParams {
|
|||
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
|
||||
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragzise";
|
||||
|
||||
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
|
||||
PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
|
||||
PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS, SUMMARY_FRAGSIZE);
|
||||
}
|
||||
|
|
|
@ -17,9 +17,11 @@ package org.apache.solr.handler.clustering.carrot2;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -43,6 +45,13 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
|
|||
checkEngine(getClusteringEngine("default"), 10);
|
||||
}
|
||||
|
||||
public void testProduceSummary() throws Exception {
|
||||
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||
solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
|
||||
solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we validate this?
|
||||
checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two don't have mining in the snippet*/, 16, new TermQuery(new Term("snippet", "mine")), solrParams);
|
||||
}
|
||||
|
||||
public void testCarrotStc() throws Exception {
|
||||
checkEngine(getClusteringEngine("stc"), 1);
|
||||
}
|
||||
|
@ -55,8 +64,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
|
|||
public void testWithSubclusters() throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
|
||||
checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
|
||||
params), 1, 1, 2);
|
||||
checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs), 1, 1, 2);
|
||||
}
|
||||
|
||||
public void testNumDescriptions() throws Exception {
|
||||
|
@ -87,21 +95,27 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
|
|||
}
|
||||
|
||||
private List checkEngine(CarrotClusteringEngine engine,
|
||||
int expectedNumClusters) throws IOException {
|
||||
return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
|
||||
int expectedNumClusters) throws IOException {
|
||||
return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), new ModifiableSolrParams());
|
||||
}
|
||||
|
||||
private List checkEngine(CarrotClusteringEngine engine,
|
||||
int expectedNumClusters, SolrParams clusteringParams) throws IOException {
|
||||
int expectedNumClusters, SolrParams clusteringParams) throws IOException {
|
||||
return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), clusteringParams);
|
||||
}
|
||||
|
||||
|
||||
private List checkEngine(CarrotClusteringEngine engine, int expectedNumDocs,
|
||||
int expectedNumClusters, Query query, SolrParams clusteringParams) throws IOException {
|
||||
// Get all documents to cluster
|
||||
RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
|
||||
MatchAllDocsQuery query = new MatchAllDocsQuery();
|
||||
|
||||
DocList docList;
|
||||
try {
|
||||
SolrIndexSearcher searcher = ref.get();
|
||||
docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
|
||||
numberOfDocs);
|
||||
assertEquals("docList size", this.numberOfDocs, docList.matches());
|
||||
assertEquals("docList size", expectedNumDocs, docList.matches());
|
||||
} finally {
|
||||
ref.decref();
|
||||
}
|
||||
|
@ -114,7 +128,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTest {
|
|||
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
|
||||
List results = (List) engine.cluster(query, docList, req);
|
||||
req.close();
|
||||
assertEquals("number of clusters", expectedNumClusters, results.size());
|
||||
assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
|
||||
checkClusters(results, false);
|
||||
return results;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue