mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 11:35:14 +00:00
SOLR-6248: MoreLikeThis QParser that works in standalone/cloud mode
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1634937 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7bd7a91057
commit
a5ba0b84f7
@ -33,10 +33,8 @@ import org.apache.lucene.search.TermQuery;
|
|||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CharsRef;
|
|
||||||
import org.apache.lucene.util.CharsRefBuilder;
|
import org.apache.lucene.util.CharsRefBuilder;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
@ -588,6 +586,20 @@ public final class MoreLikeThis {
|
|||||||
return createQuery(retrieveTerms(docNum));
|
return createQuery(retrieveTerms(docNum));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param filteredDocument Document with field values extracted for selected fields.
|
||||||
|
* @return More Like This query for the passed document.
|
||||||
|
*/
|
||||||
|
public Query like(Map<String, ArrayList<String>> filteredDocument) throws IOException {
|
||||||
|
if (fieldNames == null) {
|
||||||
|
// gather list of valid fields from lucene
|
||||||
|
Collection<String> fields = MultiFields.getIndexedFields(ir);
|
||||||
|
fieldNames = fields.toArray(new String[fields.size()]);
|
||||||
|
}
|
||||||
|
return createQuery(retrieveTerms(filteredDocument));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a query that will return docs like the passed Readers.
|
* Return a query that will return docs like the passed Readers.
|
||||||
* This was added in order to treat multi-value fields.
|
* This was added in order to treat multi-value fields.
|
||||||
@ -741,6 +753,24 @@ public final class MoreLikeThis {
|
|||||||
return createQueue(termFreqMap);
|
return createQueue(termFreqMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, ArrayList<String>> fields) throws
|
||||||
|
IOException {
|
||||||
|
HashMap<String,Int> termFreqMap = new HashMap();
|
||||||
|
for (String fieldName : fieldNames) {
|
||||||
|
|
||||||
|
for (String field : fields.keySet()) {
|
||||||
|
ArrayList<String> fieldValues = fields.get(field);
|
||||||
|
for(String fieldValue:fieldValues) {
|
||||||
|
if (fieldValue != null) {
|
||||||
|
addTermFrequencies(new StringReader(fieldValue), termFreqMap,
|
||||||
|
fieldName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return createQueue(termFreqMap);
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Adds terms and frequencies found in vector into the Map termFreqMap
|
* Adds terms and frequencies found in vector into the Map termFreqMap
|
||||||
*
|
*
|
||||||
|
@ -194,6 +194,10 @@ New Features
|
|||||||
properties to allow easier overriding of just the right piece of the Solr URL.
|
properties to allow easier overriding of just the right piece of the Solr URL.
|
||||||
(ehatcher)
|
(ehatcher)
|
||||||
|
|
||||||
|
* SOLR-6248: MoreLikeThis QParser that accepts a document id and returns documents that
|
||||||
|
have similar content. It works in standalone/cloud mode and shares logic with the
|
||||||
|
Lucene MoreLikeThis class (Anshum Gupta).
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ import org.apache.solr.core.SolrInfoMBean;
|
|||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.search.join.BlockJoinChildQParserPlugin;
|
import org.apache.solr.search.join.BlockJoinChildQParserPlugin;
|
||||||
import org.apache.solr.search.join.BlockJoinParentQParserPlugin;
|
import org.apache.solr.search.join.BlockJoinParentQParserPlugin;
|
||||||
|
import org.apache.solr.search.mlt.MLTQParserPlugin;
|
||||||
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
||||||
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -63,7 +64,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
|
|||||||
SimpleQParserPlugin.NAME, SimpleQParserPlugin.class,
|
SimpleQParserPlugin.NAME, SimpleQParserPlugin.class,
|
||||||
ComplexPhraseQParserPlugin.NAME, ComplexPhraseQParserPlugin.class,
|
ComplexPhraseQParserPlugin.NAME, ComplexPhraseQParserPlugin.class,
|
||||||
ReRankQParserPlugin.NAME, ReRankQParserPlugin.class,
|
ReRankQParserPlugin.NAME, ReRankQParserPlugin.class,
|
||||||
ExportQParserPlugin.NAME, ExportQParserPlugin.class
|
ExportQParserPlugin.NAME, ExportQParserPlugin.class,
|
||||||
|
MLTQParserPlugin.NAME, MLTQParserPlugin.class
|
||||||
};
|
};
|
||||||
|
|
||||||
/** return a {@link QParser} */
|
/** return a {@link QParser} */
|
||||||
|
@ -0,0 +1,100 @@
|
|||||||
|
package org.apache.solr.search.mlt;
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryRequestBase;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class CloudMLTQParser extends QParser {
|
||||||
|
|
||||||
|
public CloudMLTQParser(String qstr, SolrParams localParams,
|
||||||
|
SolrParams params, SolrQueryRequest req) {
|
||||||
|
super(qstr, localParams, params, req);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query parse() {
|
||||||
|
String id = localParams.get("id");
|
||||||
|
// Do a Real Time Get for the document
|
||||||
|
SolrDocument doc = getDocument(id);
|
||||||
|
|
||||||
|
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
|
||||||
|
// TODO: Are the mintf and mindf defaults ok at 1/0 ?
|
||||||
|
|
||||||
|
mlt.setMinTermFreq(localParams.getInt("mintf", 1));
|
||||||
|
mlt.setMinDocFreq(localParams.getInt("mindf", 0));
|
||||||
|
if(localParams.get("minwl") != null)
|
||||||
|
mlt.setMinWordLen(localParams.getInt("minwl"));
|
||||||
|
|
||||||
|
if(localParams.get("maxwl") != null)
|
||||||
|
mlt.setMaxWordLen(localParams.getInt("maxwl"));
|
||||||
|
|
||||||
|
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||||
|
|
||||||
|
String[] qf = localParams.getParams("qf");
|
||||||
|
Map<String, ArrayList<String>> filteredDocument = new HashMap();
|
||||||
|
|
||||||
|
if (qf != null) {
|
||||||
|
mlt.setFieldNames(qf);
|
||||||
|
for (String field : qf) {
|
||||||
|
filteredDocument.put(field, (ArrayList<String>) doc.get(field));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (String field : doc.getFieldNames()) {
|
||||||
|
filteredDocument.put(field, (ArrayList<String>) doc.get(field));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return mlt.like(filteredDocument);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private SolrDocument getDocument(String id) {
|
||||||
|
SolrCore core = req.getCore();
|
||||||
|
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add("id", id);
|
||||||
|
|
||||||
|
SolrQueryRequestBase request = new SolrQueryRequestBase(core, params) {
|
||||||
|
};
|
||||||
|
|
||||||
|
core.getRequestHandler("/get").handleRequest(request, rsp);
|
||||||
|
NamedList response = rsp.getValues();
|
||||||
|
|
||||||
|
return (SolrDocument) response.get("doc");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
package org.apache.solr.search.mlt;
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.search.QParserPlugin;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Usage: {!mlt id=uniqueId qf=queryField1,queryField2}
|
||||||
|
*/
|
||||||
|
public class MLTQParserPlugin extends QParserPlugin {
|
||||||
|
public static final String NAME = "mlt";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(NamedList args) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QParser createParser(String qstr, SolrParams localParams,
|
||||||
|
SolrParams params, SolrQueryRequest req) {
|
||||||
|
if (req.getCore().getCoreDescriptor().getCloudDescriptor() != null) {
|
||||||
|
return new CloudMLTQParser(qstr, localParams, params, req);
|
||||||
|
} else {
|
||||||
|
return new SimpleMLTQParser(qstr, localParams, params, req);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,109 @@
|
|||||||
|
package org.apache.solr.search.mlt;
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class SimpleMLTQParser extends QParser {
|
||||||
|
|
||||||
|
public SimpleMLTQParser(String qstr, SolrParams localParams,
|
||||||
|
SolrParams params, SolrQueryRequest req) {
|
||||||
|
super(qstr, localParams, params, req);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query parse() {
|
||||||
|
|
||||||
|
String defaultField = req.getSchema().getUniqueKeyField().getName();
|
||||||
|
String uniqueValue = localParams.get("id");
|
||||||
|
String[] qf = localParams.getParams("qf");
|
||||||
|
|
||||||
|
SolrIndexSearcher searcher = req.getSearcher();
|
||||||
|
Query docIdQuery = createIdQuery(defaultField, uniqueValue);
|
||||||
|
|
||||||
|
try {
|
||||||
|
TopDocs td = searcher.search(docIdQuery, 1);
|
||||||
|
if (td.totalHits != 1) throw new SolrException(
|
||||||
|
SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " +
|
||||||
|
"document with id [" + uniqueValue + "]");
|
||||||
|
ScoreDoc[] scoreDocs = td.scoreDocs;
|
||||||
|
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
|
||||||
|
// TODO: Are the mintf and mindf defaults ok at '1' ?
|
||||||
|
mlt.setMinTermFreq(localParams.getInt("mintf", 1));
|
||||||
|
mlt.setMinDocFreq(localParams.getInt("mindf", 1));
|
||||||
|
if(localParams.get("minwl") != null)
|
||||||
|
mlt.setMinWordLen(localParams.getInt("minwl"));
|
||||||
|
|
||||||
|
if(localParams.get("maxwl") != null)
|
||||||
|
mlt.setMaxWordLen(localParams.getInt("maxwl"));
|
||||||
|
|
||||||
|
ArrayList<String> fields = new ArrayList();
|
||||||
|
|
||||||
|
if (qf != null) {
|
||||||
|
mlt.setFieldNames(qf);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
|
||||||
|
for (String fieldName : fieldNames.keySet()) {
|
||||||
|
if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
|
||||||
|
if (fieldNames.get(fieldName).getType().getNumericType() == null)
|
||||||
|
fields.add(fieldName);
|
||||||
|
}
|
||||||
|
mlt.setFieldNames(fields.toArray(new String[fields.size()]));
|
||||||
|
}
|
||||||
|
|
||||||
|
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||||
|
|
||||||
|
return mlt.like(scoreDocs[0].doc);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||||
|
"Error completing MLT request" + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query createIdQuery(String defaultField, String uniqueValue) {
|
||||||
|
return new TermQuery(req.getSchema().getField(defaultField).getType().getNumericType() != null
|
||||||
|
? createNumericTerm(defaultField, uniqueValue)
|
||||||
|
: new Term(defaultField, uniqueValue));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Term createNumericTerm(String field, String uniqueValue) {
|
||||||
|
BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
|
||||||
|
bytesRefBuilder.grow(NumericUtils.BUF_SIZE_INT);
|
||||||
|
NumericUtils.intToPrefixCoded(Integer.parseInt(uniqueValue), 0, bytesRefBuilder);
|
||||||
|
return new Term(field, bytesRefBuilder.toBytesRef());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
27
solr/core/src/java/org/apache/solr/search/mlt/package.html
Normal file
27
solr/core/src/java/org/apache/solr/search/mlt/package.html
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
APIs and classes for implementing MoreLikeThis (mlt) QueryParser.
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -19,6 +19,7 @@ package org.apache.solr.search;
|
|||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryUtils;
|
import org.apache.lucene.search.QueryUtils;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrRequestInfo;
|
import org.apache.solr.request.SolrRequestInfo;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
@ -838,6 +839,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testQueryMLT() throws Exception {
|
||||||
|
assertU(adoc("id", "1", "lowerfilt", "sample data"));
|
||||||
|
assertU(commit());
|
||||||
|
SolrQueryRequest req = req("qf","lowerfilt","id","1");
|
||||||
|
try {
|
||||||
|
assertQueryEquals("mlt", req,
|
||||||
|
"{!mlt qf=lowerfilt id=1}");
|
||||||
|
} finally {
|
||||||
|
delQ("*:*");
|
||||||
|
assertU(commit());
|
||||||
|
req.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* NOTE: defType is not only used to pick the parser, but also to record
|
* NOTE: defType is not only used to pick the parser, but also to record
|
||||||
* the parser being tested for coverage sanity checking
|
* the parser being tested for coverage sanity checking
|
||||||
|
@ -0,0 +1,141 @@
|
|||||||
|
package org.apache.solr.search.mlt;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
||||||
|
public CloudMLTQParserTest() {
|
||||||
|
configString = "solrconfig.xml";
|
||||||
|
schemaString = "schema.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String getCloudSolrConfig() {
|
||||||
|
return configString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doTest() throws Exception {
|
||||||
|
|
||||||
|
waitForRecoveriesToFinish(false);
|
||||||
|
|
||||||
|
String id = "id";
|
||||||
|
delQ("*:*");
|
||||||
|
indexDoc(sdoc(id, "1", "lowerfilt", "toyota"));
|
||||||
|
indexDoc(sdoc(id, "2", "lowerfilt", "chevrolet"));
|
||||||
|
indexDoc(sdoc(id, "3", "lowerfilt", "bmw usa"));
|
||||||
|
indexDoc(sdoc(id, "4", "lowerfilt", "ford"));
|
||||||
|
indexDoc(sdoc(id, "5", "lowerfilt", "ferrari"));
|
||||||
|
indexDoc(sdoc(id, "6", "lowerfilt", "jaguar"));
|
||||||
|
indexDoc(sdoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine and the moon but moon was good foxes too"));
|
||||||
|
indexDoc(sdoc(id, "8", "lowerfilt", "sonata"));
|
||||||
|
indexDoc(sdoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big and large brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "10", "lowerfilt", "blue"));
|
||||||
|
indexDoc(sdoc(id, "12", "lowerfilt", "glue"));
|
||||||
|
indexDoc(sdoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon"));
|
||||||
|
indexDoc(sdoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs."));
|
||||||
|
indexDoc(sdoc(id, "25", "lowerfilt", "rod fix"));
|
||||||
|
indexDoc(sdoc(id, "26", "lowerfilt", "bmw usa 328i"));
|
||||||
|
indexDoc(sdoc(id, "27", "lowerfilt", "bmw usa 535i"));
|
||||||
|
indexDoc(sdoc(id, "28", "lowerfilt", "bmw 750Li"));
|
||||||
|
|
||||||
|
commit();
|
||||||
|
|
||||||
|
handle.clear();
|
||||||
|
handle.put("QTime", SKIPVAL);
|
||||||
|
handle.put("timestamp", SKIPVAL);
|
||||||
|
handle.put("maxScore", SKIPVAL);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
|
||||||
|
params.set(CommonParams.Q, "{!mlt id=17 qf=lowerfilt}");
|
||||||
|
QueryResponse queryResponse = cloudClient.query(params);
|
||||||
|
SolrDocumentList solrDocuments = queryResponse.getResults();
|
||||||
|
int[] expectedIds = new int[]{17, 13, 14, 20, 22, 15, 16, 24, 18, 23};
|
||||||
|
int[] actualIds = new int[10];
|
||||||
|
int i = 0;
|
||||||
|
for (SolrDocument solrDocument : solrDocuments) {
|
||||||
|
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||||
|
}
|
||||||
|
assertArrayEquals(expectedIds, actualIds);
|
||||||
|
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.set(CommonParams.Q, "{!mlt id=3 qf=lowerfilt}");
|
||||||
|
queryResponse = queryServer(params);
|
||||||
|
solrDocuments = queryResponse.getResults();
|
||||||
|
expectedIds = new int[]{3, 27, 26, 28};
|
||||||
|
actualIds = new int[4];
|
||||||
|
i = 0;
|
||||||
|
for (SolrDocument solrDocument : solrDocuments) {
|
||||||
|
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||||
|
}
|
||||||
|
assertArrayEquals(expectedIds, actualIds);
|
||||||
|
|
||||||
|
params = new ModifiableSolrParams();
|
||||||
|
params.set(CommonParams.Q, "{!mlt id=20 qf=lowerfilt}");
|
||||||
|
params.set("debug" , "query");
|
||||||
|
queryResponse = queryServer(params);
|
||||||
|
solrDocuments = queryResponse.getResults();
|
||||||
|
expectedIds = new int[]{18, 23, 13, 14, 20, 22, 19, 21, 15, 16};
|
||||||
|
actualIds = new int[10];
|
||||||
|
i = 0;
|
||||||
|
for (SolrDocument solrDocument : solrDocuments) {
|
||||||
|
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||||
|
}
|
||||||
|
assertArrayEquals(expectedIds, actualIds);
|
||||||
|
|
||||||
|
String expectedQueryString = "lowerfilt:over lowerfilt:fox lowerfilt:lazy lowerfilt:brown "
|
||||||
|
+ "lowerfilt:jumped lowerfilt:red lowerfilt:dogs. lowerfilt:quote lowerfilt:the";
|
||||||
|
|
||||||
|
ArrayList<String> actualParsedQueries = (ArrayList<String>) queryResponse
|
||||||
|
.getDebugMap().get("parsedquery");
|
||||||
|
|
||||||
|
for(int counter=0; counter < actualParsedQueries.size(); counter++) {
|
||||||
|
assertTrue("Parsed queries aren't equal",
|
||||||
|
compareParsedQueryStrings(expectedQueryString,
|
||||||
|
actualParsedQueries.get(counter)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean compareParsedQueryStrings(String expected, String actual) {
|
||||||
|
HashSet<String> expectedQueryParts = new HashSet<>();
|
||||||
|
expectedQueryParts.addAll(Arrays.asList(expected.split("\\s+")));
|
||||||
|
HashSet<String> actualQueryParts = new HashSet();
|
||||||
|
actualQueryParts.addAll(Arrays.asList(actual.split("\\s+")));
|
||||||
|
return expectedQueryParts.containsAll(actualQueryParts);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,85 @@
|
|||||||
|
package org.apache.solr.search.mlt;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
// TODO: Assert against expected parsed query for different min/maxidf values.
|
||||||
|
public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void moreLikeThisBeforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml", "schema.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void doTest() throws Exception {
|
||||||
|
String id = "id";
|
||||||
|
delQ("*:*");
|
||||||
|
assertU(adoc(id, "1", "lowerfilt", "toyota"));
|
||||||
|
assertU(adoc(id, "2", "lowerfilt", "chevrolet"));
|
||||||
|
assertU(adoc(id, "3", "lowerfilt", "suzuki"));
|
||||||
|
assertU(adoc(id, "4", "lowerfilt", "ford"));
|
||||||
|
assertU(adoc(id, "5", "lowerfilt", "ferrari"));
|
||||||
|
assertU(adoc(id, "6", "lowerfilt", "jaguar"));
|
||||||
|
assertU(adoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine " +
|
||||||
|
"and the moon but moon was good foxes too"));
|
||||||
|
assertU(adoc(id, "8", "lowerfilt", "sonata"));
|
||||||
|
assertU(adoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big " +
|
||||||
|
"and large brown dogs."));
|
||||||
|
assertU(adoc(id, "10", "lowerfilt", "blue"));
|
||||||
|
assertU(adoc(id, "12", "lowerfilt", "glue"));
|
||||||
|
assertU(adoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy " +
|
||||||
|
"brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon"));
|
||||||
|
assertU(adoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs."));
|
||||||
|
assertU(adoc(id, "25", "lowerfilt", "rod fix"));
|
||||||
|
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.set(CommonParams.Q, "{!mlt id=17 qf=lowerfilt}");
|
||||||
|
assertQ(req(params),
|
||||||
|
"//result/doc[1]/int[@name='id'][.='17']",
|
||||||
|
"//result/doc[2]/int[@name='id'][.='13']",
|
||||||
|
"//result/doc[3]/int[@name='id'][.='14']",
|
||||||
|
"//result/doc[4]/int[@name='id'][.='18']",
|
||||||
|
"//result/doc[5]/int[@name='id'][.='20']",
|
||||||
|
"//result/doc[6]/int[@name='id'][.='22']",
|
||||||
|
"//result/doc[7]/int[@name='id'][.='23']",
|
||||||
|
"//result/doc[8]/int[@name='id'][.='9']",
|
||||||
|
"//result/doc[9]/int[@name='id'][.='7']",
|
||||||
|
"//result/doc[10]/int[@name='id'][.='15']"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user