SOLR-2834: Handle CharacterFilters in Solr

Affects the Document and Field analysis responses; previously this caused a class cast error
This commit is contained in:
Alexandre Rafalovitch 2018-07-04 22:05:45 -04:00
parent d96ef95e0f
commit 2aae3fb3d2
7 changed files with 55 additions and 20 deletions

View File

@ -138,6 +138,9 @@ Bug Fixes
* SOLR-12533: Collection creation fails if metrics are called during core creation (Peter Cseh, Mano Kovacs)
* SOLR-2834: Fix SolrJ Field and Document analyses for types that include CharacterFilter (Alexandre Rafalovitch)
Optimizations
----------------------

View File

@ -57,24 +57,43 @@ public class AnalysisResponseBase extends SolrResponseBase {
* </lst>
* </code></pre>
*
* The special case is a CharacterFilter that just returns a string, which we then map to a single token without type.
*
* @param phaseNL The names list to parse.
*
* @return The built analysis phases list.
*/
// NOTE(review): the two signatures and the two outer loop headers below look like the
// pre- and post-change lines of a diff shown together - confirm which variant is live.
protected List<AnalysisPhase> buildPhases(NamedList<List<NamedList<Object>>> phaseNL) {
protected List<AnalysisPhase> buildPhases(NamedList<Object> phaseNL) {
// One AnalysisPhase is added per entry of phaseNL, so the result list is pre-sized to match.
List<AnalysisPhase> phases = new ArrayList<>(phaseNL.size());
for (Map.Entry<String, List<NamedList<Object>>> phaseEntry : phaseNL) {
for (Map.Entry<String, Object> phaseEntry : phaseNL) {
// The entry key is handed to the AnalysisPhase constructor; the value supplies its tokens.
AnalysisPhase phase = new AnalysisPhase(phaseEntry.getKey());
List<NamedList<Object>> tokens = phaseEntry.getValue();
for (NamedList<Object> token : tokens) {
TokenInfo tokenInfo = buildTokenInfo(token);
Object phaseValue = phaseEntry.getValue();
if (phaseValue instanceof String) {
// We are looking at CharacterFilter, which - exceptionally - returns a string
TokenInfo tokenInfo = buildTokenInfoFromString((String) phaseValue);
phase.addTokenInfo(tokenInfo);
} else {
// Any non-String value is cast (unchecked) to a list of per-token named lists,
// each of which is converted with buildTokenInfo.
List<NamedList<Object>> tokens = (List<NamedList<Object>>) phaseEntry.getValue();
for (NamedList<Object> token : tokens) {
TokenInfo tokenInfo = buildTokenInfo(token);
phase.addTokenInfo(tokenInfo);
}
}
phases.add(phase);
}
return phases;
}
/**
 * Wraps the plain string produced by a CharacterFilter in a single {@link TokenInfo}
 * whose span covers the whole string (from 0 to the string's length).
 * The same string is supplied for both text arguments and the type is {@code null}.
 *
 * @param value String value
 * @return The built token info (with type set to null)
 */
protected TokenInfo buildTokenInfoFromString(String value) {
  final int spanStart = 0;
  final int spanEnd = value.length();
  TokenInfo wholeValueToken = new TokenInfo(value, value, null, spanStart, spanEnd, 1, false);
  return wholeValueToken;
}
/**
* Parses the given named list and builds a token info from it. Expects a named list of the form:
* <br>

View File

@ -49,19 +49,19 @@ public class DocumentAnalysisResponse extends AnalysisResponseBase implements It
NamedList<Object> field = fieldEntry.getValue();
@SuppressWarnings("unchecked")
NamedList<List<NamedList<Object>>> query
= (NamedList<List<NamedList<Object>>>) field.get("query");
NamedList<Object> query
= (NamedList<Object>) field.get("query");
if (query != null) {
List<AnalysisPhase> phases = buildPhases(query);
fieldAnalysis.setQueryPhases(phases);
}
@SuppressWarnings("unchecked")
NamedList<NamedList<List<NamedList<Object>>>> index
= (NamedList<NamedList<List<NamedList<Object>>>>) field.get("index");
for (Map.Entry<String, NamedList<List<NamedList<Object>>>> valueEntry : index) {
NamedList<NamedList<Object>> index
= (NamedList<NamedList<Object>>) field.get("index");
for (Map.Entry<String, NamedList<Object>> valueEntry : index) {
String fieldValue = valueEntry.getKey();
NamedList<List<NamedList<Object>>> valueNL = valueEntry.getValue();
NamedList<Object> valueNL = valueEntry.getValue();
List<AnalysisPhase> phases = buildPhases(valueNL);
fieldAnalysis.setIndexPhases(fieldValue, phases);
}

View File

@ -39,30 +39,30 @@ public class FieldAnalysisResponse extends AnalysisResponseBase {
super.setResponse(response);
@SuppressWarnings("unchecked")
NamedList<NamedList<NamedList<NamedList<List<NamedList<Object>>>>>> analysisNL
= (NamedList<NamedList<NamedList<NamedList<List<NamedList<Object>>>>>>) response.get("analysis");
NamedList<NamedList<NamedList<NamedList<Object>>>> analysisNL
= (NamedList<NamedList<NamedList<NamedList<Object>>>>) response.get("analysis");
for (Map.Entry<String, NamedList<NamedList<List<NamedList<Object>>>>> entry
for (Map.Entry<String, NamedList<NamedList<Object>>> entry
: analysisNL.get("field_types")) {
analysisByFieldTypeName.put(entry.getKey(), buildAnalysis(entry.getValue()));
}
for (Map.Entry<String, NamedList<NamedList<List<NamedList<Object>>>>> entry
for (Map.Entry<String, NamedList<NamedList<Object>>> entry
: analysisNL.get("field_names")) {
analysisByFieldName.put(entry.getKey(), buildAnalysis(entry.getValue()));
}
}
private Analysis buildAnalysis(NamedList<NamedList<List<NamedList<Object>>>> value) {
private Analysis buildAnalysis(NamedList<NamedList<Object>> value) {
Analysis analysis = new Analysis();
NamedList<List<NamedList<Object>>> queryNL = value.get("query");
NamedList<Object> queryNL = value.get("query");
List<AnalysisPhase> phases = (queryNL == null) ? null : buildPhases(queryNL);
analysis.setQueryPhases(phases);
NamedList<List<NamedList<Object>>> indexNL = value.get("index");
NamedList<Object> indexNL = value.get("index");
phases = buildPhases(indexNL);
analysis.setIndexPhases(phases);

View File

@ -98,6 +98,19 @@ public class AnlysisResponseBaseTest extends LuceneTestCase {
assertPhase(phases.get(3), "Filter3", 3, tokenInfo);
}
/**
 * Tests the {@link AnalysisResponseBase#buildPhases(org.apache.solr.common.util.NamedList)}
 * method for the special case of CharacterFilter, where the phase value is a plain string
 * instead of a list of tokens.
 */
@Test
public void testCharFilterBuildPhases() throws Exception {
  // Typed NamedList (not the raw type) so the buildPhases call needs no unchecked conversion.
  NamedList<Object> nl = new NamedList<>();
  nl.add("CharFilter1", "CharFilterOutput"); //not list of tokens
  AnalysisResponseBase response = new AnalysisResponseBase();
  List<AnalysisResponseBase.AnalysisPhase> phases = response.buildPhases(nl);
  // The single string-valued entry must produce exactly one phase.
  assertEquals(1, phases.size());
}
//================================================ Helper Methods ==================================================
private List<NamedList> buildFakeTokenInfoList(int numberOfTokens) {

View File

@ -47,7 +47,7 @@ public class DocumentAnalysisResponseTest extends LuceneTestCase {
DocumentAnalysisResponse response = new DocumentAnalysisResponse() {
@Override
protected List<AnalysisPhase> buildPhases(NamedList<List<NamedList<Object>>> phaseNL) {
protected List<AnalysisPhase> buildPhases(NamedList<Object> phaseNL) {
return phases;
}
};

View File

@ -48,7 +48,7 @@ public class FieldAnalysisResponseTest extends LuceneTestCase {
NamedList responseNL = buildResponse();
FieldAnalysisResponse response = new FieldAnalysisResponse() {
@Override
protected List<AnalysisPhase> buildPhases(NamedList<List<NamedList<Object>>> phaseNL) {
protected List<AnalysisPhase> buildPhases(NamedList<Object> phaseNL) {
return phases;
}
};