SOLR-8460: /analysis/field could throw exceptions for custom attributes.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1721638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-12-24 16:25:59 +00:00
parent b650fe38f8
commit ef2aa314c5
4 changed files with 147 additions and 15 deletions

View File

@ -294,6 +294,8 @@ Bug Fixes
* SOLR-8059: &debug=results for distributed search when distrib.singlePass (sometimes activated
automatically) could result in an NPE. (David Smiley, Markus Jelsma)
* SOLR-8460: /analysis/field could throw exceptions for custom attributes. (David Smiley, Uwe Schindler)
Other Changes
----------------------
@ -835,7 +837,11 @@ Other Changes
check and update luceneMatchVersion under solr/example/ configs as well logic. (Varun Thacker)
================== 5.3.2 ==================
(No Changes)
Bug Fixes
----------------------
* SOLR-8460: /analysis/field could throw exceptions for custom attributes. (David Smiley, Uwe Schindler)
================== 5.3.1 ==================

View File

@ -129,16 +129,17 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);
ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, tokens);
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
for (final AttributeSource tok : tokens) {
tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
}
// overwrite the vars "tokenStream", "tokens", and "listBasedTokenStream"
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
listBasedTokenStream = new ListBasedTokenStream(tokens);
listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, tokens);
}
return namedList;
@ -190,7 +191,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
trackerAtt.setActPosition(position);
tokens.add(tokenStream.cloneAttributes());
}
tokenStream.end();
tokenStream.end(); // TODO should we capture?
} catch (IOException ioe) {
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
} finally {
@ -318,7 +319,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
}
// ================================================= Inner classes =================================================
/**
* TokenStream that iterates over a list of pre-existing Tokens
* @lucene.internal
@ -330,10 +330,19 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
/**
* Creates a new ListBasedTokenStream which uses the given tokens as its token source.
*
* @param attributeSource source of the attribute factory and attribute impls
* @param tokens Source of tokens to be used
*/
ListBasedTokenStream(List<AttributeSource> tokens) {
ListBasedTokenStream(AttributeSource attributeSource, List<AttributeSource> tokens) {
super(attributeSource.getAttributeFactory());
this.tokens = tokens;
// Make sure all the attributes of the source are here too
addAttributes(attributeSource);
}
@Override
public void reset() throws IOException {
super.reset();
tokenIterator = tokens.iterator();
}
@ -342,9 +351,9 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
if (tokenIterator.hasNext()) {
clearAttributes();
AttributeSource next = tokenIterator.next();
Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator();
while (atts.hasNext()) // make sure all att impls in the token exist here
addAttribute(atts.next());
addAttributes(next); // just in case there were delayed attribute additions
next.copyTo(this);
return true;
} else {
@ -352,10 +361,15 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
}
}
@Override
public void reset() throws IOException {
super.reset();
tokenIterator = tokens.iterator();
protected void addAttributes(AttributeSource attributeSource) {
// note: ideally we wouldn't call addAttributeImpl which is marked internal. But nonetheless it's possible
// this method is used by some custom attributes, especially since Solr doesn't provide a way to customize the
// AttributeFactory which is the recommended way to choose which classes implement which attributes.
Iterator<AttributeImpl> atts = attributeSource.getAttributeImplsIterator();
while (atts.hasNext()) {
addAttributeImpl(atts.next()); // adds both impl & interfaces
}
}
}

View File

@ -207,8 +207,8 @@ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {
*
* @return NamedList containing the tokens produced by the analyzers of the given field, separated into an index and
* a query group
*/
private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
*/ // package access for testing
NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
final String queryValue = analysisRequest.getQuery();
final Set<BytesRef> termsToMatch = (queryValue != null && analysisRequest.isShowMatch())

View File

@ -17,8 +17,22 @@
package org.apache.solr.handler;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttributeImpl;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
@ -27,10 +41,14 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.CustomAnalyzerStrField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TextField;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -433,4 +451,98 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
Collections.sort(vals);
assertEquals( "[s, s7, s7w, s7w1+, s9, s9v, s9v2+, sp, spp, spp5+, sv, svk, svk6+]", vals.toString() );
}
@Test //See SOLR-8460
public void testCustomAttribute() throws Exception {
FieldAnalysisRequest request = new FieldAnalysisRequest();
request.addFieldType("skutype1");
request.setFieldValue("hi, 3456-12 a Test");
request.setShowMatch(false);
FieldType fieldType = new TextField();
Analyzer analyzer = new TokenizerChain(
new TokenizerFactory(Collections.emptyMap()) {
@Override
public Tokenizer create(AttributeFactory factory) {
return new CustomTokenizer(factory);
}
},
new TokenFilterFactory[] {
new TokenFilterFactory(Collections.emptyMap()) {
@Override
public TokenStream create(TokenStream input) {
return new CustomTokenFilter(input);
}
}
}
);
fieldType.setIndexAnalyzer(analyzer);
NamedList<NamedList> result = handler.analyzeValues(request, fieldType, "fieldNameUnused");
// just test that we see "900" in the flags attribute here
List<NamedList> tokenInfoList = (List<NamedList>) result.findRecursive("index", CustomTokenFilter.class.getName());
// '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
assertEquals(901, tokenInfoList.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
/** A custom impl of a standard attribute impl; test this instance is used. */
public class CustomFlagsAttributeImpl extends FlagsAttributeImpl {
@Override
public void setFlags(int flags) {
super.setFlags(900 + flags);//silly modification
}
}
private class CustomTokenizer extends Tokenizer {
CharTermAttribute charAtt;
FlagsAttribute customAtt;
boolean sentOneToken;
public CustomTokenizer(AttributeFactory factory) {
super(factory);
addAttributeImpl(new CustomFlagsAttributeImpl());
charAtt = addAttribute(CharTermAttribute.class);
customAtt = addAttribute(FlagsAttribute.class);
}
@Override
public void reset() throws IOException {
sentOneToken = false;
}
@Override
public boolean incrementToken() throws IOException {
if (sentOneToken) {
return false;
}
sentOneToken = true;
clearAttributes();
charAtt.append("firstToken");
return true;
}
}
private class CustomTokenFilter extends TokenFilter {
FlagsAttribute flagAtt;
public CustomTokenFilter(TokenStream input) {
super(input);
flagAtt = getAttribute(FlagsAttribute.class);
if (flagAtt == null) {
throw new IllegalStateException("FlagsAttribute should have been added already");
}
if (!(flagAtt instanceof CustomFlagsAttributeImpl)) {
throw new IllegalStateException("FlagsAttribute should be our custom " + CustomFlagsAttributeImpl.class);
}
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
flagAtt.setFlags(1);
return true;
} else {
return false;
}
}
}
}