mirror of https://github.com/apache/lucene.git
SOLR-2400: Field- and DocumentAnalysisRequestHandler now provide a position history for each token, so you can follow the token through all analysis stages
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1134685 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86b864324a
commit
c884384d5c
|
@ -285,6 +285,12 @@ New Features
|
|||
compared to ternary trees and jaspell and very fast lookups at runtime.
|
||||
(Dawid Weiss)
|
||||
|
||||
* SOLR-2400: Field- and DocumentAnalysisRequestHandler now provide a position
|
||||
history for each token, so you can follow the token through all analysis stages.
|
||||
The output contains a separate string attribute that is a "/"-delimited string
|
||||
containing all positions from previous Tokenizers/TokenFilters.
|
||||
(Uwe Schindler)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -25,10 +25,11 @@ import org.apache.lucene.analysis.tokenattributes.*;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.SorterTemplate;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.solr.analysis.CharFilterFactory;
|
||||
import org.apache.solr.analysis.TokenFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
|
@ -120,10 +121,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);
|
||||
|
||||
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
|
||||
for (final AttributeSource tok : tokens) {
|
||||
tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
|
||||
}
|
||||
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
|
||||
List<AttributeSource> tokenList = analyzeTokenStream(tokenStream);
|
||||
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
|
||||
listBasedTokenStream = new ListBasedTokenStream(tokenList);
|
||||
tokens = analyzeTokenStream(tokenStream);
|
||||
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
|
||||
listBasedTokenStream = new ListBasedTokenStream(tokens);
|
||||
}
|
||||
|
||||
return namedList;
|
||||
|
@ -160,15 +164,19 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
* @return List of tokens produced from the TokenStream
|
||||
*/
|
||||
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
|
||||
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
|
||||
final List<AttributeSource> tokens = new ArrayList<AttributeSource>();
|
||||
final PositionIncrementAttribute posIncrAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
final TokenTrackingAttribute trackerAtt = tokenStream.addAttribute(TokenTrackingAttribute.class);
|
||||
// for backwards compatibility, add all "common" attributes
|
||||
tokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
tokenStream.addAttribute(OffsetAttribute.class);
|
||||
tokenStream.addAttribute(TypeAttribute.class);
|
||||
final BytesRef bytes = new BytesRef();
|
||||
try {
|
||||
tokenStream.reset();
|
||||
int position = 0;
|
||||
while (tokenStream.incrementToken()) {
|
||||
position += posIncrAtt.getPositionIncrement();
|
||||
trackerAtt.setActPosition(position);
|
||||
tokens.add(tokenStream.cloneAttributes());
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
|
@ -183,6 +191,8 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
put(OffsetAttribute.class.getName() + "#startOffset", "start");
|
||||
put(OffsetAttribute.class.getName() + "#endOffset", "end");
|
||||
put(TypeAttribute.class.getName() + "#type", "type");
|
||||
put(TokenTrackingAttribute.class.getName() + "#position", "position");
|
||||
put(TokenTrackingAttribute.class.getName() + "#positionHistory", "positionHistory");
|
||||
}});
|
||||
|
||||
/**
|
||||
|
@ -193,49 +203,35 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
*
|
||||
* @return List of NamedLists containing the relevant information taken from the tokens
|
||||
*/
|
||||
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokens, AnalysisContext context) {
|
||||
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
|
||||
final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
|
||||
|
||||
final int[] positions = new int[tokens.size()];
|
||||
int position = 0;
|
||||
for (int i = 0, c = tokens.size(); i < c; i++) {
|
||||
AttributeSource token = tokens.get(i);
|
||||
position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
|
||||
positions[i] = position;
|
||||
}
|
||||
final FieldType fieldType = context.getFieldType();
|
||||
final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
|
||||
|
||||
// sort the tokens by absolute position
|
||||
new SorterTemplate() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final int p = positions[i];
|
||||
positions[i] = positions[j];
|
||||
positions[j] = p;
|
||||
Collections.swap(tokens, i, j);
|
||||
ArrayUtil.mergeSort(tokens, new Comparator<AttributeSource>() {
|
||||
public int compare(AttributeSource a, AttributeSource b) {
|
||||
return arrayCompare(
|
||||
a.getAttribute(TokenTrackingAttribute.class).getPositions(),
|
||||
b.getAttribute(TokenTrackingAttribute.class).getPositions()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return positions[i] - positions[j];
|
||||
private int arrayCompare(int[] a, int[] b) {
|
||||
int p = 0;
|
||||
final int stop = Math.min(a.length, b.length);
|
||||
while(p < stop) {
|
||||
int diff = a[p] - b[p];
|
||||
if (diff != 0) return diff;
|
||||
p++;
|
||||
}
|
||||
// One is a prefix of the other, or, they are equal:
|
||||
return a.length - b.length;
|
||||
}
|
||||
});
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = positions[i];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return pivot - positions[j];
|
||||
}
|
||||
|
||||
private int pivot;
|
||||
}.mergeSort(0, tokens.size() - 1);
|
||||
|
||||
FieldType fieldType = context.getFieldType();
|
||||
|
||||
for (int i = 0, c = tokens.size(); i < c; i++) {
|
||||
AttributeSource token = tokens.get(i);
|
||||
for (int i = 0; i < tokens.length; i++) {
|
||||
AttributeSource token = tokens[i];
|
||||
final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
|
||||
final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
|
||||
BytesRef rawBytes = termAtt.getBytesRef();
|
||||
|
@ -256,8 +252,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
tokenNamedList.add("match", true);
|
||||
}
|
||||
|
||||
tokenNamedList.add("position", positions[i]);
|
||||
|
||||
token.reflectWith(new AttributeReflector() {
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
// leave out position and bytes term
|
||||
|
@ -312,8 +306,8 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
|
||||
/**
|
||||
* TokenStream that iterates over a list of pre-existing Tokens
|
||||
* @lucene.internal
|
||||
*/
|
||||
// TODO refactor to support custom attributes
|
||||
protected final static class ListBasedTokenStream extends TokenStream {
|
||||
private final List<AttributeSource> tokens;
|
||||
private Iterator<AttributeSource> tokenIterator;
|
||||
|
@ -350,6 +344,69 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
}
|
||||
}
|
||||
|
||||
/** This is an {@link Attribute} used to track the positions of tokens
|
||||
* in the analysis chain.
|
||||
* @lucene.internal This class is only public for usage by the {@link AttributeSource} API.
|
||||
*/
|
||||
public interface TokenTrackingAttribute extends Attribute {
|
||||
void freezeStage();
|
||||
void setActPosition(int pos);
|
||||
int[] getPositions();
|
||||
void reset(int[] basePositions, int position);
|
||||
}
|
||||
|
||||
/** Implementation of {@link TokenTrackingAttribute}.
|
||||
* @lucene.internal This class is only public for usage by the {@link AttributeSource} API.
|
||||
*/
|
||||
public static final class TokenTrackingAttributeImpl extends AttributeImpl implements TokenTrackingAttribute {
|
||||
private int[] basePositions = new int[0];
|
||||
private int position = 0;
|
||||
|
||||
public void freezeStage() {
|
||||
this.basePositions = getPositions();
|
||||
this.position = 0;
|
||||
}
|
||||
|
||||
public void setActPosition(int pos) {
|
||||
this.position = pos;
|
||||
}
|
||||
|
||||
public int[] getPositions() {
|
||||
final int[] positions = new int[basePositions.length + 1];
|
||||
System.arraycopy(basePositions, 0, positions, 0, basePositions.length);
|
||||
positions[basePositions.length] = position;
|
||||
return positions;
|
||||
}
|
||||
|
||||
public void reset(int[] basePositions, int position) {
|
||||
this.basePositions = basePositions;
|
||||
this.position = position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
// we do nothing here, as all attribute values are controlled externally by consumer
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
final int[] positions = getPositions();
|
||||
final StringBuilder sb = new StringBuilder(positions.length * 2);
|
||||
for (int p : positions) {
|
||||
if (sb.length() > 0) sb.append('/');
|
||||
sb.append(p);
|
||||
}
|
||||
reflector.reflect(TokenTrackingAttribute.class, "positionHistory", sb.toString());
|
||||
reflector.reflect(TokenTrackingAttribute.class, "position", position);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
final TokenTrackingAttribute t = (TokenTrackingAttribute) target;
|
||||
t.reset(basePositions, position);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Serves as the context of an analysis process. This context contains the following constructs
|
||||
*/
|
||||
|
|
|
@ -37,6 +37,7 @@ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 {
|
|||
assertEquals(new Integer(info.getStart()), token.get("start"));
|
||||
assertEquals(new Integer(info.getEnd()), token.get("end"));
|
||||
assertEquals(new Integer(info.getPosition()), token.get("position"));
|
||||
assertEquals(info.getPositionHistory(), token.get("positionHistory"));
|
||||
if (info.isMatch()) {
|
||||
assertEquals(Boolean.TRUE, token.get("match"));
|
||||
}
|
||||
|
@ -57,6 +58,7 @@ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 {
|
|||
private int end;
|
||||
private String payload;
|
||||
private int position;
|
||||
private String positionHistory;
|
||||
private boolean match;
|
||||
|
||||
public TokenInfo(
|
||||
|
@ -66,6 +68,7 @@ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 {
|
|||
int start,
|
||||
int end,
|
||||
int position,
|
||||
String positionHistory,
|
||||
String payload,
|
||||
boolean match) {
|
||||
|
||||
|
@ -75,6 +78,7 @@ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 {
|
|||
this.start = start;
|
||||
this.end = end;
|
||||
this.position = position;
|
||||
this.positionHistory = positionHistory;
|
||||
this.payload = payload;
|
||||
this.match = match;
|
||||
}
|
||||
|
@ -107,6 +111,10 @@ public abstract class AnalysisRequestHandlerTestBase extends SolrTestCaseJ4 {
|
|||
return position;
|
||||
}
|
||||
|
||||
public String getPositionHistory() {
|
||||
return positionHistory;
|
||||
}
|
||||
|
||||
public boolean isMatch() {
|
||||
return match;
|
||||
}
|
||||
|
|
|
@ -235,7 +235,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
assertTrue("Only the default analyzer should be applied", name.matches("org.apache.solr.schema.FieldType\\$DefaultAnalyzer.*"));
|
||||
List<NamedList> tokenList = (List<NamedList>) queryResult.getVal(0);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "word", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "word", 0, 7, 1, "1", null, false));
|
||||
NamedList<Object> indexResult = idResult.get("index");
|
||||
|
||||
assertEquals("The id field has only a single value", 1, indexResult.size());
|
||||
|
@ -245,7 +245,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
assertTrue("Only the default analyzer should be applied", name.matches("org.apache.solr.schema.FieldType\\$DefaultAnalyzer.*"));
|
||||
tokenList = valueResult.getVal(0);
|
||||
assertEquals("The 'id' field value has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("1", null, "word", 0, 1, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("1", null, "word", 0, 1, 1, "1", null, false));
|
||||
***/
|
||||
|
||||
// the name field
|
||||
|
@ -255,14 +255,14 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.WhitespaceTokenizer");
|
||||
assertNotNull("Expecting the 'WhitespaceTokenizer' to be applied on the query for the 'whitetok' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "word", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "word", 0, 7, 1, "1", null, false));
|
||||
indexResult = whitetokResult.get("index");
|
||||
assertEquals("The 'whitetok' field has only a single value", 1, indexResult.size());
|
||||
valueResult = (NamedList<List<NamedList>>) indexResult.get("Jumping Jack");
|
||||
tokenList = valueResult.getVal(0);
|
||||
assertEquals("Expecting 2 tokens to be present", 2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("Jumping", null, "word", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Jack", null, "word", 8, 12, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("Jumping", null, "word", 0, 7, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Jack", null, "word", 8, 12, 2, "2", null, false));
|
||||
|
||||
// the text field
|
||||
NamedList<NamedList<Object>> textResult = documentResult.get("text");
|
||||
|
@ -271,66 +271,66 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.standard.StandardTokenizer");
|
||||
assertNotNull("Expecting the 'StandardTokenizer' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, "1", null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.standard.StandardFilter");
|
||||
assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, "1/1", null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, "1/1/1", null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, "1/1/1/1", null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.en.PorterStemFilter");
|
||||
assertNotNull("Expecting the 'PorterStemFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("jump", null, "<ALPHANUM>", 0, 7, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("jump", null, "<ALPHANUM>", 0, 7, 1, "1/1/1/1/1", null, false));
|
||||
indexResult = textResult.get("index");
|
||||
assertEquals("The 'text' field has only a single value", 1, indexResult.size());
|
||||
valueResult = (NamedList<List<NamedList>>) indexResult.get("The Fox Jumped Over The Dogs");
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.standard.StandardTokenizer");
|
||||
assertNotNull("Expecting the 'StandardTokenizer' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 6 tokens", 6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("The", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Fox", null, "<ALPHANUM>", 4, 7, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "<ALPHANUM>", 8, 14, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("The", null, "<ALPHANUM>", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Fox", null, "<ALPHANUM>", 4, 7, 2, "2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "<ALPHANUM>", 8, 14, 3, "3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, "4", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, "5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, "6", null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.standard.StandardFilter");
|
||||
assertNotNull("Expecting the 'StandardFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 6 tokens", 6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("The", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Fox", null, "<ALPHANUM>", 4, 7, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "<ALPHANUM>", 8, 14, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("The", null, "<ALPHANUM>", 0, 3, 1, "1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("Fox", null, "<ALPHANUM>", 4, 7, 2, "2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "<ALPHANUM>", 8, 14, 3, "3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, "4/4", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, "5/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, "6/6", null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 6 tokens", 6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, "1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, "2/2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, "3/3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, "4/4/4", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, "5/5/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, "6/6/6", null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 4, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 1, "2/2/2/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 2, "3/3/3/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 3, "4/4/4/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 4, "6/6/6/4", null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.en.PorterStemFilter");
|
||||
assertNotNull("Expecting the 'PorterStemFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 4 tokens", 4, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("jump", null, "<ALPHANUM>", 8, 14, 2, null, true));
|
||||
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("dog", null, "<ALPHANUM>", 24, 28, 4, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 1, "2/2/2/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("jump", null, "<ALPHANUM>", 8, 14, 2, "3/3/3/2/2", null, true));
|
||||
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 3, "4/4/4/3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("dog", null, "<ALPHANUM>", 24, 28, 4, "6/6/6/4/4", null, false));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,64 +139,64 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
List<NamedList> tokenList = indexPart.get("org.apache.lucene.analysis.standard.StandardTokenizer");
|
||||
assertNotNull("Expcting StandardTokenizer analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 10);
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, "2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, "3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, "4", null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, "5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, "6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, "7", null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, "8", null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, "9", null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, "10", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.standard.StandardFilter");
|
||||
assertNotNull("Expcting StandardFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 10);
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, "1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, "2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, "3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, "4/4", null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, "5/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, "6/6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, "7/7", null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, "8/8", null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, "9/9", null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, "10/10", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 10);
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, "1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, "2/2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, "3/3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, "4/4/4", null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, "5/5/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, "6/6/6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, "7/7/7", null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, "8/8/8", null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, "9/9/9", null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, "10/10/10", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 8);
|
||||
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 3, null, true));
|
||||
assertToken(tokenList.get(3), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 4, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 7, null, true));
|
||||
assertToken(tokenList.get(7), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 8, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 1, "2/2/2/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 2, "3/3/3/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 3, "4/4/4/3", null, true));
|
||||
assertToken(tokenList.get(3), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 4, "5/5/5/4", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 5, "6/6/6/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 6, "8/8/8/6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 7, "9/9/9/7", null, true));
|
||||
assertToken(tokenList.get(7), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 8, "10/10/10/8", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.en.PorterStemFilter");
|
||||
assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 8);
|
||||
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 3, null, true));
|
||||
assertToken(tokenList.get(3), new TokenInfo("jump", null, "<ALPHANUM>", 18, 24, 4, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("lazi", null, "<ALPHANUM>", 34, 38, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 7, null, true));
|
||||
assertToken(tokenList.get(7), new TokenInfo("dog", null, "<ALPHANUM>", 45, 49, 8, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 1, "2/2/2/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 2, "3/3/3/2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 3, "4/4/4/3/3", null, true));
|
||||
assertToken(tokenList.get(3), new TokenInfo("jump", null, "<ALPHANUM>", 18, 24, 4, "5/5/5/4/4", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 5, "6/6/6/5/5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("lazi", null, "<ALPHANUM>", 34, 38, 6, "8/8/8/6/6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 7, "9/9/9/7/7", null, true));
|
||||
assertToken(tokenList.get(7), new TokenInfo("dog", null, "<ALPHANUM>", 45, 49, 8, "10/10/10/8/8", null, false));
|
||||
|
||||
NamedList<List<NamedList>> queryPart = textType.get("query");
|
||||
assertNotNull("expecting a query token analysis for field type 'text'", queryPart);
|
||||
|
@ -204,28 +204,28 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = queryPart.get("org.apache.lucene.analysis.standard.StandardTokenizer");
|
||||
assertNotNull("Expecting StandardTokenizer analysis breakdown", tokenList);
|
||||
assertEquals("Expecting StandardTokenizer to produce 2 tokens from '" + request.getQuery() + "'", 2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, "2", null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.standard.StandardFilter");
|
||||
assertNotNull("Expcting StandardFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, "1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, "2/2", null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, "1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, "2/2/2", null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, "1/1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, "2/2/2/2", null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.en.PorterStemFilter");
|
||||
assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, "1/1/1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, "2/2/2/2/2", null, false));
|
||||
|
||||
NamedList<NamedList> nameTextType = fieldTypes.get("nametext");
|
||||
assertNotNull("expecting result for field type 'nametext'", nameTextType);
|
||||
|
@ -236,22 +236,22 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = indexPart.get("org.apache.lucene.analysis.core.WhitespaceTokenizer");
|
||||
assertNotNull("Expcting WhitespaceTokenizer analysis breakdown", tokenList);
|
||||
assertEquals(10, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "word", 14, 17, 4, null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "word", 18, 24, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "word", 25, 29, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "word", 30, 33, 7, null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "word", 34, 38, 8, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "word", 39, 44, 9, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "word", 45, 49, 10, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, "2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, "3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "word", 14, 17, 4, "4", null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "word", 18, 24, 5, "5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "word", 25, 29, 6, "6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "word", 30, 33, 7, "7", null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "word", 34, 38, 8, "8", null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "word", 39, 44, 9, "9", null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "word", 45, 49, 10, "10", null, false));
|
||||
|
||||
queryPart = nameTextType.get("query");
|
||||
assertNotNull("expecting a query token analysis for field type 'nametext'", queryPart);
|
||||
tokenList = queryPart.get(WhitespaceTokenizer.class.getName());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, "2", null, false));
|
||||
|
||||
NamedList<NamedList> fieldNames = result.get("field_names");
|
||||
assertNotNull("field_nameds should never be null", fieldNames);
|
||||
|
@ -265,16 +265,16 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = indexPart.get(WhitespaceTokenizer.class.getName());
|
||||
assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting WhitespaceTokenizer to produce 10 tokens", 10, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "word", 14, 17, 4, null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "word", 18, 24, 5, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "word", 25, 29, 6, null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "word", 30, 33, 7, null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "word", 34, 38, 8, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "word", 39, 44, 9, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "word", 45, 49, 10, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, "2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, "3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("fox", null, "word", 14, 17, 4, "4", null, true));
|
||||
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "word", 18, 24, 5, "5", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("over", null, "word", 25, 29, 6, "6", null, false));
|
||||
assertToken(tokenList.get(6), new TokenInfo("the", null, "word", 30, 33, 7, "7", null, false));
|
||||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "word", 34, 38, 8, "8", null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "word", 39, 44, 9, "9", null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "word", 45, 49, 10, "10", null, false));
|
||||
|
||||
queryPart = whitetok.get("query");
|
||||
assertNotNull("expecting a query token analysis for field 'whitetok'", queryPart);
|
||||
|
@ -282,8 +282,8 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = queryPart.get(WhitespaceTokenizer.class.getName());
|
||||
assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting WhitespaceTokenizer to produce 2 tokens", 2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, "2", null, false));
|
||||
|
||||
NamedList<NamedList> keywordtok = fieldNames.get("keywordtok");
|
||||
assertNotNull("expecting result for field 'keywordtok'", keywordtok);
|
||||
|
@ -294,7 +294,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = indexPart.get(KeywordTokenizer.class.getName());
|
||||
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the quick red fox jumped over the lazy brown dogs", null, "word", 0, 49, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("the quick red fox jumped over the lazy brown dogs", null, "word", 0, 49, 1, "1", null, false));
|
||||
|
||||
queryPart = keywordtok.get("query");
|
||||
assertNotNull("expecting a query token analysis for field 'keywordtok'", queryPart);
|
||||
|
@ -302,7 +302,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
tokenList = queryPart.get(KeywordTokenizer.class.getName());
|
||||
assertNotNull("expecting only KeywordTokenizer to be applied", tokenList);
|
||||
assertEquals("expecting KeywordTokenizer to produce 1 token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox brown", null, "word", 0, 9, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox brown", null, "word", 0, 9, 1, "1", null, false));
|
||||
|
||||
}
|
||||
|
||||
|
@ -331,6 +331,53 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
List<NamedList> tokenList = (List<NamedList>)indexPart.get("org.apache.lucene.analysis.core.WhitespaceTokenizer");
|
||||
assertNotNull("Expecting WhitespaceTokenizer analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 1);
|
||||
assertToken(tokenList.get(0), new TokenInfo("whatever", null, "word", 12, 20, 1, null, false));
|
||||
assertToken(tokenList.get(0), new TokenInfo("whatever", null, "word", 12, 20, 1, "1", null, false));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPositionHistoryWithWDF() throws Exception {
|
||||
|
||||
FieldAnalysisRequest request = new FieldAnalysisRequest();
|
||||
request.addFieldType("skutype1");
|
||||
request.setFieldValue("hi, 3456-12 a Test");
|
||||
request.setShowMatch(false);
|
||||
|
||||
NamedList<NamedList> result = handler.handleAnalysisRequest(request, h.getCore().getSchema());
|
||||
assertTrue("result is null and it shouldn't be", result != null);
|
||||
|
||||
NamedList<NamedList> fieldTypes = result.get("field_types");
|
||||
assertNotNull("field_types should never be null", fieldTypes);
|
||||
NamedList<NamedList> textType = fieldTypes.get("skutype1");
|
||||
assertNotNull("expecting result for field type 'skutype1'", textType);
|
||||
|
||||
NamedList<List<NamedList>> indexPart = textType.get("index");
|
||||
assertNotNull("expecting an index token analysis for field type 'skutype1'", indexPart);
|
||||
|
||||
List<NamedList> tokenList = indexPart.get("org.apache.lucene.analysis.core.WhitespaceTokenizer");
|
||||
assertNotNull("Expcting WhitespaceTokenizer analysis breakdown", tokenList);
|
||||
assertEquals(4, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi,", null, "word", 0, 3, 1, "1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, "2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("a", null, "word", 12, 13, 3, "3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Test", null, "word", 14, 18, 4, "4", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter");
|
||||
assertNotNull("Expcting WordDelimiterFilter analysis breakdown", tokenList);
|
||||
assertEquals(6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, "1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, "2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("12", null, "word", 9, 11, 3, "2/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("345612", null, "word", 4, 11, 3, "2/3", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, "3/4", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, "4/5", null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, "1/1/1", null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, "2/2/2", null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("12", null, "word", 9, 11, 3, "2/3/3", null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("345612", null, "word", 4, 11, 3, "2/3/3", null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, "3/4/4", null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("test", null, "word", 14, 18, 5, "4/5/5", null, false));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue