mirror of https://github.com/apache/lucene.git
LUCENE-7867: Remove deprecated Token class
This commit is contained in:
parent
69d0c70651
commit
af1ee47f2b
|
@ -86,6 +86,9 @@ API Changes
|
||||||
* LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream
|
* LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream
|
||||||
(Alan Woodward, Uwe Schindler, Adrien Grand)
|
(Alan Woodward, Uwe Schindler, Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7867: The deprecated Token class is now only available in the test
|
||||||
|
framework (Alan Woodward, Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
|
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
|
||||||
|
|
|
@ -17,18 +17,17 @@
|
||||||
package org.apache.lucene.analysis.payloads;
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
|
* Assigns a payload to a token based on the {@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}
|
||||||
*
|
|
||||||
**/
|
**/
|
||||||
public class NumericPayloadTokenFilter extends TokenFilter {
|
public class NumericPayloadTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
|
|
|
@ -17,17 +17,17 @@
|
||||||
package org.apache.lucene.analysis.payloads;
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
|
* Makes the {@link TypeAttribute} a payload.
|
||||||
*
|
*
|
||||||
* Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
|
* Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
|
||||||
*
|
*
|
||||||
|
|
|
@ -40,12 +40,7 @@ import org.apache.lucene.util.AttributeSource;
|
||||||
* <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
|
* <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
|
||||||
* <code>TokenStream</code>.
|
* <code>TokenStream</code>.
|
||||||
* </ul>
|
* </ul>
|
||||||
* A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
|
* <code>TokenStream</code> extends {@link AttributeSource}, which provides
|
||||||
* has moved from being {@link Token}-based to {@link Attribute}-based. While
|
|
||||||
* {@link Token} still exists in 2.9 as a convenience class, the preferred way
|
|
||||||
* to store the information of a {@link Token} is to use {@link AttributeImpl}s.
|
|
||||||
* <p>
|
|
||||||
* <code>TokenStream</code> now extends {@link AttributeSource}, which provides
|
|
||||||
* access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
|
* access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
|
||||||
* Note that only one instance per {@link AttributeImpl} is created and reused
|
* Note that only one instance per {@link AttributeImpl} is created and reused
|
||||||
* for every token. This approach reduces object creation and allows local
|
* for every token. This approach reduces object creation and allows local
|
||||||
|
|
|
@ -253,11 +253,6 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
|
||||||
/**
|
/**
|
||||||
* Returns solely the term text as specified by the
|
* Returns solely the term text as specified by the
|
||||||
* {@link CharSequence} interface.
|
* {@link CharSequence} interface.
|
||||||
* <p>This method changed the behavior with Lucene 3.1,
|
|
||||||
* before it returned a String representation of the whole
|
|
||||||
* term with all attributes.
|
|
||||||
* This affects especially the
|
|
||||||
* {@link org.apache.lucene.analysis.Token} subclass.
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.search.highlight;
|
package org.apache.lucene.search.highlight;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
@ -29,7 +28,6 @@ public class TokenGroup {
|
||||||
|
|
||||||
private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
|
private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
|
||||||
|
|
||||||
private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
|
|
||||||
private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
|
private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
|
||||||
private int numTokens = 0;
|
private int numTokens = 0;
|
||||||
private int startOffset = 0;
|
private int startOffset = 0;
|
||||||
|
@ -68,10 +66,7 @@ public class TokenGroup {
|
||||||
tot += score;
|
tot += score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Token token = new Token();
|
|
||||||
token.setOffset(termStartOffset, termEndOffset);
|
|
||||||
token.setEmpty().append(termAtt);
|
|
||||||
tokens[numTokens] = token;
|
|
||||||
scores[numTokens] = score;
|
scores[numTokens] = score;
|
||||||
numTokens++;
|
numTokens++;
|
||||||
}
|
}
|
||||||
|
@ -86,14 +81,6 @@ public class TokenGroup {
|
||||||
tot = 0;
|
tot = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param index a value between 0 and numTokens -1
|
|
||||||
* @return the "n"th token
|
|
||||||
*/
|
|
||||||
public Token getToken(int index) {
|
|
||||||
return tokens[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param index a value between 0 and numTokens -1
|
* @param index a value between 0 and numTokens -1
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
||||||
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||||
* this does not do line-number counting, but instead keeps track of the
|
* this does not do line-number counting, but instead keeps track of the
|
||||||
* character position of the token in the input, as required by Lucene's {@link
|
* character position of the token in the input, as required by Lucene's {@link
|
||||||
* org.apache.lucene.analysis.Token} API.
|
* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
|
||||||
* */
|
* */
|
||||||
public final class FastCharStream implements CharStream {
|
public final class FastCharStream implements CharStream {
|
||||||
char[] buffer = null;
|
char[] buffer = null;
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
||||||
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||||
* this does not do line-number counting, but instead keeps track of the
|
* this does not do line-number counting, but instead keeps track of the
|
||||||
* character position of the token in the input, as required by Lucene's {@link
|
* character position of the token in the input, as required by Lucene's {@link
|
||||||
* org.apache.lucene.analysis.Token} API.
|
* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
|
||||||
* */
|
* */
|
||||||
public final class FastCharStream implements CharStream {
|
public final class FastCharStream implements CharStream {
|
||||||
char[] buffer = null;
|
char[] buffer = null;
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
||||||
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||||
* this does not do line-number counting, but instead keeps track of the
|
* this does not do line-number counting, but instead keeps track of the
|
||||||
* character position of the token in the input, as required by Lucene's {@link
|
* character position of the token in the input, as required by Lucene's {@link
|
||||||
* org.apache.lucene.analysis.Token} API. */
|
* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */
|
||||||
public final class FastCharStream implements CharStream {
|
public final class FastCharStream implements CharStream {
|
||||||
char[] buffer = null;
|
char[] buffer = null;
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.analysis;
|
||||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
|
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.util.Attribute;
|
|
||||||
import org.apache.lucene.util.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeReflector;
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
|
@ -45,14 +44,6 @@ import org.apache.lucene.util.BytesRef;
|
||||||
length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
|
length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
|
||||||
payloads from the index.
|
payloads from the index.
|
||||||
|
|
||||||
<br><br>
|
|
||||||
|
|
||||||
<p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
|
|
||||||
that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
|
|
||||||
Even though it is not necessary to use Token anymore, with the new TokenStream API it can
|
|
||||||
be used as convenience class that implements all {@link Attribute}s, which is especially useful
|
|
||||||
to easily switch from the old to the new TokenStream API.
|
|
||||||
|
|
||||||
A few things to note:
|
A few things to note:
|
||||||
<ul>
|
<ul>
|
||||||
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
|
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
|
||||||
|
@ -60,13 +51,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
<li>The startOffset and endOffset represent the start and end offsets in the source text, so be careful in adjusting them.</li>
|
<li>The startOffset and endOffset represent the start and end offsets in the source text, so be careful in adjusting them.</li>
|
||||||
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
|
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>
|
|
||||||
<b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
|
|
||||||
{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
|
|
||||||
This method now only prints the term text, no additional information anymore.
|
|
||||||
@deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
|
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
|
||||||
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
|
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
|
||||||
|
|
||||||
private int flags;
|
private int flags;
|
||||||
|
@ -166,7 +151,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
|
||||||
public Token clone() {
|
public Token clone() {
|
||||||
final Token t = (Token) super.clone();
|
final Token t = (Token) super.clone();
|
||||||
if (payload != null) {
|
if (payload != null) {
|
||||||
t.payload = payload.clone();
|
t.payload = BytesRef.deepCopyOf(payload);
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
@ -190,7 +175,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
|
||||||
public void copyTo(AttributeImpl target) {
|
public void copyTo(AttributeImpl target) {
|
||||||
super.copyTo(target);
|
super.copyTo(target);
|
||||||
((FlagsAttribute) target).setFlags(flags);
|
((FlagsAttribute) target).setFlags(flags);
|
||||||
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
|
((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
|
@ -31,7 +31,6 @@ import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -60,8 +59,8 @@ import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.search.DocSet;
|
import org.apache.solr.search.DocSet;
|
||||||
import org.apache.solr.search.QParser;
|
import org.apache.solr.search.QParser;
|
||||||
import org.apache.solr.search.SyntaxError;
|
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.search.SyntaxError;
|
||||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||||
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
|
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
|
||||||
import org.apache.solr.spelling.IndexBasedSpellChecker;
|
import org.apache.solr.spelling.IndexBasedSpellChecker;
|
||||||
|
@ -72,6 +71,7 @@ import org.apache.solr.spelling.SpellCheckCollator;
|
||||||
import org.apache.solr.spelling.SpellingOptions;
|
import org.apache.solr.spelling.SpellingOptions;
|
||||||
import org.apache.solr.spelling.SpellingQueryConverter;
|
import org.apache.solr.spelling.SpellingQueryConverter;
|
||||||
import org.apache.solr.spelling.SpellingResult;
|
import org.apache.solr.spelling.SpellingResult;
|
||||||
|
import org.apache.solr.spelling.Token;
|
||||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.*;
|
||||||
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||||
* this does not do line-number counting, but instead keeps track of the
|
* this does not do line-number counting, but instead keeps track of the
|
||||||
* character position of the token in the input, as required by Lucene's {@link
|
* character position of the token in the input, as required by Lucene's {@link
|
||||||
* org.apache.lucene.analysis.Token} API.
|
* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
|
||||||
* */
|
* */
|
||||||
public final class FastCharStream implements CharStream {
|
public final class FastCharStream implements CharStream {
|
||||||
char[] buffer = null;
|
char[] buffer = null;
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.StringDistance;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -24,19 +23,18 @@ import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.search.spell.SuggestWord;
|
|
||||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
|
||||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||||
import org.apache.lucene.search.spell.SpellChecker;
|
import org.apache.lucene.search.spell.SpellChecker;
|
||||||
|
import org.apache.lucene.search.spell.StringDistance;
|
||||||
|
import org.apache.lucene.search.spell.SuggestWord;
|
||||||
|
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||||
|
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FilterDirectory;
|
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.store.FilterDirectory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
|
|
@ -26,7 +26,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.search.spell.StringDistance;
|
import org.apache.lucene.search.spell.StringDistance;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||||
import org.apache.lucene.search.spell.StringDistance;
|
import org.apache.lucene.search.spell.StringDistance;
|
||||||
|
|
|
@ -29,8 +29,6 @@ import java.util.NoSuchElementException;
|
||||||
import java.util.PriorityQueue;
|
import java.util.PriorityQueue;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
* Given a list of possible Spelling Corrections for multiple mis-spelled words
|
* Given a list of possible Spelling Corrections for multiple mis-spelled words
|
||||||
|
|
|
@ -15,13 +15,12 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
* The QueryConverter is an abstract base class defining a method for converting
|
* The QueryConverter is an abstract base class defining a method for converting
|
||||||
|
@ -81,7 +80,7 @@ public abstract class QueryConverter implements NamedListInitializedPlugin {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Collection of {@link org.apache.lucene.analysis.Token}s for
|
* Returns the Collection of {@link Token}s for
|
||||||
* the query. Offsets on the Token should correspond to the correct
|
* the query. Offsets on the Token should correspond to the correct
|
||||||
* offset in the origQuery
|
* offset in the origQuery
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -16,8 +16,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
|
|
||||||
public class ResultEntry {
|
public class ResultEntry {
|
||||||
public Token token;
|
public Token token;
|
||||||
public String suggestion;
|
public String suggestion;
|
||||||
|
|
|
@ -15,8 +15,13 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||||
import org.apache.lucene.search.spell.StringDistance;
|
import org.apache.lucene.search.spell.StringDistance;
|
||||||
|
@ -31,12 +36,6 @@ import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -15,15 +15,12 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
import static org.apache.solr.common.params.CommonParams.ID;
|
|
||||||
|
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.CursorMarkParams;
|
import org.apache.solr.common.params.CursorMarkParams;
|
||||||
|
@ -43,6 +40,8 @@ import org.apache.solr.search.SolrIndexSearcher;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.apache.solr.common.params.CommonParams.ID;
|
||||||
|
|
||||||
public class SpellCheckCollator {
|
public class SpellCheckCollator {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
private int maxCollations = 1;
|
private int maxCollations = 1;
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
|
|
||||||
public class SpellCheckCorrection {
|
public class SpellCheckCorrection {
|
||||||
private Token original;
|
private Token original;
|
||||||
|
|
|
@ -16,13 +16,12 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.spell.SuggestMode;
|
import org.apache.lucene.search.spell.SuggestMode;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Collections;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
@ -80,7 +79,7 @@ public class SpellingResult {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Suggestions must be added with the best suggestion first. ORDER is important.
|
* Suggestions must be added with the best suggestion first. ORDER is important.
|
||||||
* @param token The {@link org.apache.lucene.analysis.Token}
|
* @param token The {@link Token}
|
||||||
* @param suggestion The suggestion for the Token
|
* @param suggestion The suggestion for the Token
|
||||||
* @param docFreq The document frequency
|
* @param docFreq The document frequency
|
||||||
*/
|
*/
|
||||||
|
@ -97,7 +96,7 @@ public class SpellingResult {
|
||||||
/**
|
/**
|
||||||
* Gets the suggestions for the given token.
|
* Gets the suggestions for the given token.
|
||||||
*
|
*
|
||||||
* @param token The {@link org.apache.lucene.analysis.Token} to look up
|
* @param token The {@link Token} to look up
|
||||||
* @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}.
|
* @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}.
|
||||||
*
|
*
|
||||||
* If the suggestions are added in sorted order (i.e. best suggestion first), then the iterator will return the suggestions in order
|
* If the suggestions are added in sorted order (i.e. best suggestion first), then the iterator will return the suggestions in order
|
||||||
|
|
|
@ -21,8 +21,6 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Passes the entire query string to the configured analyzer as-is.
|
* Passes the entire query string to the configured analyzer as-is.
|
||||||
**/
|
**/
|
||||||
|
|
|
@ -0,0 +1,175 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
/**
|
||||||
|
A Token is an occurrence of a term from the text of a field. It consists of
|
||||||
|
a term's text, the start and end offset of the term in the text of the field,
|
||||||
|
and a type string.
|
||||||
|
<p>
|
||||||
|
The start and end offsets permit applications to re-associate a token with
|
||||||
|
its source text, e.g., to display highlighted query terms in a document
|
||||||
|
browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
|
||||||
|
display, etc.
|
||||||
|
<p>
|
||||||
|
The type is a string, assigned by a lexical analyzer
|
||||||
|
(a.k.a. tokenizer), naming the lexical or syntactic class that the token
|
||||||
|
belongs to. For example an end of sentence marker token might be implemented
|
||||||
|
with type "eos". The default token type is "word".
|
||||||
|
<p>
|
||||||
|
A Token can optionally have metadata (a.k.a. payload) in the form of a variable
|
||||||
|
length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
|
||||||
|
payloads from the index.
|
||||||
|
|
||||||
|
A few things to note:
|
||||||
|
<ul>
|
||||||
|
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
|
||||||
|
<li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
|
||||||
|
<li>The startOffset and endOffset represent the start and end offsets in the source text, so be careful in adjusting them.</li>
|
||||||
|
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
|
||||||
|
</ul>
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
|
||||||
|
|
||||||
|
// TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack
|
||||||
|
|
||||||
|
private int flags;
|
||||||
|
private BytesRef payload;
|
||||||
|
|
||||||
|
/** Constructs a Token with null text. */
|
||||||
|
public Token() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Constructs a Token with the given term text, start
|
||||||
|
* and end offsets. The type defaults to "word."
|
||||||
|
* <b>NOTE:</b> for better indexing speed you should
|
||||||
|
* instead use the char[] termBuffer methods to set the
|
||||||
|
* term text.
|
||||||
|
* @param text term text
|
||||||
|
* @param start start offset in the source text
|
||||||
|
* @param end end offset in the source text
|
||||||
|
*/
|
||||||
|
public Token(CharSequence text, int start, int end) {
|
||||||
|
append(text);
|
||||||
|
setOffset(start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see FlagsAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getFlags() {
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see FlagsAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setFlags(int flags) {
|
||||||
|
this.flags = flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PayloadAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public BytesRef getPayload() {
|
||||||
|
return this.payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PayloadAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setPayload(BytesRef payload) {
|
||||||
|
this.payload = payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resets the term text, payload, flags, positionIncrement, positionLength,
|
||||||
|
* startOffset, endOffset and token type to default.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
super.clear();
|
||||||
|
flags = 0;
|
||||||
|
payload = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == this)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (obj instanceof Token) {
|
||||||
|
final Token other = (Token) obj;
|
||||||
|
return (
|
||||||
|
flags == other.flags &&
|
||||||
|
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
|
||||||
|
super.equals(obj)
|
||||||
|
);
|
||||||
|
} else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int code = super.hashCode();
|
||||||
|
code = code * 31 + flags;
|
||||||
|
if (payload != null) {
|
||||||
|
code = code * 31 + payload.hashCode();
|
||||||
|
}
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Token clone() {
|
||||||
|
final Token t = (Token) super.clone();
|
||||||
|
if (payload != null) {
|
||||||
|
t.payload = BytesRef.deepCopyOf(payload);
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void copyTo(AttributeImpl target) {
|
||||||
|
super.copyTo(target);
|
||||||
|
((FlagsAttribute) target).setFlags(flags);
|
||||||
|
((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reflectWith(AttributeReflector reflector) {
|
||||||
|
super.reflectWith(reflector);
|
||||||
|
reflector.reflect(FlagsAttribute.class, "flags", flags);
|
||||||
|
reflector.reflect(PayloadAttribute.class, "payload", payload);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -24,7 +24,6 @@ import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.spell.CombineSuggestion;
|
import org.apache.lucene.search.spell.CombineSuggestion;
|
||||||
|
|
|
@ -28,7 +28,6 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.search.spell.Dictionary;
|
import org.apache.lucene.search.spell.Dictionary;
|
||||||
import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
import org.apache.lucene.search.spell.HighFrequencyDictionary;
|
||||||
|
@ -47,6 +46,7 @@ import org.apache.solr.search.SolrIndexSearcher;
|
||||||
import org.apache.solr.spelling.SolrSpellChecker;
|
import org.apache.solr.spelling.SolrSpellChecker;
|
||||||
import org.apache.solr.spelling.SpellingOptions;
|
import org.apache.solr.spelling.SpellingOptions;
|
||||||
import org.apache.solr.spelling.SpellingResult;
|
import org.apache.solr.spelling.SpellingResult;
|
||||||
|
import org.apache.solr.spelling.Token;
|
||||||
import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
|
import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
|
||||||
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
|
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
|
||||||
import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
|
import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
|
||||||
|
|
|
@ -16,18 +16,18 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.handler.component;
|
package org.apache.solr.handler.component;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.solr.core.SolrCore;
|
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
|
||||||
import org.apache.solr.spelling.SolrSpellChecker;
|
|
||||||
import org.apache.solr.spelling.SpellingOptions;
|
|
||||||
import org.apache.solr.spelling.SpellingResult;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.spelling.SolrSpellChecker;
|
||||||
|
import org.apache.solr.spelling.SpellingOptions;
|
||||||
|
import org.apache.solr.spelling.SpellingResult;
|
||||||
|
import org.apache.solr.spelling.Token;
|
||||||
/**
|
/**
|
||||||
* A Dummy SpellChecker for testing purposes
|
* A Dummy SpellChecker for testing purposes
|
||||||
*
|
*
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.solr.spelling;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.common.params.SpellingParams;
|
import org.apache.solr.common.params.SpellingParams;
|
||||||
|
|
|
@ -20,7 +20,6 @@ import java.io.File;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Comparator;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
|
|
@ -16,7 +16,10 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -20,9 +20,7 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.spelling.PossibilityIterator;
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
|
@ -16,16 +16,15 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.spelling;
|
package org.apache.solr.spelling;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test for SpellingQueryConverter
|
* Test for SpellingQueryConverter
|
||||||
|
|
|
@ -22,11 +22,9 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CannedTokenStream;
|
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
|
@ -65,7 +63,11 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void assertConvertsTo(String text, String expected[]) throws IOException {
|
public void assertConvertsTo(String text, String expected[]) throws IOException {
|
||||||
Collection<Token> tokens = converter.convert(text);
|
Collection<Token> tokens = converter.convert(text);
|
||||||
TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0]));
|
assertEquals(tokens.size(), expected.length);
|
||||||
assertTokenStreamContents(ts, expected);
|
int i = 0;
|
||||||
|
for (Token token : tokens) {
|
||||||
|
assertEquals(token.toString(), expected[i]);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,6 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
|
Loading…
Reference in New Issue