LUCENE-7867: Remove deprecated Token class

Alan Woodward 2017-06-08 14:43:43 +01:00
parent 69d0c70651
commit af1ee47f2b
37 changed files with 236 additions and 120 deletions

View File

@@ -86,6 +86,9 @@ API Changes
 * LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream
   (Alan Woodward, Uwe Schindler, Adrien Grand)
+* LUCENE-7867: The deprecated Token class is now only available in the test
+  framework (Alan Woodward, Adrien Grand)
 Bug Fixes
 * LUCENE-7626: IndexWriter will no longer accept broken token offsets

View File

@@ -17,18 +17,17 @@
 package org.apache.lucene.analysis.payloads;
+import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
 /**
-* Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
-*
+* Assigns a payload to a token based on the {@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}
 **/
 public class NumericPayloadTokenFilter extends TokenFilter {

View File

@@ -17,17 +17,17 @@
 package org.apache.lucene.analysis.payloads;
+import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
 /**
-* Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+* Makes the {@link TypeAttribute} a payload.
 *
 * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
 *
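Both payload filters above follow the attribute-based pattern the updated javadoc points to: read the current token's TypeAttribute, write its PayloadAttribute. A minimal sketch of that step, assuming typeAtt and payloadAtt fields obtained via addAttribute() in the filter's constructor, and using StandardCharsets.UTF_8 in place of the checked-exception-throwing String#getBytes(String):

@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false; // end of the wrapped stream
  }
  String type = typeAtt.type();
  if (type != null && !type.isEmpty()) {
    // the UTF-8 bytes of the type string become this token's payload
    payloadAtt.setPayload(new BytesRef(type.getBytes(StandardCharsets.UTF_8)));
  }
  return true;
}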

View File

@@ -40,12 +40,7 @@ import org.apache.lucene.util.AttributeSource;
 * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
 * <code>TokenStream</code>.
 * </ul>
-* A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
-* has moved from being {@link Token}-based to {@link Attribute}-based. While
-* {@link Token} still exists in 2.9 as a convenience class, the preferred way
-* to store the information of a {@link Token} is to use {@link AttributeImpl}s.
-* <p>
-* <code>TokenStream</code> now extends {@link AttributeSource}, which provides
+* <code>TokenStream</code> extends {@link AttributeSource}, which provides
 * access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
 * Note that only one instance per {@link AttributeImpl} is created and reused
 * for every token. This approach reduces object creation and allows local
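That single-instance reuse is why consumers fetch attribute references once, up front, and reread them after each incrementToken() call. A minimal consumer sketch, assuming an Analyzer instance named analyzer (the field name and text are illustrative):

try (TokenStream ts = analyzer.tokenStream("body", "some text to analyze")) {
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  ts.reset();                      // required before the first incrementToken()
  while (ts.incrementToken()) {
    // the same CharTermAttribute instance is refilled for every token
    System.out.println(termAtt.toString());
  }
  ts.end();                        // records final offset state
}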

View File

@@ -253,11 +253,6 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
 /**
 * Returns solely the term text as specified by the
 * {@link CharSequence} interface.
-* <p>This method changed the behavior with Lucene 3.1,
-* before it returned a String representation of the whole
-* term with all attributes.
-* This affects especially the
-* {@link org.apache.lucene.analysis.Token} subclass.
 */
 @Override
 public String toString() {

View File

@@ -16,7 +16,6 @@
 */
 package org.apache.lucene.search.highlight;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -29,7 +28,6 @@ public class TokenGroup {
 private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
-private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
 private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
 private int numTokens = 0;
 private int startOffset = 0;
@@ -68,10 +66,7 @@ public class TokenGroup {
 tot += score;
 }
 }
-Token token = new Token();
-token.setOffset(termStartOffset, termEndOffset);
-token.setEmpty().append(termAtt);
-tokens[numTokens] = token;
 scores[numTokens] = score;
 numTokens++;
 }
@@ -86,14 +81,6 @@ public class TokenGroup {
 tot = 0;
 }
-/**
-* @param index a value between 0 and numTokens -1
-* @return the "n"th token
-*/
-public Token getToken(int index) {
-return tokens[index];
-}
 /**
 *
 * @param index a value between 0 and numTokens -1

View File

@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
-* org.apache.lucene.analysis.Token} API.
+* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
 * */
 public final class FastCharStream implements CharStream {
 char[] buffer = null;

View File

@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
-* org.apache.lucene.analysis.Token} API.
+* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
 * */
 public final class FastCharStream implements CharStream {
 char[] buffer = null;

View File

@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
-* org.apache.lucene.analysis.Token} API. */
+* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */
 public final class FastCharStream implements CharStream {
 char[] buffer = null;

View File

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
@@ -44,14 +43,6 @@ import org.apache.lucene.util.BytesRef;
 A Token can optionally have metadata (a.k.a. payload) in the form of a variable
 length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
 payloads from the index.
-<br><br>
-<p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
-that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
-Even though it is not necessary to use Token anymore, with the new TokenStream API it can
-be used as convenience class that implements all {@link Attribute}s, which is especially useful
-to easily switch from the old to the new TokenStream API.
 A few things to note:
 <ul>
@@ -60,13 +51,7 @@ import org.apache.lucene.util.BytesRef;
 <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
 <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
 </ul>
-<p>
-<b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
-{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
-This method now only prints the term text, no additional information anymore.
-@deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
 */
-@Deprecated
 public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
 private int flags;
@@ -166,7 +151,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
 public Token clone() {
 final Token t = (Token) super.clone();
 if (payload != null) {
-t.payload = payload.clone();
+t.payload = BytesRef.deepCopyOf(payload);
 }
 return t;
 }
@@ -190,7 +175,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
 public void copyTo(AttributeImpl target) {
 super.copyTo(target);
 ((FlagsAttribute) target).setFlags(flags);
-((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
+((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
 }
 @Override
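The switch from payload.clone() to BytesRef.deepCopyOf above is not cosmetic: BytesRef.clone() is a shallow clone whose underlying byte array is shared with the original, so a cloned Token could see its payload mutated behind its back. A small sketch of the aliasing the deep copy avoids:

BytesRef original = new BytesRef(new byte[] { 1, 2, 3 });
BytesRef shallow = original.clone();             // shares original.bytes
BytesRef deep = BytesRef.deepCopyOf(original);   // owns a fresh copy

original.bytes[0] = 42;
// shallow now reads 42 as its first byte; deep still reads 1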

View File

@@ -31,7 +31,6 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -60,8 +59,8 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.QParser;
-import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SyntaxError;
 import org.apache.solr.spelling.AbstractLuceneSpellChecker;
 import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
 import org.apache.solr.spelling.IndexBasedSpellChecker;
@@ -72,6 +71,7 @@ import org.apache.solr.spelling.SpellCheckCollator;
 import org.apache.solr.spelling.SpellingOptions;
 import org.apache.solr.spelling.SpellingQueryConverter;
 import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

View File

@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
-* org.apache.lucene.analysis.Token} API.
+* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
 * */
 public final class FastCharStream implements CharStream {
 char[] buffer = null;

View File

@@ -16,7 +16,6 @@
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.search.spell.StringDistance;
 import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
@@ -24,19 +23,18 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
-import org.apache.lucene.search.spell.SuggestWord;
-import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
-import org.apache.lucene.search.spell.SuggestWordQueue;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;

View File

@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;

View File

@@ -22,7 +22,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.StringDistance;

View File

@@ -29,8 +29,6 @@ import java.util.NoSuchElementException;
 import java.util.PriorityQueue;
 import java.util.Set;
-import org.apache.lucene.analysis.Token;
 /**
 * <p>
 * Given a list of possible Spelling Corrections for multiple mis-spelled words

View File

@@ -15,13 +15,12 @@
 * limitations under the License.
 */
 package org.apache.solr.spelling;
+import java.util.Collection;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
-import java.util.Collection;
 /**
 * <p>
 * The QueryConverter is an abstract base class defining a method for converting
@@ -81,7 +80,7 @@ public abstract class QueryConverter implements NamedListInitializedPlugin {
 }
 /**
-* Returns the Collection of {@link org.apache.lucene.analysis.Token}s for
+* Returns the Collection of {@link Token}s for
 * the query. Offsets on the Token should correspond to the correct
 * offset in the origQuery
 */
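A hedged sketch of what a convert() implementation honoring that offset contract might look like; it assumes the analyzer field QueryConverter provides to subclasses and uses the org.apache.solr.spelling.Token class this commit introduces:

@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> result = new ArrayList<>();
  try (TokenStream ts = analyzer.tokenStream("", origQuery)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // start/end offsets index back into origQuery, as the javadoc requires
      result.add(new Token(termAtt.toString(), offsetAtt.startOffset(), offsetAtt.endOffset()));
    }
    ts.end();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return result;
}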

View File

@@ -16,8 +16,6 @@
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
 public class ResultEntry {
 public Token token;
 public String suggestion;

View File

@@ -15,8 +15,13 @@
 * limitations under the License.
 */
 package org.apache.solr.spelling;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;
@@ -31,12 +36,6 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.SolrIndexSearcher;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
 /**
 * <p>

View File

@@ -15,15 +15,12 @@
 * limitations under the License.
 */
 package org.apache.solr.spelling;
-import static org.apache.solr.common.params.CommonParams.ID;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.CursorMarkParams;
@@ -43,6 +40,8 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import static org.apache.solr.common.params.CommonParams.ID;
 public class SpellCheckCollator {
 private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 private int maxCollations = 1;

View File

@@ -15,7 +15,6 @@
 * limitations under the License.
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
 public class SpellCheckCorrection {
 private Token original;

View File

@@ -16,13 +16,12 @@
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.util.Collection;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.solr.common.params.SolrParams;
-import java.util.Collection;
 /**
 *
 *

View File

@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

View File

@@ -15,7 +15,6 @@
 * limitations under the License.
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
 import java.util.Collection;
 import java.util.LinkedHashMap;
@@ -80,7 +79,7 @@ public class SpellingResult {
 /**
 * Suggestions must be added with the best suggestion first. ORDER is important.
-* @param token The {@link org.apache.lucene.analysis.Token}
+* @param token The {@link Token}
 * @param suggestion The suggestion for the Token
 * @param docFreq The document frequency
 */
@@ -97,7 +96,7 @@ public class SpellingResult {
 /**
 * Gets the suggestions for the given token.
 *
-* @param token The {@link org.apache.lucene.analysis.Token} to look up
+* @param token The {@link Token} to look up
 * @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}.
 *
 * The suggestions are added in sorted order (i.e. best suggestion first) then the iterator will return the suggestions in order

View File

@@ -21,8 +21,6 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import org.apache.lucene.analysis.Token;
 /**
 * Passes the entire query string to the configured analyzer as-is.
 **/

View File

@@ -0,0 +1,175 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.spelling;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
/**
A Token is an occurrence of a term from the text of a field. It consists of
a term's text, the start and end offset of the term in the text of the field,
and a type string.
<p>
The start and end offsets permit applications to re-associate a token with
its source text, e.g., to display highlighted query terms in a document
browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
display, etc.
<p>
The type is a string, assigned by a lexical analyzer
(a.k.a. tokenizer), naming the lexical or syntactic class that the token
belongs to. For example an end of sentence marker token might be implemented
with type "eos". The default token type is "word".
<p>
A Token can optionally have metadata (a.k.a. payload) in the form of a variable
length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
payloads from the index.
A few things to note:
<ul>
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
<li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
<li>The startOffset and endOffset represent the start and end offset in the source text, so be careful in adjusting them.</li>
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
</ul>
*/
@Deprecated
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
// TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack
private int flags;
private BytesRef payload;
/** Constructs a Token with null text. */
public Token() {
}
/** Constructs a Token with the given term text, start
* and end offsets. The type defaults to "word."
* <b>NOTE:</b> for better indexing speed you should
* instead use the char[] termBuffer methods to set the
* term text.
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(CharSequence text, int start, int end) {
append(text);
setOffset(start, end);
}
/**
* {@inheritDoc}
* @see FlagsAttribute
*/
@Override
public int getFlags() {
return flags;
}
/**
* {@inheritDoc}
* @see FlagsAttribute
*/
@Override
public void setFlags(int flags) {
this.flags = flags;
}
/**
* {@inheritDoc}
* @see PayloadAttribute
*/
@Override
public BytesRef getPayload() {
return this.payload;
}
/**
* {@inheritDoc}
* @see PayloadAttribute
*/
@Override
public void setPayload(BytesRef payload) {
this.payload = payload;
}
/** Resets the term text, payload, flags, positionIncrement, positionLength,
* startOffset, endOffset and token type to default.
*/
@Override
public void clear() {
super.clear();
flags = 0;
payload = null;
}
@Override
public boolean equals(Object obj) {
if (obj == this)
return true;
if (obj instanceof Token) {
final Token other = (Token) obj;
return (
flags == other.flags &&
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
super.equals(obj)
);
} else
return false;
}
@Override
public int hashCode() {
int code = super.hashCode();
code = code * 31 + flags;
if (payload != null) {
code = code * 31 + payload.hashCode();
}
return code;
}
@Override
public Token clone() {
final Token t = (Token) super.clone();
if (payload != null) {
t.payload = BytesRef.deepCopyOf(payload);
}
return t;
}
@Override
public void copyTo(AttributeImpl target) {
super.copyTo(target);
((FlagsAttribute) target).setFlags(flags);
((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
}
@Override
public void reflectWith(AttributeReflector reflector) {
super.reflectWith(reflector);
reflector.reflect(FlagsAttribute.class, "flags", flags);
reflector.reflect(PayloadAttribute.class, "payload", payload);
}
}
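A hypothetical usage sketch for the relocated class: the constructor ties term text to its offsets in the raw query string, and the CharSequence view inherited from PackedTokenAttributeImpl exposes the text directly (all values below are illustrative):

// "pizzza" spans offsets 5..11 of the raw query "best pizzza nyc"
Token token = new Token("pizzza", 5, 11);
token.setFlags(0);                                // spellcheck bookkeeping flags
token.setPayload(new BytesRef(new byte[] { 1 })); // optional per-token metadata
assert "pizzza".contentEquals(token);             // Token is a CharSequence
assert token.startOffset() == 5 && token.endOffset() == 11;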

View File

@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.CombineSuggestion;

View File

@@ -28,7 +28,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.List;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.HighFrequencyDictionary;
@@ -47,6 +46,7 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.spelling.SolrSpellChecker;
 import org.apache.solr.spelling.SpellingOptions;
 import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
 import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
 import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;

View File

@@ -16,18 +16,18 @@
 */
 package org.apache.solr.handler.component;
-import org.apache.lucene.analysis.Token;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.spelling.SolrSpellChecker;
-import org.apache.solr.spelling.SpellingOptions;
-import org.apache.solr.spelling.SpellingResult;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.SolrSpellChecker;
+import org.apache.solr.spelling.SpellingOptions;
+import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 /**
 * A Dummy SpellChecker for testing purposes
 *

View File

@@ -19,7 +19,6 @@ package org.apache.solr.spelling;
 import java.util.Collection;
 import java.util.Map;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.params.SpellingParams;

View File

@@ -20,7 +20,6 @@ import java.io.File;
 import java.util.Collection;
 import java.util.Map;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;

View File

@@ -22,7 +22,6 @@ import java.util.Comparator;
 import java.util.Date;
 import java.util.Map;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;

View File

@@ -16,7 +16,10 @@
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import java.util.Collection;
-import java.util.HashSet;
-import java.io.IOException;
 /**

View File

@@ -20,9 +20,7 @@ import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Set;
-import org.apache.lucene.analysis.Token;
 import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.spelling.PossibilityIterator;
 import org.junit.Before;
 import org.junit.Test;

View File

@@ -16,16 +16,15 @@
 */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.common.util.NamedList;
 import org.junit.Test;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
 /**
 * Test for SpellingQueryConverter

View File

@@ -22,11 +22,9 @@ import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
@@ -65,7 +63,11 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
 public void assertConvertsTo(String text, String expected[]) throws IOException {
 Collection<Token> tokens = converter.convert(text);
-TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0]));
-assertTokenStreamContents(ts, expected);
+assertEquals(tokens.size(), expected.length);
+int i = 0;
+for (Token token : tokens) {
+assertEquals(token.toString(), expected[i]);
+i++;
+}
 }
 }

View File

@@ -21,7 +21,6 @@ import java.util.LinkedHashMap;
 import java.util.Map;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.util.NamedList;