mirror of https://github.com/apache/lucene.git
LUCENE-7867: Remove deprecated Token class
parent 69d0c70651
commit af1ee47f2b

@@ -86,6 +86,9 @@ API Changes
 * LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream
   (Alan Woodward, Uwe Schindler, Adrien Grand)
 
+* LUCENE-7867: The deprecated Token class is now only available in the test
+  framework (Alan Woodward, Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-7626: IndexWriter will no longer accept broken token offsets
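
The entry above completes the move away from the Token convenience class: callers now read per-token state through reused attribute instances on a TokenStream. A minimal, self-contained sketch of that pattern (the analyzer choice and field name are illustrative, not taken from this commit):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class AttributeApiSketch {
  public static void main(String[] args) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
         TokenStream ts = analyzer.tokenStream("body", "hello world")) {
      // One attribute instance per type is registered once and reused for every token.
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
      }
      ts.end();
    }
  }
}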
@@ -17,18 +17,17 @@
 package org.apache.lucene.analysis.payloads;
 
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-
 
 /**
- * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
  *
+ * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}
  **/
 public class NumericPayloadTokenFilter extends TokenFilter {
-
@@ -17,17 +17,17 @@
 package org.apache.lucene.analysis.payloads;
 
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-
 
 /**
- * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+ * Makes the {@link TypeAttribute} a payload.
  *
  * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
  *
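
Both payload filters above share one shape: pull the token's lexical type from TypeAttribute and stash a value derived from it in PayloadAttribute. A simplified sketch of that shape (hypothetical class name; not the exact implementation of either filter):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;

final class TypeDerivedPayloadFilter extends TokenFilter {
  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);

  TypeDerivedPayloadFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // Derive the payload from the token type; here, the UTF-8 bytes of the type string.
    payloadAtt.setPayload(new BytesRef(typeAtt.type()));
    return true;
  }
}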
@@ -40,12 +40,7 @@ import org.apache.lucene.util.AttributeSource;
 * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
 *   <code>TokenStream</code>.
 * </ul>
-* A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
-* has moved from being {@link Token}-based to {@link Attribute}-based. While
-* {@link Token} still exists in 2.9 as a convenience class, the preferred way
-* to store the information of a {@link Token} is to use {@link AttributeImpl}s.
-* <p>
-* <code>TokenStream</code> now extends {@link AttributeSource}, which provides
+* <code>TokenStream</code> extends {@link AttributeSource}, which provides
 * access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
 * Note that only one instance per {@link AttributeImpl} is created and reused
 * for every token. This approach reduces object creation and allows local
@@ -253,11 +253,6 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
   /**
    * Returns solely the term text as specified by the
    * {@link CharSequence} interface.
-   * <p>This method changed the behavior with Lucene 3.1,
-   * before it returned a String representation of the whole
-   * term with all attributes.
-   * This affects especially the
-   * {@link org.apache.lucene.analysis.Token} subclass.
    */
   @Override
   public String toString() {
@@ -16,7 +16,6 @@
  */
 package org.apache.lucene.search.highlight;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -29,7 +28,6 @@ public class TokenGroup {
 
   private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
 
-  private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
   private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
   private int numTokens = 0;
   private int startOffset = 0;
@@ -68,10 +66,7 @@ public class TokenGroup {
         tot += score;
       }
     }
-    Token token = new Token();
-    token.setOffset(termStartOffset, termEndOffset);
-    token.setEmpty().append(termAtt);
-    tokens[numTokens] = token;
+
     scores[numTokens] = score;
     numTokens++;
   }
@@ -86,14 +81,6 @@ public class TokenGroup {
     tot = 0;
   }
 
-  /**
-   * @param index a value between 0 and numTokens -1
-   * @return the "n"th token
-   */
-  public Token getToken(int index) {
-    return tokens[index];
-  }
-
   /**
    *
    * @param index a value between 0 and numTokens -1
@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
  * this does not do line-number counting, but instead keeps track of the
  * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
  * */
 public final class FastCharStream implements CharStream {
   char[] buffer = null;
@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
  * this does not do line-number counting, but instead keeps track of the
  * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
  * */
 public final class FastCharStream implements CharStream {
   char[] buffer = null;
@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
  * this does not do line-number counting, but instead keeps track of the
  * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API. */
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */
 public final class FastCharStream implements CharStream {
   char[] buffer = null;
 
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
@@ -45,14 +44,6 @@ import org.apache.lucene.util.BytesRef;
   length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
   payloads from the index.
 
-  <br><br>
-
-  <p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
-  that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
-  Even though it is not necessary to use Token anymore, with the new TokenStream API it can
-  be used as convenience class that implements all {@link Attribute}s, which is especially useful
-  to easily switch from the old to the new TokenStream API.
-
   A few things to note:
   <ul>
   <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
@@ -60,13 +51,7 @@ import org.apache.lucene.util.BytesRef;
   <li>The startOffset and endOffset represent the start and end offset in the source text, so be careful in adjusting them.</li>
   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
   </ul>
-  <p>
-  <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
-  {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
-  This method now only prints the term text, no additional information anymore.
-  @deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
 */
-@Deprecated
 public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
 
   private int flags;
@@ -166,7 +151,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
   public Token clone() {
     final Token t = (Token) super.clone();
     if (payload != null) {
-      t.payload = payload.clone();
+      t.payload = BytesRef.deepCopyOf(payload);
     }
     return t;
   }
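
Context for the clone()/copyTo() changes above and below: BytesRef.clone() is documented as a shallow clone, so the copy shares the underlying byte array with the original payload, whereas BytesRef.deepCopyOf allocates fresh bytes. A small illustration:

BytesRef original = new BytesRef(new byte[] {1, 2, 3});
BytesRef shallow = original.clone();           // shares original.bytes
BytesRef deep = BytesRef.deepCopyOf(original); // copies the bytes
original.bytes[0] = 9;
// shallow now observes 9 through the shared array; deep still holds 1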
@@ -190,7 +175,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P
   public void copyTo(AttributeImpl target) {
     super.copyTo(target);
     ((FlagsAttribute) target).setFlags(flags);
-    ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
+    ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
   }
 
   @Override
@@ -31,7 +31,6 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -60,8 +59,8 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.QParser;
-import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SyntaxError;
 import org.apache.solr.spelling.AbstractLuceneSpellChecker;
 import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
 import org.apache.solr.spelling.IndexBasedSpellChecker;
@@ -72,6 +71,7 @@ import org.apache.solr.spelling.SpellCheckCollator;
 import org.apache.solr.spelling.SpellingOptions;
 import org.apache.solr.spelling.SpellingQueryConverter;
 import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -21,7 +21,7 @@ import java.io.*;
 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
  * this does not do line-number counting, but instead keeps track of the
  * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
  * */
 public final class FastCharStream implements CharStream {
   char[] buffer = null;
@@ -16,7 +16,6 @@
  */
 package org.apache.solr.spelling;
 
-import org.apache.lucene.search.spell.StringDistance;
 import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
@@ -24,19 +23,18 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
-import org.apache.lucene.search.spell.SuggestWord;
-import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
-import org.apache.lucene.search.spell.SuggestWordQueue;
-
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
@@ -22,7 +22,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.StringDistance;
@@ -29,8 +29,6 @@ import java.util.NoSuchElementException;
 import java.util.PriorityQueue;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Token;
-
 /**
  * <p>
  * Given a list of possible Spelling Corrections for multiple mis-spelled words
@@ -15,13 +15,12 @@
  * limitations under the License.
  */
 package org.apache.solr.spelling;
+import java.util.Collection;
+
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
 
-import java.util.Collection;
-
 /**
  * <p>
  * The QueryConverter is an abstract base class defining a method for converting
@@ -81,7 +80,7 @@ public abstract class QueryConverter implements NamedListInitializedPlugin {
   }
 
   /**
-   * Returns the Collection of {@link org.apache.lucene.analysis.Token}s for
+   * Returns the Collection of {@link Token}s for
    * the query. Offsets on the Token should correspond to the correct
    * offset in the origQuery
    */
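
For orientation, a hypothetical QueryConverter that emits one Token per whitespace-separated word, with offsets pointing back into the original query as the javadoc above requires (illustrative only; Solr's real implementations are SpellingQueryConverter and SuggestQueryConverter):

import java.util.ArrayList;
import java.util.Collection;

import org.apache.solr.spelling.QueryConverter;
import org.apache.solr.spelling.Token;

public class WhitespaceQueryConverter extends QueryConverter {
  @Override
  public Collection<Token> convert(String original) {
    Collection<Token> result = new ArrayList<>();
    int i = 0;
    while (i < original.length()) {
      if (Character.isWhitespace(original.charAt(i))) {
        i++;
        continue;
      }
      int start = i;
      while (i < original.length() && !Character.isWhitespace(original.charAt(i))) {
        i++;
      }
      // Offsets locate the word within the original query string.
      result.add(new Token(original.substring(start, i), start, i));
    }
    return result;
  }
}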
@@ -16,8 +16,6 @@
  */
 package org.apache.solr.spelling;
 
-import org.apache.lucene.analysis.Token;
-
 public class ResultEntry {
   public Token token;
   public String suggestion;
@@ -15,8 +15,13 @@
  * limitations under the License.
  */
 package org.apache.solr.spelling;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;

@@ -31,12 +36,6 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.SolrIndexSearcher;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-
 
 /**
  * <p>
@@ -15,15 +15,12 @@
  * limitations under the License.
  */
 package org.apache.solr.spelling;
-import static org.apache.solr.common.params.CommonParams.ID;
-
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.CursorMarkParams;
@@ -43,6 +40,8 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.params.CommonParams.ID;
+
 public class SpellCheckCollator {
   private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private int maxCollations = 1;
@@ -15,7 +15,6 @@
  * limitations under the License.
  */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
 
 public class SpellCheckCorrection {
   private Token original;
@@ -16,13 +16,12 @@
  */
 package org.apache.solr.spelling;
 
-import org.apache.lucene.analysis.Token;
+import java.util.Collection;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.solr.common.params.SolrParams;
 
-import java.util.Collection;
-
 /**
  *
  *
@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -15,7 +15,6 @@
  * limitations under the License.
  */
 package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
 
 import java.util.Collection;
 import java.util.LinkedHashMap;
@@ -80,7 +79,7 @@ public class SpellingResult {
 
   /**
    * Suggestions must be added with the best suggestion first. ORDER is important.
-   * @param token The {@link org.apache.lucene.analysis.Token}
+   * @param token The {@link Token}
    * @param suggestion The suggestion for the Token
    * @param docFreq The document frequency
    */
@@ -97,7 +96,7 @@ public class SpellingResult {
   /**
    * Gets the suggestions for the given token.
    *
-   * @param token The {@link org.apache.lucene.analysis.Token} to look up
+   * @param token The {@link Token} to look up
    * @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}.
    *
    * The suggestions are added in sorted order (i.e. best suggestion first) then the iterator will return the suggestions in order
@@ -21,8 +21,6 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 
-import org.apache.lucene.analysis.Token;
-
 /**
  * Passes the entire query string to the configured analyzer as-is.
 **/
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.spelling;
+
+
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+
+/**
+  A Token is an occurrence of a term from the text of a field.  It consists of
+  a term's text, the start and end offset of the term in the text of the field,
+  and a type string.
+  <p>
+  The start and end offsets permit applications to re-associate a token with
+  its source text, e.g., to display highlighted query terms in a document
+  browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
+  display, etc.
+  <p>
+  The type is a string, assigned by a lexical analyzer
+  (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+  belongs to.  For example an end of sentence marker token might be implemented
+  with type "eos".  The default token type is "word".
+  <p>
+  A Token can optionally have metadata (a.k.a. payload) in the form of a variable
+  length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
+  payloads from the index.
+
+  A few things to note:
+  <ul>
+  <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
+  <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+  <li>The startOffset and endOffset represent the start and end offset in the source text, so be careful in adjusting them.</li>
+  <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+  </ul>
+*/
+@Deprecated
+public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
+
+  // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack
+
+  private int flags;
+  private BytesRef payload;
+
+  /** Constructs a Token with null text. */
+  public Token() {
+  }
+
+  /** Constructs a Token with the given term text, start
+   *  and end offsets.  The type defaults to "word."
+   *  <b>NOTE:</b> for better indexing speed you should
+   *  instead use the char[] termBuffer methods to set the
+   *  term text.
+   *  @param text term text
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
+   */
+  public Token(CharSequence text, int start, int end) {
+    append(text);
+    setOffset(start, end);
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see FlagsAttribute
+   */
+  @Override
+  public int getFlags() {
+    return flags;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see FlagsAttribute
+   */
+  @Override
+  public void setFlags(int flags) {
+    this.flags = flags;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see PayloadAttribute
+   */
+  @Override
+  public BytesRef getPayload() {
+    return this.payload;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see PayloadAttribute
+   */
+  @Override
+  public void setPayload(BytesRef payload) {
+    this.payload = payload;
+  }
+
+  /** Resets the term text, payload, flags, positionIncrement, positionLength,
+   * startOffset, endOffset and token type to default.
+   */
+  @Override
+  public void clear() {
+    super.clear();
+    flags = 0;
+    payload = null;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == this)
+      return true;
+
+    if (obj instanceof Token) {
+      final Token other = (Token) obj;
+      return (
+        flags == other.flags &&
+        (payload == null ? other.payload == null : payload.equals(other.payload)) &&
+        super.equals(obj)
+      );
+    } else
+      return false;
+  }
+
+  @Override
+  public int hashCode() {
+    int code = super.hashCode();
+    code = code * 31 + flags;
+    if (payload != null) {
+      code = code * 31 + payload.hashCode();
+    }
+    return code;
+  }
+
+  @Override
+  public Token clone() {
+    final Token t = (Token) super.clone();
+    if (payload != null) {
+      t.payload = BytesRef.deepCopyOf(payload);
+    }
+    return t;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    super.copyTo(target);
+    ((FlagsAttribute) target).setFlags(flags);
+    ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    super.reflectWith(reflector);
+    reflector.reflect(FlagsAttribute.class, "flags", flags);
+    reflector.reflect(PayloadAttribute.class, "payload", payload);
+  }
+
+}
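
A quick illustration of the relocated class above (hypothetical usage, not part of this commit): term text, offsets, and type come from PackedTokenAttributeImpl, and toString() prints only the term text:

import org.apache.solr.spelling.Token;

public class TokenSketch {
  public static void main(String[] args) {
    Token t = new Token("lucene", 0, 6); // term text plus start/end offsets
    t.setType("word");
    System.out.println(t);                                      // prints: lucene
    System.out.println(t.startOffset() + "," + t.endOffset()); // prints: 0,6
  }
}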
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.regex.Pattern;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.CombineSuggestion;
@@ -28,7 +28,6 @@ import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.HighFrequencyDictionary;
@@ -47,6 +46,7 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.spelling.SolrSpellChecker;
 import org.apache.solr.spelling.SpellingOptions;
 import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
 import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
 import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
@@ -16,18 +16,18 @@
  */
 package org.apache.solr.handler.component;
 
-import org.apache.lucene.analysis.Token;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.spelling.SolrSpellChecker;
-import org.apache.solr.spelling.SpellingOptions;
-import org.apache.solr.spelling.SpellingResult;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.SolrSpellChecker;
+import org.apache.solr.spelling.SpellingOptions;
+import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
 /**
  * A Dummy SpellChecker for testing purposes
 *
@@ -19,7 +19,6 @@ package org.apache.solr.spelling;
 import java.util.Collection;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.params.SpellingParams;
@@ -20,7 +20,6 @@ import java.io.File;
 import java.util.Collection;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;
@@ -22,7 +22,6 @@ import java.util.Comparator;
 import java.util.Date;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -16,7 +16,10 @@
  */
 package org.apache.solr.spelling;
 
-import org.apache.lucene.analysis.Token;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import java.util.Collection;
-import java.util.HashSet;
-import java.io.IOException;
 
 
 /**
@@ -20,9 +20,7 @@ import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.spelling.PossibilityIterator;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -16,16 +16,15 @@
  */
 package org.apache.solr.spelling;
 
-import org.apache.lucene.analysis.Token;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.common.util.NamedList;
 import org.junit.Test;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
 
 /**
  * Test for SpellingQueryConverter
@@ -22,11 +22,9 @@ import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
@@ -65,7 +63,11 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
 
   public void assertConvertsTo(String text, String expected[]) throws IOException {
     Collection<Token> tokens = converter.convert(text);
-    TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0]));
-    assertTokenStreamContents(ts, expected);
+    assertEquals(tokens.size(), expected.length);
+    int i = 0;
+    for (Token token : tokens) {
+      assertEquals(token.toString(), expected[i]);
+      i++;
+    }
   }
 }
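
A likely reason for this rewrite, inferred from the import changes above rather than stated in the commit: the converter now returns org.apache.solr.spelling.Token, which is not the test-framework Token that CannedTokenStream replays, so the test compares term texts on the converted Tokens directly.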
@@ -21,7 +21,6 @@ import java.util.LinkedHashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.util.NamedList;