From af1ee47f2bb2e19f39f7bef4be09e375ca84a52b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 8 Jun 2017 14:43:43 +0100 Subject: [PATCH] LUCENE-7867: Remove deprecated Token class --- lucene/CHANGES.txt | 3 + .../payloads/NumericPayloadTokenFilter.java | 7 +- .../payloads/TypeAsPayloadTokenFilter.java | 6 +- .../apache/lucene/analysis/TokenStream.java | 7 +- .../CharTermAttributeImpl.java | 5 - .../lucene/search/highlight/TokenGroup.java | 15 +- .../queryparser/classic/FastCharStream.java | 2 +- .../standard/parser/FastCharStream.java | 2 +- .../surround/parser/FastCharStream.java | 2 +- .../org/apache/lucene/analysis/Token.java | 19 +- .../component/SpellCheckComponent.java | 4 +- .../apache/solr/parser/FastCharStream.java | 2 +- .../spelling/AbstractLuceneSpellChecker.java | 12 +- .../spelling/ConjunctionSolrSpellChecker.java | 1 - .../solr/spelling/DirectSolrSpellChecker.java | 1 - .../solr/spelling/PossibilityIterator.java | 2 - .../apache/solr/spelling/QueryConverter.java | 7 +- .../org/apache/solr/spelling/ResultEntry.java | 2 - .../solr/spelling/SolrSpellChecker.java | 13 +- .../solr/spelling/SpellCheckCollator.java | 5 +- .../solr/spelling/SpellCheckCorrection.java | 1 - .../apache/solr/spelling/SpellingOptions.java | 5 +- .../solr/spelling/SpellingQueryConverter.java | 1 - .../apache/solr/spelling/SpellingResult.java | 5 +- .../solr/spelling/SuggestQueryConverter.java | 2 - .../java/org/apache/solr/spelling/Token.java | 175 ++++++++++++++++++ .../spelling/WordBreakSolrSpellChecker.java | 1 - .../solr/spelling/suggest/Suggester.java | 2 +- .../DummyCustomParamSpellChecker.java | 14 +- .../spelling/DirectSolrSpellCheckerTest.java | 1 - .../spelling/FileBasedSpellCheckerTest.java | 1 - .../spelling/IndexBasedSpellCheckerTest.java | 1 - .../solr/spelling/SimpleQueryConverter.java | 8 +- .../SpellPossibilityIteratorTest.java | 2 - .../spelling/SpellingQueryConverterTest.java | 9 +- .../TestSuggestSpellingConverter.java | 10 +- 
.../WordBreakSolrSpellCheckerTest.java | 1 - 37 files changed, 236 insertions(+), 120 deletions(-) rename lucene/{core => test-framework}/src/java/org/apache/lucene/analysis/Token.java (86%) create mode 100644 solr/core/src/java/org/apache/solr/spelling/Token.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8365017c240..eede65b4285 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -86,6 +86,9 @@ API Changes * LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream (Alan Woodward, Uwe Schindler, Adrien Grand) +* LUCENE-7867: The deprecated Token class is now only available in the test + framework (Alan Woodward, Adrien Grand) + Bug Fixes * LUCENE-7626: IndexWriter will no longer accept broken token offsets diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java index e5a4a4588fe..81c5dd49011 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java @@ -17,18 +17,17 @@ package org.apache.lucene.analysis.payloads; +import java.io.IOException; + import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.BytesRef; -import java.io.IOException; - /** - * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()} - * + * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.tokenattributes.TypeAttribute} **/ public class NumericPayloadTokenFilter extends TokenFilter { diff --git 
a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java index 92fc76a8d73..9dabe7924eb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java @@ -17,17 +17,17 @@ package org.apache.lucene.analysis.payloads; +import java.io.IOException; + import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.BytesRef; -import java.io.IOException; - /** - * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload. + * Makes the {@link TypeAttribute} a payload. * * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding * diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java index af1e7bd5e9b..a19d31df5b6 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java @@ -40,12 +40,7 @@ import org.apache.lucene.util.AttributeSource; *
  • {@link TokenFilter}, a TokenStream whose input is another * TokenStream. * - * A new TokenStream API has been introduced with Lucene 2.9. This API - * has moved from being {@link Token}-based to {@link Attribute}-based. While - * {@link Token} still exists in 2.9 as a convenience class, the preferred way - * to store the information of a {@link Token} is to use {@link AttributeImpl}s. - *

    - * TokenStream now extends {@link AttributeSource}, which provides + * TokenStream extends {@link AttributeSource}, which provides * access to all of the token {@link Attribute}s for the TokenStream. * Note that only one instance per {@link AttributeImpl} is created and reused * for every token. This approach reduces object creation and allows local diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java index cde8dd92686..9a5b9fa29bb 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java @@ -253,11 +253,6 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr /** * Returns solely the term text as specified by the * {@link CharSequence} interface. - *

    This method changed the behavior with Lucene 3.1, - * before it returned a String representation of the whole - * term with all attributes. - * This affects especially the - * {@link org.apache.lucene.analysis.Token} subclass. */ @Override public String toString() { diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java index 6af89f8c24f..ebb37d79956 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.search.highlight; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -29,7 +28,6 @@ public class TokenGroup { private static final int MAX_NUM_TOKENS_PER_GROUP = 50; - private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP]; private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP]; private int numTokens = 0; private int startOffset = 0; @@ -68,10 +66,7 @@ public class TokenGroup { tot += score; } } - Token token = new Token(); - token.setOffset(termStartOffset, termEndOffset); - token.setEmpty().append(termAtt); - tokens[numTokens] = token; + scores[numTokens] = score; numTokens++; } @@ -86,14 +81,6 @@ public class TokenGroup { tot = 0; } - /** - * @param index a value between 0 and numTokens -1 - * @return the "n"th token - */ - public Token getToken(int index) { - return tokens[index]; - } - /** * * @param index a value between 0 and numTokens -1 diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java index ad0646b77e0..d528111f4d1 100644 --- 
a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java @@ -21,7 +21,7 @@ import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.

    Note that * this does not do line-number counting, but instead keeps track of the * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. + * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. * */ public final class FastCharStream implements CharStream { char[] buffer = null; diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java index 06bf9ab355a..ee0f9afe822 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java @@ -21,7 +21,7 @@ import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.

    Note that * this does not do line-number counting, but instead keeps track of the * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. + * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. * */ public final class FastCharStream implements CharStream { char[] buffer = null; diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java index d3cc18bbfec..3a033f5046d 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java @@ -21,7 +21,7 @@ import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.

    Note that * this does not do line-number counting, but instead keeps track of the * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. */ + * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */ public final class FastCharStream implements CharStream { char[] buffer = null; diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Token.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java similarity index 86% rename from lucene/core/src/java/org/apache/lucene/analysis/Token.java rename to lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java index 77ab85e069b..04b1df8609e 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/Token.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeReflector; @@ -44,14 +43,6 @@ import org.apache.lucene.util.BytesRef; A Token can optionally have metadata (a.k.a. payload) in the form of a variable length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the payloads from the index. - -

    - -

    NOTE: As of 2.9, Token implements all {@link Attribute} interfaces - that are part of core Lucene and can be found in the {@code tokenattributes} subpackage. - Even though it is not necessary to use Token anymore, with the new TokenStream API it can - be used as convenience class that implements all {@link Attribute}s, which is especially useful - to easily switch from the old to the new TokenStream API. A few things to note:

    -

    - Please note: With Lucene 3.1, the {@linkplain #toString toString()} method had to be changed to match the - {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}. - This method now only prints the term text, no additional information anymore. - @deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally! */ -@Deprecated public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute { private int flags; @@ -166,7 +151,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P public Token clone() { final Token t = (Token) super.clone(); if (payload != null) { - t.payload = payload.clone(); + t.payload = BytesRef.deepCopyOf(payload); } return t; } @@ -190,7 +175,7 @@ public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, P public void copyTo(AttributeImpl target) { super.copyTo(target); ((FlagsAttribute) target).setFlags(flags); - ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone()); + ((PayloadAttribute) target).setPayload((payload == null) ? 
null : BytesRef.deepCopyOf(payload)); } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 6aba296d657..c881aa6ef9f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -31,7 +31,6 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -60,8 +59,8 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.search.DocSet; import org.apache.solr.search.QParser; -import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SyntaxError; import org.apache.solr.spelling.AbstractLuceneSpellChecker; import org.apache.solr.spelling.ConjunctionSolrSpellChecker; import org.apache.solr.spelling.IndexBasedSpellChecker; @@ -72,6 +71,7 @@ import org.apache.solr.spelling.SpellCheckCollator; import org.apache.solr.spelling.SpellingOptions; import org.apache.solr.spelling.SpellingQueryConverter; import org.apache.solr.spelling.SpellingResult; +import org.apache.solr.spelling.Token; import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/solr/core/src/java/org/apache/solr/parser/FastCharStream.java b/solr/core/src/java/org/apache/solr/parser/FastCharStream.java index 712a28335b5..7039c0a582e 100644 --- a/solr/core/src/java/org/apache/solr/parser/FastCharStream.java +++ b/solr/core/src/java/org/apache/solr/parser/FastCharStream.java @@ -21,7 +21,7 @@ import java.io.*; /** An 
efficient implementation of JavaCC's CharStream interface.

    Note that * this does not do line-number counting, but instead keeps track of the * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. + * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. * */ public final class FastCharStream implements CharStream { char[] buffer = null; diff --git a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java index 22da107ec70..a03e911a524 100644 --- a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java @@ -16,7 +16,6 @@ */ package org.apache.solr.spelling; -import org.apache.lucene.search.spell.StringDistance; import java.io.File; import java.io.IOException; import java.util.Arrays; @@ -24,19 +23,18 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.lucene.search.spell.SuggestWord; -import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; -import org.apache.lucene.search.spell.SuggestWordQueue; - -import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.LevensteinDistance; import org.apache.lucene.search.spell.SpellChecker; +import org.apache.lucene.search.spell.StringDistance; +import org.apache.lucene.search.spell.SuggestWord; +import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; +import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.solr.common.util.NamedList; import 
org.apache.solr.core.SolrCore; diff --git a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java index 881b4d5c799..2daab28d405 100644 --- a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.search.spell.StringDistance; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; diff --git a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java index 15fee72c3b2..a1f8df8564f 100644 --- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.lucene.analysis.Token; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.StringDistance; diff --git a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java index 0203f18270b..3873e9893c7 100644 --- a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java +++ b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java @@ -29,8 +29,6 @@ import java.util.NoSuchElementException; import java.util.PriorityQueue; import java.util.Set; -import org.apache.lucene.analysis.Token; - /** *

    * Given a list of possible Spelling Corrections for multiple mis-spelled words diff --git a/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java index edb94c458fa..3c3a42be2dd 100644 --- a/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java +++ b/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java @@ -15,13 +15,12 @@ * limitations under the License. */ package org.apache.solr.spelling; +import java.util.Collection; + import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.solr.common.util.NamedList; import org.apache.solr.util.plugin.NamedListInitializedPlugin; -import java.util.Collection; - /** *

    * The QueryConverter is an abstract base class defining a method for converting @@ -81,7 +80,7 @@ public abstract class QueryConverter implements NamedListInitializedPlugin { } /** - * Returns the Collection of {@link org.apache.lucene.analysis.Token}s for + * Returns the Collection of {@link Token}s for * the query. Offsets on the Token should correspond to the correct * offset in the origQuery */ diff --git a/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java b/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java index 4b667cd05ab..dd0310b4a16 100644 --- a/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java +++ b/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java @@ -16,8 +16,6 @@ */ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; - public class ResultEntry { public Token token; public String suggestion; diff --git a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java index db0d5ff8d10..bb461ab4614 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java @@ -15,8 +15,13 @@ * limitations under the License. */ package org.apache.solr.spelling; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.search.spell.LevensteinDistance; import org.apache.lucene.search.spell.StringDistance; @@ -31,12 +36,6 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.search.SolrIndexSearcher; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; - /** *

    diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java index cc388988ff8..859d84fe02a 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java @@ -15,15 +15,12 @@ * limitations under the License. */ package org.apache.solr.spelling; -import static org.apache.solr.common.params.CommonParams.ID; - import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; -import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CursorMarkParams; @@ -43,6 +40,8 @@ import org.apache.solr.search.SolrIndexSearcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.common.params.CommonParams.ID; + public class SpellCheckCollator { private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private int maxCollations = 1; diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java index cae02568880..034690bf914 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java @@ -15,7 +15,6 @@ * limitations under the License. 
*/ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; public class SpellCheckCorrection { private Token original; diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java b/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java index e8dd2f0d72b..b5b42f38e23 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java @@ -16,13 +16,12 @@ */ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; +import java.util.Collection; + import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.spell.SuggestMode; import org.apache.solr.common.params.SolrParams; -import java.util.Collection; - /** * * diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java index 4cc75b597bb..a5292c72fec 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java b/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java index de98c22dd5b..fb13bbc4920 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java @@ -15,7 +15,6 @@ * limitations under the License. 
*/ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; import java.util.Collection; import java.util.LinkedHashMap; @@ -80,7 +79,7 @@ public class SpellingResult { /** * Suggestions must be added with the best suggestion first. ORDER is important. - * @param token The {@link org.apache.lucene.analysis.Token} + * @param token The {@link Token} * @param suggestion The suggestion for the Token * @param docFreq The document frequency */ @@ -97,7 +96,7 @@ public class SpellingResult { /** * Gets the suggestions for the given token. * - * @param token The {@link org.apache.lucene.analysis.Token} to look up + * @param token The {@link Token} to look up * @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}. * * The suggestions are added in sorted order (i.e. best suggestion first) then the iterator will return the suggestions in order diff --git a/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java index a806973e61e..33ad41eb22d 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java +++ b/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java @@ -21,8 +21,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import org.apache.lucene.analysis.Token; - /** * Passes the entire query string to the configured analyzer as-is. **/ diff --git a/solr/core/src/java/org/apache/solr/spelling/Token.java b/solr/core/src/java/org/apache/solr/spelling/Token.java new file mode 100644 index 00000000000..b98d350a2a4 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/Token.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.spelling; + + +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.BytesRef; + +/** + A Token is an occurrence of a term from the text of a field. It consists of + a term's text, the start and end offset of the term in the text of the field, + and a type string. +

    + The start and end offsets permit applications to re-associate a token with + its source text, e.g., to display highlighted query terms in a document + browser, or to show matching text fragments in a KWIC + display, etc. +

    + The type is a string, assigned by a lexical analyzer + (a.k.a. tokenizer), naming the lexical or syntactic class that the token + belongs to. For example an end of sentence marker token might be implemented + with type "eos". The default token type is "word". +

    + A Token can optionally have metadata (a.k.a. payload) in the form of a variable + length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the + payloads from the index. + + A few things to note: +

    + */ +@Deprecated +public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute { + + // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack + + private int flags; + private BytesRef payload; + + /** Constructs a Token with null text. */ + public Token() { + } + + /** Constructs a Token with the given term text, start + * and end offsets. The type defaults to "word." + * NOTE: for better indexing speed you should + * instead use the char[] termBuffer methods to set the + * term text. + * @param text term text + * @param start start offset in the source text + * @param end end offset in the source text + */ + public Token(CharSequence text, int start, int end) { + append(text); + setOffset(start, end); + } + + /** + * {@inheritDoc} + * @see FlagsAttribute + */ + @Override + public int getFlags() { + return flags; + } + + /** + * {@inheritDoc} + * @see FlagsAttribute + */ + @Override + public void setFlags(int flags) { + this.flags = flags; + } + + /** + * {@inheritDoc} + * @see PayloadAttribute + */ + @Override + public BytesRef getPayload() { + return this.payload; + } + + /** + * {@inheritDoc} + * @see PayloadAttribute + */ + @Override + public void setPayload(BytesRef payload) { + this.payload = payload; + } + + /** Resets the term text, payload, flags, positionIncrement, positionLength, + * startOffset, endOffset and token type to default. + */ + @Override + public void clear() { + super.clear(); + flags = 0; + payload = null; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) + return true; + + if (obj instanceof Token) { + final Token other = (Token) obj; + return ( + flags == other.flags && + (payload == null ? 
other.payload == null : payload.equals(other.payload)) && + super.equals(obj) + ); + } else + return false; + } + + @Override + public int hashCode() { + int code = super.hashCode(); + code = code * 31 + flags; + if (payload != null) { + code = code * 31 + payload.hashCode(); + } + return code; + } + + @Override + public Token clone() { + final Token t = (Token) super.clone(); + if (payload != null) { + t.payload = BytesRef.deepCopyOf(payload); + } + return t; + } + + @Override + public void copyTo(AttributeImpl target) { + super.copyTo(target); + ((FlagsAttribute) target).setFlags(flags); + ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload)); + } + + @Override + public void reflectWith(AttributeReflector reflector) { + super.reflectWith(reflector); + reflector.reflect(FlagsAttribute.class, "flags", flags); + reflector.reflect(PayloadAttribute.class, "payload", payload); + } + +} diff --git a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java index a5d692b6fe2..f96233fd551 100644 --- a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.Locale; import java.util.regex.Pattern; -import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.CombineSuggestion; diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java index d585fed72f3..c0e77093c39 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java @@ -28,7 +28,6 @@ import java.nio.charset.StandardCharsets; import 
java.util.Collections; import java.util.List; -import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.HighFrequencyDictionary; @@ -47,6 +46,7 @@ import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.spelling.SolrSpellChecker; import org.apache.solr.spelling.SpellingOptions; import org.apache.solr.spelling.SpellingResult; +import org.apache.solr.spelling.Token; import org.apache.solr.spelling.suggest.fst.FSTLookupFactory; import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory; import org.apache.solr.spelling.suggest.tst.TSTLookupFactory; diff --git a/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java b/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java index 30924c313a0..10f7cc086b2 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java @@ -16,18 +16,18 @@ */ package org.apache.solr.handler.component; -import org.apache.lucene.analysis.Token; -import org.apache.solr.core.SolrCore; -import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.spelling.SolrSpellChecker; -import org.apache.solr.spelling.SpellingOptions; -import org.apache.solr.spelling.SpellingResult; - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; + +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.spelling.SolrSpellChecker; +import org.apache.solr.spelling.SpellingOptions; +import org.apache.solr.spelling.SpellingResult; +import org.apache.solr.spelling.Token; /** * A Dummy SpellChecker for testing purposes * diff --git 
a/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java index 1cde8d2604f..cb5bba7d214 100644 --- a/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java @@ -19,7 +19,6 @@ package org.apache.solr.spelling; import java.util.Collection; import java.util.Map; -import org.apache.lucene.analysis.Token; import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.SpellingParams; diff --git a/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java index f1070248195..800a2a087ec 100644 --- a/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java @@ -20,7 +20,6 @@ import java.io.File; import java.util.Collection; import java.util.Map; -import org.apache.lucene.analysis.Token; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks; import org.apache.solr.SolrTestCaseJ4; diff --git a/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java index b2210444992..08190831a9a 100644 --- a/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java @@ -22,7 +22,6 @@ import java.util.Comparator; import java.util.Date; import java.util.Map; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; diff --git 
a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java index d2877d9e034..8d91a1bc8bd 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java +++ b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java @@ -16,7 +16,10 @@ */ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; + import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import java.util.Collection; -import java.util.HashSet; -import java.io.IOException; /** diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java index 1e69b732ab9..ff53e042aea 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java @@ -20,9 +20,7 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; -import org.apache.lucene.analysis.Token; import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.spelling.PossibilityIterator; import org.junit.Before; import org.junit.Test; diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java index 821fe736d56..11a31c2880b 100644 --- 
a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java @@ -16,16 +16,15 @@ */ package org.apache.solr.spelling; -import org.apache.lucene.analysis.Token; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.common.util.NamedList; import org.junit.Test; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - /** * Test for SpellingQueryConverter diff --git a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java index fdf64ffa976..0e4a0115efb 100644 --- a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java +++ b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java @@ -22,11 +22,9 @@ import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; @@ -65,7 +63,11 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase { public void assertConvertsTo(String text, String expected[]) throws IOException { Collection tokens = converter.convert(text); - TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0])); - assertTokenStreamContents(ts, expected); + assertEquals(tokens.size(), expected.length); + int i = 0; + for (Token token : tokens) { + 
assertEquals(token.toString(), expected[i]); + i++; + } } } diff --git a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java index 92e06bb2403..f24bcbaa7bc 100644 --- a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java @@ -21,7 +21,6 @@ import java.util.LinkedHashMap; import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.NamedList;