From 7f766cf603770fc044e8e731c9f41a1784d013df Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 25 Nov 2011 13:55:41 +0000 Subject: [PATCH] LUCENE-3590: nuke BytesRef.utf8ToChars git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206174 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/search/vectorhighlight/FieldTermStack.java | 4 +++- lucene/src/java/org/apache/lucene/util/BytesRef.java | 6 ------ .../lucene/analysis/query/QueryAutoStopWordAnalyzer.java | 4 +++- .../queries/function/docvalues/StringIndexDocValues.java | 3 ++- .../java/org/apache/lucene/queries/mlt/MoreLikeThis.java | 4 +++- .../apache/lucene/search/spell/DirectSpellChecker.java | 9 +++++++-- .../lucene/search/spell/HighFrequencyDictionary.java | 8 +++++++- .../org/apache/lucene/search/spell/LuceneDictionary.java | 5 +++-- .../apache/solr/handler/admin/LukeRequestHandler.java | 7 +++++-- .../apache/solr/handler/component/QueryComponent.java | 4 +++- .../solr/request/PerSegmentSingleValuedFaceting.java | 6 ++++-- solr/core/src/java/org/apache/solr/schema/DateField.java | 3 ++- solr/core/src/java/org/apache/solr/schema/FieldType.java | 2 +- .../java/org/apache/solr/schema/SortableDoubleField.java | 3 ++- .../java/org/apache/solr/schema/SortableFloatField.java | 4 +++- .../java/org/apache/solr/schema/SortableIntField.java | 4 +++- .../java/org/apache/solr/schema/SortableLongField.java | 4 +++- .../SearchGroupsResultTransformer.java | 4 +++- .../TopGroupsResultTransformer.java | 7 +++++-- 19 files changed, 62 insertions(+), 29 deletions(-) diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java index b0dc0cfd9d5..daa7c6073e1 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java 
@@ -29,6 +29,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; /** * FieldTermStack is a stack that keeps query terms in the specified field @@ -95,7 +96,8 @@ public class FieldTermStack { DocsAndPositionsEnum dpEnum = null; BytesRef text; while ((text = termsEnum.next()) != null) { - final String term = text.utf8ToChars(spare).toString(); + UnicodeUtil.UTF8toUTF16(text, spare); + final String term = spare.toString(); if (!termSet.contains(term)) { continue; } diff --git a/lucene/src/java/org/apache/lucene/util/BytesRef.java b/lucene/src/java/org/apache/lucene/util/BytesRef.java index 11a9e84a805..2c94a7005b0 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRef.java @@ -165,12 +165,6 @@ public final class BytesRef implements Comparable,Cloneable { UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref); return ref.toString(); } - - /** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */ - public CharsRef utf8ToChars(CharsRef ref) { - UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref); - return ref; - } /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */ @Override diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java index 92098e9f5a1..e01980d0fab 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.util.CharsRef; +import 
org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.Version; import org.apache.lucene.util.BytesRef; @@ -158,7 +159,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper { BytesRef text; while ((text = te.next()) != null) { if (te.docFreq() > maxDocFreq) { - stopWords.add(text.utf8ToChars(spare).toString()); + UnicodeUtil.UTF8toUTF16(text, spare); + stopWords.add(spare.toString()); } } } diff --git a/modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java b/modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java index c74e07347f1..7e0bf9278dc 100755 --- a/modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java +++ b/modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueStr; @@ -77,7 +78,7 @@ public abstract class StringIndexDocValues extends DocValues { int ord=termsIndex.getOrd(doc); if (ord==0) return null; termsIndex.lookup(ord, spare); - spare.utf8ToChars(spareChars); + UnicodeUtil.UTF8toUTF16(spare, spareChars); return spareChars.toString(); } diff --git a/modules/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java b/modules/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java index ee9582d1ac0..bea471ae2e4 100644 --- a/modules/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java +++ b/modules/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.CharsRef; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.UnicodeUtil; /** @@ -740,7 +741,8 @@ public final class MoreLikeThis { final CharsRef spare = new CharsRef(); BytesRef text; while((text = termsEnum.next()) != null) { - final String term = text.utf8ToChars(spare).toString(); + UnicodeUtil.UTF8toUTF16(text, spare); + final String term = spare.toString(); if (isNoiseWord(term)) { continue; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java b/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java index 7c6e71eae0e..b91ea35f197 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.LevenshteinAutomata; /** @@ -371,7 +372,11 @@ public class DirectSpellChecker { int index = suggestions.length - 1; for (ScoreTerm s : terms) { SuggestWord suggestion = new SuggestWord(); - suggestion.string = s.termAsString != null ? 
s.termAsString : s.term.utf8ToChars(spare).toString(); + if (s.termAsString == null) { + UnicodeUtil.UTF8toUTF16(s.term, spare); + s.termAsString = spare.toString(); + } + suggestion.string = s.termAsString; suggestion.score = s.score; suggestion.freq = s.docfreq; suggestions[index--] = suggestion; @@ -428,7 +433,8 @@ public class DirectSpellChecker { // undo FuzzyTermsEnum's scale factor for a real scaled lev score score = boost / e.getScaleFactor() + e.getMinSimilarity(); } else { - termAsString = candidateTerm.utf8ToChars(spare).toString(); + UnicodeUtil.UTF8toUTF16(candidateTerm, spare); + termAsString = spare.toString(); score = distance.getDistance(term.text(), termAsString); } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index f339924cdf6..c8672538249 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; /** * HighFrequencyDictionary: terms taken from the given field @@ -89,7 +90,12 @@ public class HighFrequencyDictionary implements Dictionary { } hasNextCalled = false; - return (actualTerm != null) ? 
actualTerm.utf8ToChars(spare).toString() : null; + if (actualTerm == null) { + return null; + } else { + UnicodeUtil.UTF8toUTF16(actualTerm, spare); + return spare.toString(); + } } public boolean hasNext() { diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java index 3f0d56ccacd..894dc0cfdea 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java @@ -24,6 +24,7 @@ import java.util.Iterator; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; @@ -75,7 +76,7 @@ public class LuceneDictionary implements Dictionary { return null; } - final String result = pendingTerm.utf8ToChars(spare).toString(); + UnicodeUtil.UTF8toUTF16(pendingTerm, spare); try { pendingTerm = termsEnum.next(); @@ -83,7 +84,7 @@ public class LuceneDictionary implements Dictionary { throw new RuntimeException(e); } - return result; + return spare.toString(); } public boolean hasNext() { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index 56660898f56..841dbf26865 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -48,6 +48,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import 
org.apache.solr.analysis.TokenizerChain; @@ -273,7 +274,8 @@ public class LukeRequestHandler extends RequestHandlerBase BytesRef text; while((text = termsEnum.next()) != null) { final int freq = (int) termsEnum.totalTermFreq(); - tfv.add( text.utf8ToChars(spare).toString(), freq ); + UnicodeUtil.UTF8toUTF16(text, spare); + tfv.add(spare.toString(), freq); } f.add( "termVector", tfv ); } @@ -649,7 +651,8 @@ public class LukeRequestHandler extends RequestHandlerBase TermsEnum termsEnum = terms.iterator(null); BytesRef text; while((text = termsEnum.next()) != null) { - String t = text.utf8ToChars(spare).toString(); + UnicodeUtil.UTF8toUTF16(text, spare); + String t = spare.toString(); // Compute distinct terms for every field TopTermQueue tiq = info.get( field ); diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 9861860553c..1b110d237d6 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrDocument; @@ -605,7 +606,8 @@ public class QueryComponent extends SearchComponent // String field in Lucene, which returns the terms // data as BytesRef: if (val instanceof BytesRef) { - field.setValue(((BytesRef)val).utf8ToChars(spare).toString()); + UnicodeUtil.UTF8toUTF16((BytesRef)val, spare); + field.setValue(spare.toString()); val = ft.toObject(field); } diff --git a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java 
b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java index 24d982b3a3c..8dfebd9a1fd 100755 --- a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java +++ b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java @@ -366,7 +366,8 @@ class CountSortedFacetCollector extends FacetCollector { // NOTE: we use c>min rather than c>=min as an optimization because we are going in // index order, so we already know that the keys are ordered. This can be very // important if a lot of the counts are repeated (like zero counts would be). - queue.add(new SimpleFacets.CountPair(term.utf8ToChars(spare).toString(), count)); + UnicodeUtil.UTF8toUTF16(term, spare); + queue.add(new SimpleFacets.CountPair(spare.toString(), count)); if (queue.size()>=maxsize) min=queue.last().val; } return false; @@ -414,7 +415,8 @@ class IndexSortedFacetCollector extends FacetCollector { } if (limit > 0) { - res.add(term.utf8ToChars(spare).toString(), count); + UnicodeUtil.UTF8toUTF16(term, spare); + res.add(spare.toString(), count); limit--; } diff --git a/solr/core/src/java/org/apache/solr/schema/DateField.java b/solr/core/src/java/org/apache/solr/schema/DateField.java index 7eb443051bc..530169a8da1 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateField.java @@ -28,6 +28,7 @@ import org.apache.lucene.queries.function.docvalues.StringIndexDocValues; import org.apache.lucene.queries.function.valuesource.FieldCacheSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.DateUtil; import org.apache.solr.request.SolrQueryRequest; @@ -205,7 +206,7 @@ public class DateField extends FieldType { @Override public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) { - input.utf8ToChars(charsRef); + 
UnicodeUtil.UTF8toUTF16(input, charsRef); charsRef.append(Z_ARRAY, 0, 1); return charsRef; } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 2bdbeef0c2e..77214b01a82 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -349,7 +349,7 @@ public abstract class FieldType extends FieldProperties { /** Given an indexed term, append the human readable representation*/ public CharsRef indexedToReadable(BytesRef input, CharsRef output) { - input.utf8ToChars(output); + UnicodeUtil.UTF8toUTF16(input, output); return output; } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java index 32da3f95716..9bd93499b69 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java @@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueDouble; import org.apache.solr.search.QParser; @@ -79,7 +80,7 @@ public class SortableDoubleField extends FieldType { @Override public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) { // TODO: this could be more efficient, but the sortable types should be deprecated instead - input.utf8ToChars(charsRef); + UnicodeUtil.UTF8toUTF16(input, charsRef); final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray(); charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length); return charsRef; diff --git a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java 
b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java index a3a739b0029..1ed0edf5074 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java @@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueFloat; import org.apache.solr.search.QParser; @@ -78,7 +79,8 @@ public class SortableFloatField extends FieldType { public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) { // TODO: this could be more efficient, but the sortable types should be deprecated instead - final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray(); + UnicodeUtil.UTF8toUTF16(input, charsRef); + final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray(); charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length); return charsRef; } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java index d8c6a35abc6..834bd645f47 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java @@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueInt; import org.apache.solr.search.QParser; @@ -76,7 +77,8 @@ public class SortableIntField extends 
FieldType { public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) { // TODO: this could be more efficient, but the sortable types should be deprecated instead - final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray(); + UnicodeUtil.UTF8toUTF16(input, charsRef); + final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray(); charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length); return charsRef; } diff --git a/solr/core/src/java/org/apache/solr/schema/SortableLongField.java b/solr/core/src/java/org/apache/solr/schema/SortableLongField.java index 50feceb7ba6..0e0b3a52906 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableLongField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableLongField.java @@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueLong; import org.apache.solr.search.QParser; @@ -68,7 +69,8 @@ public class SortableLongField extends FieldType { public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) { // TODO: this could be more efficient, but the sortable types should be deprecated instead - final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray(); + UnicodeUtil.UTF8toUTF16(input, charsRef); + final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray(); charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length); return charsRef; } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java 
b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java index 8225d87e38a..2cb61f3ad0b 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java @@ -21,6 +21,7 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.grouping.SearchGroup; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; @@ -99,7 +100,8 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer