LUCENE-3590: nuke BytesRef.utf8ToChars

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206174 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-11-25 13:55:41 +00:00
parent 233a289a66
commit 7f766cf603
19 changed files with 62 additions and 29 deletions
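The change is mechanical at every call site: instead of asking the BytesRef to decode itself into a reusable CharsRef via utf8ToChars, callers now run UnicodeUtil.UTF8toUTF16 into the CharsRef themselves and read the result from there. A minimal before/after sketch of that pattern (not part of the commit; the TermCollector class and the termsEnum/spare/out names are illustrative, mirroring the locals in the hunks below):

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

class TermCollector {
  // Walks a TermsEnum and materializes each term as a String, the way the
  // call sites below do after this commit.
  static void collectTerms(TermsEnum termsEnum, Collection<String> out) throws IOException {
    final CharsRef spare = new CharsRef(); // reused so each term does not allocate a fresh char buffer
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      // before: final String term = text.utf8ToChars(spare).toString();
      UnicodeUtil.UTF8toUTF16(text, spare); // after: decode the UTF-8 bytes into the shared CharsRef
      out.add(spare.toString());
    }
  }
}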

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@ -95,7 +96,8 @@ public class FieldTermStack {
DocsAndPositionsEnum dpEnum = null;
BytesRef text;
while ((text = termsEnum.next()) != null) {
final String term = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (!termSet.contains(term)) {
continue;
}

View File

@ -165,12 +165,6 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return ref.toString();
}
/** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
public CharsRef utf8ToChars(CharsRef ref) {
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return ref;
}
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
@Override

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.BytesRef;
@ -158,7 +159,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
BytesRef text;
while ((text = te.next()) != null) {
if (te.docFreq() > maxDocFreq) {
stopWords.add(text.utf8ToChars(spare).toString());
UnicodeUtil.UTF8toUTF16(text, spare);
stopWords.add(spare.toString());
}
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
@ -77,7 +78,7 @@ public abstract class StringIndexDocValues extends DocValues {
int ord=termsIndex.getOrd(doc);
if (ord==0) return null;
termsIndex.lookup(ord, spare);
spare.utf8ToChars(spareChars);
UnicodeUtil.UTF8toUTF16(spare, spareChars);
return spareChars.toString();
}

View File

@ -34,6 +34,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
/**
@ -740,7 +741,8 @@ public final class MoreLikeThis {
final CharsRef spare = new CharsRef();
BytesRef text;
while((text = termsEnum.next()) != null) {
final String term = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/**
@ -371,7 +372,10 @@ public class DirectSpellChecker {
int index = suggestions.length - 1;
for (ScoreTerm s : terms) {
SuggestWord suggestion = new SuggestWord();
suggestion.string = s.termAsString != null ? s.termAsString : s.term.utf8ToChars(spare).toString();
if (s.termAsString == null) {
UnicodeUtil.UTF8toUTF16(s.term, spare);
s.termAsString = spare.toString();
}
suggestion.string = s.termAsString;
suggestion.score = s.score;
suggestion.freq = s.docfreq;
suggestions[index--] = suggestion;
@ -428,7 +432,8 @@ public class DirectSpellChecker {
// undo FuzzyTermsEnum's scale factor for a real scaled lev score
score = boost / e.getScaleFactor() + e.getMinSimilarity();
} else {
termAsString = candidateTerm.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
termAsString = spare.toString();
score = distance.getDistance(term.text(), termAsString);
}
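The DirectSpellChecker hunks above do slightly more than swap the call: the decoded form is now cached on the ScoreTerm (termAsString) the first time it is needed, so the UTF-16 conversion runs at most once per candidate even when both the scoring path and the result-building path want the String. A rough sketch of that lazy-decode pattern (illustrative only; Candidate stands in for the ScoreTerm class):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

class Candidate {
  BytesRef term;       // raw term bytes from the index
  String termAsString; // null until first decoded
}

class LazyDecode {
  // Decode the candidate's term at most once, reusing a shared scratch CharsRef.
  static String termAsString(Candidate c, CharsRef spare) {
    if (c.termAsString == null) {
      UnicodeUtil.UTF8toUTF16(c.term, spare);
      c.termAsString = spare.toString();
    }
    return c.termAsString;
  }
}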

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
/**
* HighFrequencyDictionary: terms taken from the given field
@ -89,7 +90,12 @@ public class HighFrequencyDictionary implements Dictionary {
}
hasNextCalled = false;
return (actualTerm != null) ? actualTerm.utf8ToChars(spare).toString() : null;
if (actualTerm == null) {
return null;
} else {
UnicodeUtil.UTF8toUTF16(actualTerm, spare);
return spare.toString();
}
}
public boolean hasNext() {

View File

@ -24,6 +24,7 @@ import java.util.Iterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
@ -75,7 +76,7 @@ public class LuceneDictionary implements Dictionary {
return null;
}
final String result = pendingTerm.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
try {
pendingTerm = termsEnum.next();
@ -83,7 +84,7 @@ public class LuceneDictionary implements Dictionary {
throw new RuntimeException(e);
}
return result;
return spare.toString();
}
public boolean hasNext() {

View File

@ -48,6 +48,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
@ -273,7 +274,8 @@ public class LukeRequestHandler extends RequestHandlerBase
BytesRef text;
while((text = termsEnum.next()) != null) {
final int freq = (int) termsEnum.totalTermFreq();
tfv.add( text.utf8ToChars(spare).toString(), freq );
UnicodeUtil.UTF8toUTF16(text, spare);
tfv.add(spare.toString(), freq);
}
f.add( "termVector", tfv );
}
@ -649,7 +651,8 @@ public class LukeRequestHandler extends RequestHandlerBase
TermsEnum termsEnum = terms.iterator(null);
BytesRef text;
while((text = termsEnum.next()) != null) {
String t = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
String t = spare.toString();
// Compute distinct terms for every field
TopTermQueue tiq = info.get( field );

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocument;
@ -605,7 +606,8 @@ public class QueryComponent extends SearchComponent
// String field in Lucene, which returns the terms
// data as BytesRef:
if (val instanceof BytesRef) {
field.setValue(((BytesRef)val).utf8ToChars(spare).toString());
UnicodeUtil.UTF8toUTF16((BytesRef)val, spare);
field.setValue(spare.toString());
val = ft.toObject(field);
}

View File

@ -366,7 +366,8 @@ class CountSortedFacetCollector extends FacetCollector {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
queue.add(new SimpleFacets.CountPair<String,Integer>(term.utf8ToChars(spare).toString(), count));
UnicodeUtil.UTF8toUTF16(term, spare);
queue.add(new SimpleFacets.CountPair<String,Integer>(spare.toString(), count));
if (queue.size()>=maxsize) min=queue.last().val;
}
return false;
@ -414,7 +415,8 @@ class IndexSortedFacetCollector extends FacetCollector {
}
if (limit > 0) {
res.add(term.utf8ToChars(spare).toString(), count);
UnicodeUtil.UTF8toUTF16(term, spare);
res.add(spare.toString(), count);
limit--;
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.queries.function.docvalues.StringIndexDocValues;
import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.DateUtil;
import org.apache.solr.request.SolrQueryRequest;
@ -205,7 +206,7 @@ public class DateField extends FieldType {
@Override
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
input.utf8ToChars(charsRef);
UnicodeUtil.UTF8toUTF16(input, charsRef);
charsRef.append(Z_ARRAY, 0, 1);
return charsRef;
}

View File

@ -349,7 +349,7 @@ public abstract class FieldType extends FieldProperties {
/** Given an indexed term, append the human readable representation*/
public CharsRef indexedToReadable(BytesRef input, CharsRef output) {
input.utf8ToChars(output);
UnicodeUtil.UTF8toUTF16(input, output);
return output;
}
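FieldType.indexedToReadable(BytesRef, CharsRef) is the common base: it decodes the indexed UTF-8 bytes into the caller-supplied CharsRef and returns it, and the subclasses in this commit layer their extra work on top of that same conversion (DateField above appends a trailing "Z"; the Sortable*Field types below re-encode the value through their String-based indexedToReadable). A sketch of an override in the same shape as those hunks (illustrative, not code from the commit):

// Inside a hypothetical FieldType subclass:
@Override
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
  UnicodeUtil.UTF8toUTF16(input, charsRef); // raw indexed term, now as UTF-16
  // hand off to the String-based variant for the type-specific decoding,
  // then reuse the same CharsRef to carry the readable form back
  final char[] readable = indexedToReadable(charsRef.toString()).toCharArray();
  charsRef.copyChars(readable, 0, readable.length);
  return charsRef;
}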

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueDouble;
import org.apache.solr.search.QParser;
@ -79,7 +80,7 @@ public class SortableDoubleField extends FieldType {
@Override
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// TODO: this could be more efficient, but the sortable types should be deprecated instead
input.utf8ToChars(charsRef);
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueFloat;
import org.apache.solr.search.QParser;
@ -78,7 +79,8 @@ public class SortableFloatField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// TODO: this could be more efficient, but the sortable types should be deprecated instead
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
import org.apache.solr.search.QParser;
@ -76,7 +77,8 @@ public class SortableIntField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// TODO: this could be more efficient, but the sortable types should be deprecated instead
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueLong;
import org.apache.solr.search.QParser;
@ -68,7 +69,8 @@ public class SortableLongField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// TODO: this could be more efficient, but the sortable types should be deprecated instead
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -21,6 +21,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
@ -99,7 +100,8 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
@ -200,7 +201,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
@ -252,7 +254,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));