LUCENE-3590: nuke BytesRef.utf8ToChars

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206174 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-11-25 13:55:41 +00:00
parent 233a289a66
commit 7f766cf603
19 changed files with 62 additions and 29 deletions

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@ -95,7 +96,8 @@ public class FieldTermStack {
DocsAndPositionsEnum dpEnum = null;
BytesRef text;
while ((text = termsEnum.next()) != null) {
final String term = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (!termSet.contains(term)) {
continue;
}

View File

@ -166,12 +166,6 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
return ref.toString();
}
/**
 * Interprets stored bytes as UTF8 bytes into the given {@link CharsRef}.
 * <p>
 * Passes this instance's {@code bytes}, {@code offset} and {@code length} to
 * {@code UnicodeUtil.UTF8toUTF16} together with {@code ref}.
 * <p>
 * NOTE: this is the method the commit removes; the other hunks replace its
 * call sites with direct {@code UnicodeUtil.UTF8toUTF16(bytesRef, charsRef)} calls.
 *
 * @param ref the {@link CharsRef} handed to the decoder
 * @return the same {@code ref} instance that was passed in
 */
public CharsRef utf8ToChars(CharsRef ref) {
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return ref;
}
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
@Override
public String toString() {

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.BytesRef;
@ -158,7 +159,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
BytesRef text;
while ((text = te.next()) != null) {
if (te.docFreq() > maxDocFreq) {
stopWords.add(text.utf8ToChars(spare).toString());
UnicodeUtil.UTF8toUTF16(text, spare);
stopWords.add(spare.toString());
}
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
@ -77,7 +78,7 @@ public abstract class StringIndexDocValues extends DocValues {
int ord=termsIndex.getOrd(doc);
if (ord==0) return null;
termsIndex.lookup(ord, spare);
spare.utf8ToChars(spareChars);
UnicodeUtil.UTF8toUTF16(spare, spareChars);
return spareChars.toString();
}

View File

@ -34,6 +34,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
/**
@ -740,7 +741,8 @@ public final class MoreLikeThis {
final CharsRef spare = new CharsRef();
BytesRef text;
while((text = termsEnum.next()) != null) {
final String term = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/**
@ -371,7 +372,10 @@ public class DirectSpellChecker {
int index = suggestions.length - 1;
for (ScoreTerm s : terms) {
SuggestWord suggestion = new SuggestWord();
suggestion.string = s.termAsString != null ? s.termAsString : s.term.utf8ToChars(spare).toString();
// Decode the UTF-8 term only when no String form is available yet, and keep
// the decoded String on the ScoreTerm.
if (s.termAsString == null) {
  UnicodeUtil.UTF8toUTF16(s.term, spare);
  s.termAsString = spare.toString();
}
// The one-liner this replaces also populated suggestion.string. Without this
// assignment every SuggestWord stored into suggestions[] is left with its
// string unset.
suggestion.string = s.termAsString;
suggestion.score = s.score;
suggestion.freq = s.docfreq;
suggestions[index--] = suggestion;
@ -428,7 +432,8 @@ public class DirectSpellChecker {
// undo FuzzyTermsEnum's scale factor for a real scaled lev score
score = boost / e.getScaleFactor() + e.getMinSimilarity();
} else {
termAsString = candidateTerm.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
termAsString = spare.toString();
score = distance.getDistance(term.text(), termAsString);
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
/**
* HighFrequencyDictionary: terms taken from the given field
@ -89,7 +90,12 @@ public class HighFrequencyDictionary implements Dictionary {
}
hasNextCalled = false;
return (actualTerm != null) ? actualTerm.utf8ToChars(spare).toString() : null;
if (actualTerm == null) {
return null;
} else {
UnicodeUtil.UTF8toUTF16(actualTerm, spare);
return spare.toString();
}
}
public boolean hasNext() {

View File

@ -24,6 +24,7 @@ import java.util.Iterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
@ -75,7 +76,7 @@ public class LuceneDictionary implements Dictionary {
return null;
}
final String result = pendingTerm.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
try {
pendingTerm = termsEnum.next();
@ -83,7 +84,7 @@ public class LuceneDictionary implements Dictionary {
throw new RuntimeException(e);
}
return result;
return spare.toString();
}
public boolean hasNext() {

View File

@ -48,6 +48,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
@ -273,7 +274,8 @@ public class LukeRequestHandler extends RequestHandlerBase
BytesRef text;
while((text = termsEnum.next()) != null) {
final int freq = (int) termsEnum.totalTermFreq();
tfv.add( text.utf8ToChars(spare).toString(), freq );
UnicodeUtil.UTF8toUTF16(text, spare);
tfv.add(spare.toString(), freq);
}
f.add( "termVector", tfv );
}
@ -649,7 +651,8 @@ public class LukeRequestHandler extends RequestHandlerBase
TermsEnum termsEnum = terms.iterator(null);
BytesRef text;
while((text = termsEnum.next()) != null) {
String t = text.utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16(text, spare);
String t = spare.toString();
// Compute distinct terms for every field
TopTermQueue tiq = info.get( field );

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocument;
@ -605,7 +606,8 @@ public class QueryComponent extends SearchComponent
// String field in Lucene, which returns the terms
// data as BytesRef:
if (val instanceof BytesRef) {
field.setValue(((BytesRef)val).utf8ToChars(spare).toString());
UnicodeUtil.UTF8toUTF16((BytesRef)val, spare);
field.setValue(spare.toString());
val = ft.toObject(field);
}

View File

@ -366,7 +366,8 @@ class CountSortedFacetCollector extends FacetCollector {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
queue.add(new SimpleFacets.CountPair<String,Integer>(term.utf8ToChars(spare).toString(), count));
UnicodeUtil.UTF8toUTF16(term, spare);
queue.add(new SimpleFacets.CountPair<String,Integer>(spare.toString(), count));
if (queue.size()>=maxsize) min=queue.last().val;
}
return false;
@ -414,7 +415,8 @@ class IndexSortedFacetCollector extends FacetCollector {
}
if (limit > 0) {
res.add(term.utf8ToChars(spare).toString(), count);
UnicodeUtil.UTF8toUTF16(term, spare);
res.add(spare.toString(), count);
limit--;
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.queries.function.docvalues.StringIndexDocValues;
import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.DateUtil;
import org.apache.solr.request.SolrQueryRequest;
@ -205,7 +206,7 @@ public class DateField extends FieldType {
@Override
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// Decodes the UTF-8 bytes of `input` into `charsRef`, appends from Z_ARRAY
// (called with arguments 0 and 1), and returns the same `charsRef`.
// NOTE(review): Z_ARRAY is declared outside this hunk; presumably it supplies
// the trailing 'Z' of the readable date form (offset 0, length 1) — confirm.
// Pre-change call (removed side of this hunk): BytesRef.utf8ToChars is deleted by this commit.
input.utf8ToChars(charsRef);
// Replacement call (added side of this hunk).
UnicodeUtil.UTF8toUTF16(input, charsRef);
charsRef.append(Z_ARRAY, 0, 1);
return charsRef;
}

View File

@ -349,7 +349,7 @@ public abstract class FieldType extends FieldProperties {
/**
 * Given an indexed term, append the human readable representation.
 * <p>
 * This implementation decodes the UTF-8 bytes of {@code input} into
 * {@code output} and performs no further conversion.
 *
 * @param input the indexed term, as UTF-8 bytes
 * @param output the {@link CharsRef} that receives the decoded chars
 * @return the same {@code output} instance that was passed in
 */
public CharsRef indexedToReadable(BytesRef input, CharsRef output) {
// Pre-change call (removed side of this hunk): BytesRef.utf8ToChars is deleted by this commit.
input.utf8ToChars(output);
// Replacement call (added side of this hunk).
UnicodeUtil.UTF8toUTF16(input, output);
return output;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueDouble;
import org.apache.solr.search.QParser;
@ -79,7 +80,7 @@ public class SortableDoubleField extends FieldType {
@Override
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// TODO: this could be more efficient, but the sortable types should be deprecated instead
input.utf8ToChars(charsRef);
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueFloat;
import org.apache.solr.search.QParser;
@ -78,7 +79,8 @@ public class SortableFloatField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// Converts the UTF-8 indexed term in `input` to its readable form: the bytes are
// decoded into `charsRef`, the resulting String is passed through
// indexedToReadable(String), and the converted chars are stored back into
// `charsRef` via copyChars. Returns the same `charsRef` that was passed in.
// TODO: this could be more efficient, but the sortable types should be deprecated instead
// Pre-change form (removed side of this hunk): decodes through BytesRef.utf8ToChars,
// the method this commit deletes.
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
// Replacement (added side of this hunk): decode explicitly with UnicodeUtil, then convert.
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
import org.apache.solr.search.QParser;
@ -76,7 +77,8 @@ public class SortableIntField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// Converts the UTF-8 indexed term in `input` to its readable form: the bytes are
// decoded into `charsRef`, the resulting String is passed through
// indexedToReadable(String), and the converted chars are stored back into
// `charsRef` via copyChars. Returns the same `charsRef` that was passed in.
// TODO: this could be more efficient, but the sortable types should be deprecated instead
// Pre-change form (removed side of this hunk): decodes through BytesRef.utf8ToChars,
// the method this commit deletes.
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
// Replacement (added side of this hunk): decode explicitly with UnicodeUtil, then convert.
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueLong;
import org.apache.solr.search.QParser;
@ -68,7 +69,8 @@ public class SortableLongField extends FieldType {
public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
// Converts the UTF-8 indexed term in `input` to its readable form: the bytes are
// decoded into `charsRef`, the resulting String is passed through
// indexedToReadable(String), and the converted chars are stored back into
// `charsRef` via copyChars. Returns the same `charsRef` that was passed in.
// TODO: this could be more efficient, but the sortable types should be deprecated instead
// Pre-change form (removed side of this hunk): decodes through BytesRef.utf8ToChars,
// the method this commit deletes.
final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
// Replacement (added side of this hunk): decode explicitly with UnicodeUtil, then convert.
UnicodeUtil.UTF8toUTF16(input, charsRef);
final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
return charsRef;
}

View File

@ -21,6 +21,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
@ -99,7 +100,8 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
@ -200,7 +201,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
@ -252,7 +254,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));