mirror of https://github.com/apache/lucene.git
LUCENE-3590: nuke BytesRef.utf8ToChars
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206174 13f79535-47bb-0310-9956-ffa450edef68
parent 233a289a66
commit 7f766cf603
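The pattern applied throughout this commit: every call to the removed instance method BytesRef.utf8ToChars(CharsRef) is replaced by the static utility UnicodeUtil.UTF8toUTF16(BytesRef, CharsRef), followed by CharsRef.toString() where a String is actually needed. A minimal before/after sketch of a typical call site (the enclosing loop and the collectTerms/Utf8ToCharsMigration names are illustrative, not taken from any one file in the diff):

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

class Utf8ToCharsMigration {
  // Before this commit:
  //   final String term = text.utf8ToChars(spare).toString();
  // After: decode into a reusable CharsRef, then materialize a String only when needed.
  static void collectTerms(TermsEnum termsEnum, Collection<String> out) throws IOException {
    final CharsRef spare = new CharsRef(); // reused across iterations; no per-term allocation
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare); // fills spare's char buffer from the UTF-8 bytes
      out.add(spare.toString());
    }
  }
}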
@@ -29,6 +29,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 
 /**
  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -95,7 +96,8 @@ public class FieldTermStack {
     DocsAndPositionsEnum dpEnum = null;
     BytesRef text;
     while ((text = termsEnum.next()) != null) {
-      final String term = text.utf8ToChars(spare).toString();
+      UnicodeUtil.UTF8toUTF16(text, spare);
+      final String term = spare.toString();
       if (!termSet.contains(term)) {
         continue;
       }
@@ -165,12 +165,6 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
     UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
     return ref.toString();
   }
-
-  /** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
-  public CharsRef utf8ToChars(CharsRef ref) {
-    UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
-    return ref;
-  }
 
   /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
   @Override
@@ -25,6 +25,7 @@ import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.BytesRef;
 
@@ -158,7 +159,8 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
       BytesRef text;
       while ((text = te.next()) != null) {
         if (te.docFreq() > maxDocFreq) {
-          stopWords.add(text.utf8ToChars(spare).toString());
+          UnicodeUtil.UTF8toUTF16(text, spare);
+          stopWords.add(spare.toString());
         }
       }
     }
@@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueStr;
 
@@ -77,7 +78,7 @@ public abstract class StringIndexDocValues extends DocValues {
       int ord=termsIndex.getOrd(doc);
       if (ord==0) return null;
       termsIndex.lookup(ord, spare);
-      spare.utf8ToChars(spareChars);
+      UnicodeUtil.UTF8toUTF16(spare, spareChars);
       return spareChars.toString();
     }
 
@@ -34,6 +34,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.UnicodeUtil;
 
 
 /**
@@ -740,7 +741,8 @@ public final class MoreLikeThis {
     final CharsRef spare = new CharsRef();
     BytesRef text;
     while((text = termsEnum.next()) != null) {
-      final String term = text.utf8ToChars(spare).toString();
+      UnicodeUtil.UTF8toUTF16(text, spare);
+      final String term = spare.toString();
       if (isNoiseWord(term)) {
         continue;
       }
@@ -36,6 +36,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
 /**
@@ -371,7 +372,10 @@ public class DirectSpellChecker {
     int index = suggestions.length - 1;
     for (ScoreTerm s : terms) {
       SuggestWord suggestion = new SuggestWord();
-      suggestion.string = s.termAsString != null ? s.termAsString : s.term.utf8ToChars(spare).toString();
+      if (s.termAsString == null) {
+        UnicodeUtil.UTF8toUTF16(s.term, spare);
+        s.termAsString = spare.toString();
+      }
+      suggestion.string = s.termAsString;
       suggestion.score = s.score;
       suggestion.freq = s.docfreq;
       suggestions[index--] = suggestion;
@@ -428,7 +432,8 @@ public class DirectSpellChecker {
         // undo FuzzyTermsEnum's scale factor for a real scaled lev score
         score = boost / e.getScaleFactor() + e.getMinSimilarity();
       } else {
-        termAsString = candidateTerm.utf8ToChars(spare).toString();
+        UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
+        termAsString = spare.toString();
         score = distance.getDistance(term.text(), termAsString);
       }
 
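DirectSpellChecker differs slightly from the other call sites: it keeps the decoded form in ScoreTerm.termAsString, so the UTF-8 to UTF-16 conversion runs at most once per candidate term even if the term is consulted again later. A small sketch of that lazy-caching idea (CandidateTerm is an illustrative holder, not the actual ScoreTerm class):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

// Illustrative holder mirroring the lazy termAsString caching in the diff above.
class CandidateTerm {
  BytesRef term;        // raw indexed bytes of the candidate
  String termAsString;  // lazily filled UTF-16 view, null until first requested

  String asString(CharsRef spare) {
    if (termAsString == null) {             // decode at most once per candidate
      UnicodeUtil.UTF8toUTF16(term, spare); // spare is a scratch buffer owned by the caller
      termAsString = spare.toString();
    }
    return termAsString;
  }
}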
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
 
 /**
  * HighFrequencyDictionary: terms taken from the given field
@@ -89,7 +90,12 @@ public class HighFrequencyDictionary implements Dictionary {
      }
      hasNextCalled = false;
 
-      return (actualTerm != null) ? actualTerm.utf8ToChars(spare).toString() : null;
+      if (actualTerm == null) {
+        return null;
+      } else {
+        UnicodeUtil.UTF8toUTF16(actualTerm, spare);
+        return spare.toString();
+      }
     }
 
     public boolean hasNext() {
@@ -24,6 +24,7 @@ import java.util.Iterator;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 
@@ -75,7 +76,7 @@ public class LuceneDictionary implements Dictionary {
         return null;
       }
 
-      final String result = pendingTerm.utf8ToChars(spare).toString();
+      UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
 
       try {
        pendingTerm = termsEnum.next();
@@ -83,7 +84,7 @@ public class LuceneDictionary implements Dictionary {
         throw new RuntimeException(e);
       }
 
-      return result;
+      return spare.toString();
     }
 
     public boolean hasNext() {
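In the two dictionary iterators the conversion has to happen before the enum is advanced: HighFrequencyDictionary and LuceneDictionary copy the pending term's bytes into the spare CharsRef, call termsEnum.next() (which may reuse the returned BytesRef), and only then build the String from the already-copied chars. A sketch of that look-ahead pattern (TermStringIterator is an illustrative name, not a class from the diff):

import java.io.IOException;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

// Sketch of the look-ahead iteration used by the dictionary classes above.
class TermStringIterator {
  private final TermsEnum termsEnum;
  private final CharsRef spare = new CharsRef();
  private BytesRef pendingTerm;

  TermStringIterator(TermsEnum termsEnum) throws IOException {
    this.termsEnum = termsEnum;
    this.pendingTerm = termsEnum.next(); // prime the look-ahead
  }

  String next() throws IOException {
    if (pendingTerm == null) {
      return null;
    }
    // Copy the bytes out as chars first; advancing the enum may invalidate the BytesRef.
    UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
    pendingTerm = termsEnum.next();
    return spare.toString();
  }
}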
@@ -48,6 +48,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.analysis.CharFilterFactory;
 import org.apache.solr.analysis.TokenFilterFactory;
 import org.apache.solr.analysis.TokenizerChain;
@@ -273,7 +274,8 @@ public class LukeRequestHandler extends RequestHandlerBase
           BytesRef text;
           while((text = termsEnum.next()) != null) {
             final int freq = (int) termsEnum.totalTermFreq();
-            tfv.add( text.utf8ToChars(spare).toString(), freq );
+            UnicodeUtil.UTF8toUTF16(text, spare);
+            tfv.add(spare.toString(), freq);
           }
           f.add( "termVector", tfv );
         }
@@ -649,7 +651,8 @@ public class LukeRequestHandler extends RequestHandlerBase
       TermsEnum termsEnum = terms.iterator(null);
       BytesRef text;
       while((text = termsEnum.next()) != null) {
-        String t = text.utf8ToChars(spare).toString();
+        UnicodeUtil.UTF8toUTF16(text, spare);
+        String t = spare.toString();
 
         // Compute distinct terms for every field
         TopTermQueue tiq = info.get( field );
@@ -30,6 +30,7 @@ import org.apache.lucene.search.grouping.TopGroups;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrDocument;
@@ -605,7 +606,8 @@ public class QueryComponent extends SearchComponent
           // String field in Lucene, which returns the terms
           // data as BytesRef:
           if (val instanceof BytesRef) {
-            field.setValue(((BytesRef)val).utf8ToChars(spare).toString());
+            UnicodeUtil.UTF8toUTF16((BytesRef)val, spare);
+            field.setValue(spare.toString());
             val = ft.toObject(field);
           }
 
@@ -366,7 +366,8 @@ class CountSortedFacetCollector extends FacetCollector {
           // NOTE: we use c>min rather than c>=min as an optimization because we are going in
           // index order, so we already know that the keys are ordered. This can be very
           // important if a lot of the counts are repeated (like zero counts would be).
-          queue.add(new SimpleFacets.CountPair<String,Integer>(term.utf8ToChars(spare).toString(), count));
+          UnicodeUtil.UTF8toUTF16(term, spare);
+          queue.add(new SimpleFacets.CountPair<String,Integer>(spare.toString(), count));
           if (queue.size()>=maxsize) min=queue.last().val;
         }
         return false;
@@ -414,7 +415,8 @@ class IndexSortedFacetCollector extends FacetCollector {
       }
 
       if (limit > 0) {
-        res.add(term.utf8ToChars(spare).toString(), count);
+        UnicodeUtil.UTF8toUTF16(term, spare);
+        res.add(spare.toString(), count);
         limit--;
       }
 
@@ -28,6 +28,7 @@ import org.apache.lucene.queries.function.docvalues.StringIndexDocValues;
 import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.request.SolrQueryRequest;
@@ -205,7 +206,7 @@ public class DateField extends FieldType {
 
   @Override
   public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
-    input.utf8ToChars(charsRef);
+    UnicodeUtil.UTF8toUTF16(input, charsRef);
     charsRef.append(Z_ARRAY, 0, 1);
     return charsRef;
   }
@@ -349,7 +349,7 @@ public abstract class FieldType extends FieldProperties {
 
   /** Given an indexed term, append the human readable representation*/
   public CharsRef indexedToReadable(BytesRef input, CharsRef output) {
-    input.utf8ToChars(output);
+    UnicodeUtil.UTF8toUTF16(input, output);
     return output;
   }
 
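The Solr FieldType.indexedToReadable(BytesRef, CharsRef) overrides follow the same rule but avoid allocating a String where they can: the bytes are decoded straight into the caller-supplied CharsRef, which is then returned (DateField appends the trailing 'Z' marker; the Sortable*Field types below round-trip through a String to undo their sortable encoding and copy the result back into the same CharsRef). A rough sketch of both shapes, with a hypothetical decodeSortable step standing in for the real field-specific conversion:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

// Illustrative class, not Solr's FieldType; method shapes mirror the diff above.
class ReadableFieldType {
  /** Base behaviour: decode the indexed UTF-8 bytes into the caller's CharsRef and return it. */
  CharsRef indexedToReadable(BytesRef input, CharsRef output) {
    UnicodeUtil.UTF8toUTF16(input, output);
    return output;
  }

  /** Sortable-style override: decode, post-process via String, and copy back in place. */
  CharsRef indexedToReadableSortable(BytesRef input, CharsRef charsRef) {
    UnicodeUtil.UTF8toUTF16(input, charsRef);
    final char[] readable = decodeSortable(charsRef.toString()).toCharArray();
    charsRef.copyChars(readable, 0, readable.length);
    return charsRef;
  }

  private String decodeSortable(String indexedForm) {
    return indexedForm; // placeholder for the field-specific sortable decoding
  }
}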
@@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueDouble;
 import org.apache.solr.search.QParser;
@@ -79,7 +80,7 @@ public class SortableDoubleField extends FieldType {
   @Override
   public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    input.utf8ToChars(charsRef);
+    UnicodeUtil.UTF8toUTF16(input, charsRef);
     final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
     charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
     return charsRef;
@@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueFloat;
 import org.apache.solr.search.QParser;
@@ -78,7 +79,8 @@ public class SortableFloatField extends FieldType {
 
   public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    UnicodeUtil.UTF8toUTF16(input, charsRef);
+    final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
     charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
     return charsRef;
   }
@@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueInt;
 import org.apache.solr.search.QParser;
@@ -76,7 +77,8 @@ public class SortableIntField extends FieldType {
 
   public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    UnicodeUtil.UTF8toUTF16(input, charsRef);
+    final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
     charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
     return charsRef;
   }
@@ -24,6 +24,7 @@ import org.apache.lucene.queries.function.valuesource.FieldCacheSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueLong;
 import org.apache.solr.search.QParser;
@@ -68,7 +69,8 @@ public class SortableLongField extends FieldType {
 
   public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    UnicodeUtil.UTF8toUTF16(input, charsRef);
+    final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
     charsRef.copyChars(indexedToReadable, 0, indexedToReadable.length);
     return charsRef;
   }
@@ -21,6 +21,7 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.grouping.SearchGroup;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
@@ -99,7 +100,8 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer<Lis
         if (field != null) {
           FieldType fieldType = field.getType();
           if (sortValue instanceof BytesRef) {
-            String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
+            UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
+            String indexedValue = spare.toString();
             sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
           } else if (sortValue instanceof String) {
             sortValue = (Comparable) fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
@@ -27,6 +27,7 @@ import org.apache.lucene.search.grouping.GroupDocs;
 import org.apache.lucene.search.grouping.TopGroups;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.handler.component.ShardDoc;
@@ -200,7 +201,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
       if (field != null) {
         FieldType fieldType = field.getType();
         if (sortValue instanceof BytesRef) {
-          String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
+          UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
+          String indexedValue = spare.toString();
           sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
         } else if (sortValue instanceof String) {
           sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
@@ -252,7 +254,8 @@ public class TopGroupsResultTransformer implements ShardResultTransformer<List<C
       if (field != null) {
         FieldType fieldType = field.getType();
         if (sortValue instanceof BytesRef) {
-          String indexedValue = ((BytesRef) sortValue).utf8ToChars(spare).toString();
+          UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
+          String indexedValue = spare.toString();
           sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
         } else if (sortValue instanceof String) {
           sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));