LUCENE-1607: add StringHelper.intern(), a caching front end to String.intern()

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@801344 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2009-08-05 18:05:06 +00:00
parent ab276a5ab9
commit 7f1319fdf0
24 changed files with 287 additions and 36 deletions

View File

@@ -717,6 +717,12 @@ Optimizations
     optional scorers, or not enough optional scorers to satisfy
     minShouldMatch).  (Shai Erera via Mike McCandless)
 
+12. LUCENE-1607: To speed up string interning for commonly used
+    strings, the StringHelper.intern() interface was added with a
+    default implementation that uses a lockless cache.
+    (Earwin Burrfoot, yonik)
+
 Documentation
 
 Build
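The entry above describes the pattern applied throughout this commit: wherever a field name (or other heavily reused string) used to be canonicalized with String.intern(), the call now goes through StringHelper.intern(), which consults a lockless cache and only falls back to String.intern() on a miss, so reference equality between interned strings still holds. A minimal sketch of that call pattern, assuming a hypothetical caller class (FieldNameHolder is not part of this commit):

import org.apache.lucene.util.StringHelper;

// Hypothetical example class illustrating the substitution made in this commit.
class FieldNameHolder {
  private final String name;

  FieldNameHolder(String name) {
    // Before: this.name = name.intern();
    // After:  cached lookup first, String.intern() only on a cache miss,
    //         so (this.name == other.name) still works for equal field names.
    this.name = StringHelper.intern(name);
  }
}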

View File

@@ -11,6 +11,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * {@link Scorer} implementation which scores text fragments by the number of
@@ -67,7 +68,7 @@ public class QueryScorer implements Scorer {
    */
   public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
       throws IOException {
-    this.defaultField = defaultField.intern();
+    this.defaultField = StringHelper.intern(defaultField);
     init(query, field, reader, true);
   }
@@ -75,7 +76,7 @@ public class QueryScorer implements Scorer {
    * @param defaultField - The default field for queries with the field name unspecified
    */
   public QueryScorer(Query query, String field, String defaultField) {
-    this.defaultField = defaultField.intern();
+    this.defaultField = StringHelper.intern(defaultField);
     init(query, field, null, true);
   }

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Utility class used to extract the terms used in a query, plus any weights.
@@ -95,7 +96,7 @@ public final class QueryTermExtractor
     HashSet terms=new HashSet();
     if(fieldName!=null)
     {
-      fieldName=fieldName.intern();
+      fieldName= StringHelper.intern(fieldName);
     }
     getTerms(query,terms,prohibited,fieldName);
     return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);

View File

@@ -50,6 +50,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether Terms from the query are contained in a supplied TokenStream.
@@ -68,7 +69,7 @@ public class WeightedSpanTermExtractor {
   public WeightedSpanTermExtractor(String defaultField) {
     if (defaultField != null) {
-      this.defaultField = defaultField.intern();
+      this.defaultField = StringHelper.intern(defaultField);
     }
   }
@@ -362,7 +363,7 @@ public class WeightedSpanTermExtractor {
   public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
       String fieldName) throws IOException {
     if (fieldName != null) {
-      this.fieldName = fieldName.intern();
+      this.fieldName = StringHelper.intern(fieldName);
     }
     Map terms = new PositionCheckingMap();

View File

@@ -43,6 +43,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
@@ -458,7 +459,7 @@ public class InstantiatedIndexWriter {
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
       if (fieldSetting == null) {
         fieldSetting = new FieldSetting();
-        fieldSetting.fieldName = field.name().intern();
+        fieldSetting.fieldName = StringHelper.intern(field.name());
         fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
         fieldNameBuffer.add(fieldSetting.fieldName);
       }

View File

@@ -23,6 +23,7 @@ import java.util.Date;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Given a directory and a list of fields, updates the fieldNorms in place for every document.
@@ -103,7 +104,7 @@ public class FieldNormModifier {
    * @param field the field whose norms should be reset
    */
   public void reSetNorms(String field) throws IOException {
-    String fieldName = field.intern();
+    String fieldName = StringHelper.intern(field);
     int[] termCounts = new int[0];
     byte[] fakeNorms = new byte[0];

View File

@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.StringHelper;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -52,7 +54,7 @@ public class TermVectorAccessor {
    */
   public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {
-    fieldName = fieldName.intern();
+    fieldName = StringHelper.intern(fieldName);
     decoratedMapper.decorated = mapper;
     decoratedMapper.termVectorStored = false;

View File

@@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.File;
 import java.io.IOException;
@@ -105,7 +106,7 @@ public class LengthNormModifier {
    * @param field the field whose norms should be reset
    */
   public void reSetNorms(String field) throws IOException {
-    String fieldName = field.intern();
+    String fieldName = StringHelper.intern(field);
     int[] termCounts = new int[0];
     IndexReader reader = null;

View File

@@ -23,6 +23,7 @@ import java.util.Iterator;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.*;
@@ -42,7 +43,7 @@ public class LuceneDictionary implements Dictionary {
   public LuceneDictionary(IndexReader reader, String field) {
     this.reader = reader;
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
   }
 
   public final Iterator getWordsIterator() {

View File

@@ -17,7 +17,8 @@ package org.apache.lucene.document;
 import org.apache.lucene.search.PhraseQuery; // for javadocs
 import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.analysis.TokenStream; // for javadocs
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.StringHelper; // for javadocs
 
 /**
@@ -54,7 +55,7 @@ public abstract class AbstractField implements Fieldable {
   protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
     if (name == null)
       throw new NullPointerException("name cannot be null");
-    this.name = name.intern(); // field names are interned
+    this.name = StringHelper.intern(name); // field names are interned
     if (store == Field.Store.YES){
       this.isStored = true;

View File

@@ -20,6 +20,7 @@ package org.apache.lucene.document;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.IndexWriter; // for javadoc
 import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.Reader;
 import java.io.Serializable;
@@ -337,7 +338,7 @@ public final class Field extends AbstractField implements Fieldable, Serializable
         + "for a field that is not indexed");
     if (internName) // field names are optionally interned
-      name = name.intern();
+      name = StringHelper.intern(name);
     this.name = name;
@@ -417,7 +418,7 @@ public final class Field extends AbstractField implements Fieldable, Serializable
     if (reader == null)
       throw new NullPointerException("reader cannot be null");
-    this.name = name.intern(); // field names are interned
+    this.name = StringHelper.intern(name); // field names are interned
     this.fieldsData = reader;
     this.isStored = false;
@@ -464,7 +465,7 @@ public final class Field extends AbstractField implements Fieldable, Serializable
     if (tokenStream == null)
       throw new NullPointerException("tokenStream cannot be null");
-    this.name = name.intern(); // field names are interned
+    this.name = StringHelper.intern(name); // field names are interned
     this.fieldsData = null;
     this.tokenStream = tokenStream;
@@ -509,7 +510,7 @@ public final class Field extends AbstractField implements Fieldable, Serializable
     if (value == null)
       throw new IllegalArgumentException("value cannot be null");
-    this.name = name.intern();
+    this.name = StringHelper.intern(name); // field names are interned
     fieldsData = value;
     if (store == Store.YES) {

View File

@@ -22,6 +22,7 @@ import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.util.*;
@@ -248,7 +249,7 @@ final class FieldInfos {
   private FieldInfo addInternal(String name, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector,
                                 boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
-    name = name.intern();
+    name = StringHelper.intern(name);
     FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
                                  storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     byNumber.add(fi);
@@ -352,7 +353,7 @@ final class FieldInfos {
     }
     for (int i = 0; i < size; i++) {
-      String name = input.readString().intern();
+      String name = StringHelper.intern(input.readString());
       byte bits = input.readByte();
       boolean isIndexed = (bits & IS_INDEXED) != 0;
       boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;

View File

@@ -24,6 +24,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -643,7 +644,7 @@ final class FieldsReader implements Cloneable {
       this.isTokenized = tokenize;
-      this.name = fi.name.intern();
+      this.name = StringHelper.intern(fi.name);
       this.isIndexed = fi.isIndexed;
       this.omitNorms = fi.omitNorms;
       this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;

View File

@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.StringHelper;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -33,7 +35,8 @@ public final class Term implements Comparable, java.io.Serializable {
    * <p>Note that a null field or null text value results in undefined
    * behavior for most Lucene APIs that accept a Term parameter. */
   public Term(String fld, String txt) {
-    this(fld, txt, true);
+    field = StringHelper.intern(fld);
+    text = txt;
   }
 
   /** Constructs a Term with the given field and empty text.
@@ -47,8 +50,8 @@ public final class Term implements Comparable, java.io.Serializable {
   }
 
   Term(String fld, String txt, boolean intern) {
-    field = intern ? fld.intern() : fld;              // field names are interned
+    field = intern ? StringHelper.intern(fld) : fld;  // field names are interned
     text = txt;                                       // unless already known to be
   }
 
   /** Returns the field of this term, an interned string. The field indicates
@@ -130,6 +133,6 @@ public final class Term implements Comparable, java.io.Serializable {
     throws java.io.IOException, ClassNotFoundException
   {
     in.defaultReadObject();
-    field = field.intern();
+    field = StringHelper.intern(field);
   }
 }

View File

@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -103,7 +104,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     /** Creates one of these objects. */
     Entry (String field, int type, Locale locale) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = type;
       this.custom = null;
       this.locale = locale;
@@ -111,7 +112,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     /** Creates one of these objects for a custom comparator/parser. */
     Entry (String field, Object custom) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = SortField.CUSTOM;
       this.custom = custom;
       this.locale = null;
@@ -119,7 +120,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. */
     Entry (String field, int type, Parser parser) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = type;
       this.custom = parser;
       this.locale = null;
@@ -463,7 +464,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String) fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
       final String[] retArray = new String[reader.maxDoc()];
       TermDocs termDocs = reader.termDocs();
       TermEnum termEnum = reader.terms (new Term (field));
@@ -495,7 +496,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String) fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
      final int[] retArray = new int[reader.maxDoc()];
      String[] mterms = new String[reader.maxDoc()+1];
      TermDocs termDocs = reader.termDocs();
@@ -574,7 +575,7 @@ class FieldCacheImpl implements ExtendedFieldCache {
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String)fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
       TermEnum enumerator = reader.terms (new Term (field));
       try {
         Term term = enumerator.term();

View File

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
 import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.StringHelper;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -151,7 +152,7 @@ public final class NumericRangeQuery extends MultiTermQuery {
     assert (valSize == 32 || valSize == 64);
     if (precisionStep < 1)
       throw new IllegalArgumentException("precisionStep must be >=1");
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
     this.precisionStep = precisionStep;
     this.valSize = valSize;
     this.min = min;

View File

@@ -25,6 +25,7 @@ import org.apache.lucene.document.NumericField; // javadocs
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Stores information about how to sort documents by terms in an individual
@@ -272,7 +273,7 @@ implements Serializable {
       if (type != SCORE && type != DOC)
         throw new IllegalArgumentException("field can only be null when type is SCORE or DOC");
     } else {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
     }
   }
@@ -510,7 +511,7 @@ implements Serializable {
    * @deprecated
    */
   static int detectFieldType(IndexReader reader, String fieldKey) throws IOException {
-    String field = fieldKey.intern();
+    String field = StringHelper.intern(fieldKey);
     TermEnum enumerator = reader.terms(new Term(field));
     try {
       Term term = enumerator.term();

View File

@@ -22,6 +22,7 @@ import java.text.Collator;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Subclass of FilteredTermEnum for enumerating all terms that match the
@@ -75,7 +76,7 @@ public class TermRangeTermEnum extends FilteredTermEnum {
     this.lowerTermText = lowerTermText;
     this.includeLower = includeLower;
     this.includeUpper = includeUpper;
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
 
     // do a little bit of normalization...
     // open ended range queries should always be inclusive.

View File

@@ -0,0 +1,82 @@
package org.apache.lucene.util;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Simple lockless and memory barrier free String intern cache that is guaranteed
 * to return the same String instance as String.intern() does.
 */
public class SimpleStringInterner extends StringInterner {

  private static class Entry {
    final private String str;
    final private int hash;
    private Entry next;
    private Entry(String str, int hash, Entry next) {
      this.str = str;
      this.hash = hash;
      this.next = next;
    }
  }

  private final Entry[] cache;
  private final int maxChainLength;

  /**
   * @param tableSize  Size of the hash table, should be a power of two.
   * @param maxChainLength  Maximum length of each bucket, after which the oldest item inserted is dropped.
   */
  public SimpleStringInterner(int tableSize, int maxChainLength) {
    cache = new Entry[Math.max(1,BitUtil.nextHighestPowerOfTwo(tableSize))];
    this.maxChainLength = Math.max(2,maxChainLength);
  }

  // @Override
  public String intern(String s) {
    int h = s.hashCode();
    // In the future, it may be worth augmenting the string hash
    // if the lower bits need better distribution.
    int slot = h & (cache.length-1);

    Entry first = this.cache[slot];
    Entry nextToLast = null;

    int chainLength = 0;

    for(Entry e=first; e!=null; e=e.next) {
      if (e.hash == h && (e.str == s || e.str.compareTo(s)==0)) {
        // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) {
        return e.str;
      }

      chainLength++;
      if (e.next != null) {
        nextToLast = e;
      }
    }

    // insertion-order cache: add new entry at head
    s = s.intern();
    this.cache[slot] = new Entry(s, h, first);
    if (chainLength >= maxChainLength) {
      // prune last entry
      nextToLast.next = null;
    }
    return s;
  }
}
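The class above is deliberately lock-free: the bucket array is written without synchronization, so one thread may not see another thread's freshly cached entry, but because every cached value has already passed through String.intern(), the worst case is a redundant lookup, never a wrong instance. A hedged usage sketch showing construction with a larger table than the StringHelper default; InternerSizingSketch is a hypothetical demo class, not part of this commit:

import org.apache.lucene.util.SimpleStringInterner;
import org.apache.lucene.util.StringInterner;

// Hypothetical demo class, not part of this commit.
public class InternerSizingSketch {
  public static void main(String[] args) {
    // Table size is rounded up to a power of two; chain length is clamped to >= 2.
    StringInterner interner = new SimpleStringInterner(1 << 14, 8);

    String a = interner.intern(new String("title"));
    String b = interner.intern("title");

    // Equal strings resolve to one instance, and it is the instance
    // String.intern() returns, because cache misses delegate to s.intern().
    System.out.println(a == b);                 // true
    System.out.println(a == "title".intern());  // true
  }
}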

View File

@@ -24,6 +24,17 @@ package org.apache.lucene.util;
  * $Id$
  */
 public abstract class StringHelper {
+
+  /**
+   * Expert:
+   * The StringInterner implementation used by Lucene.
+   * This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used.
+   */
+  public static StringInterner interner = new SimpleStringInterner(1024,8);
+
+  /** Return the same string object for all equal strings */
+  public static String intern(String s) {
+    return interner.intern(s);
+  }
 
   /**
    * Compares two byte[] arrays, element by element, and returns the
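The interner field added above is a public static hook, so an application that prefers the JVM's native behaviour can swap the cache out entirely, as the commented-out "try native intern" line in the new test also suggests. A hedged sketch of that swap (DisableInternCacheSketch is a hypothetical demo class); per the javadoc, it should happen before other Lucene APIs have interned anything:

import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.StringInterner;

// Hypothetical demo class, not part of this commit.
public class DisableInternCacheSketch {
  public static void main(String[] args) {
    // Replace the default SimpleStringInterner(1024, 8) with the base class,
    // which delegates straight to String.intern() -- no cache in front.
    StringHelper.interner = new StringInterner();

    String s = StringHelper.intern("contents");
    System.out.println(s == "contents".intern()); // true with either interner
  }
}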

View File

@@ -0,0 +1,37 @@
package org.apache.lucene.util;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Subclasses of StringInterner are required to
 * return the same single String object for all equal strings.
 * Depending on the implementation, this may not be
 * the same object returned as String.intern().
 *
 * This StringInterner base class simply delegates to String.intern().
 */
public class StringInterner {

  /** Returns a single object instance for each equal string. */
  public String intern(String s) {
    return s.intern();
  }

  /** Returns a single object instance for each equal string. */
  public String intern(char[] arr, int offset, int len) {
    return intern(new String(arr, offset, len));
  }
}
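The contract above only requires one canonical instance per equal string; agreement with String.intern() is optional. As a hedged illustration of what an alternative subclass could look like (MapStringInterner is not part of this commit), here is a map-backed interner that never touches the JVM intern pool; unlike SimpleStringInterner it needs a lock, which is exactly the cost the default implementation avoids:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.util.StringInterner;

// Hypothetical alternative implementation, not part of this commit.
public class MapStringInterner extends StringInterner {
  private final Map map = new HashMap();

  // Synchronized because a plain HashMap is unsafe for concurrent writers:
  // this trades the lock-free design of SimpleStringInterner for an
  // unbounded pool that is independent of String.intern().
  public synchronized String intern(String s) {
    String canonical = (String) map.get(s);
    if (canonical == null) {
      map.put(s, s);
      canonical = s;
    }
    return canonical;
  }
}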

View File

@@ -20,6 +20,7 @@ import org.apache.lucene.analysis.*;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.StringHelper;
 
 import org.apache.lucene.search.TermQuery;
 import java.util.*;
@@ -266,7 +267,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
     TermDocs termDocs2 = r2.termDocs();
     // create mapping from id2 space to id2 based on idField
-    idField = idField.intern();
+    idField = StringHelper.intern(idField);
     TermEnum termEnum = r1.terms (new Term (idField, ""));
     do {
       Term term = termEnum.term();

View File

@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Serializable;
@@ -69,7 +70,7 @@ implements Comparable, Serializable {
     return new SortComparatorSource () {
       public ScoreDocComparator newComparator (final IndexReader reader, String fieldname)
           throws IOException {
-        final String field = fieldname.intern ();
+        final String field = StringHelper.intern(fieldname);
         final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
         try {
           return new ScoreDocComparator () {
@@ -104,7 +105,7 @@ implements Comparable, Serializable {
    */
   protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
       throws IOException {
-    final String field = fieldname.intern ();
+    final String field = StringHelper.intern(fieldname);
     Comparable[] retArray = new Comparable[reader.maxDoc ()];
     if (retArray.length > 0) {
       TermDocs termDocs = reader.termDocs ();

View File

@@ -0,0 +1,93 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import junit.framework.TestCase;

import java.util.Random;

public class TestStringIntern extends LuceneTestCase {
  String[] testStrings;
  String[] internedStrings;
  Random r = newRandom();

  private String randStr(int len) {
    char[] arr = new char[len];
    for (int i=0; i<len; i++) {
      arr[i] = (char)('a' + r.nextInt(26));
    }
    return new String(arr);
  }

  private void makeStrings(int sz) {
    testStrings = new String[sz];
    internedStrings = new String[sz];
    for (int i=0; i<sz; i++) {
      testStrings[i] = randStr(r.nextInt(8)+3);
    }
  }

  public void testStringIntern() throws InterruptedException {
    makeStrings(1024*10);  // something greater than the capacity of the default cache size
    // makeStrings(100);   // realistic for perf testing
    int nThreads = 20;
    // final int iter=100000;
    final int iter=1000000;
    final boolean newStrings=true;

    // try native intern
    // StringHelper.interner = new StringInterner();

    Thread[] threads = new Thread[nThreads];
    for (int i=0; i<nThreads; i++) {
      final int seed = i;
      threads[i] = new Thread() {
        public void run() {
          Random rand = new Random(seed);
          String[] myInterned = new String[testStrings.length];
          for (int j=0; j<iter; j++) {
            int idx = rand.nextInt(testStrings.length);
            String s = testStrings[idx];
            if (newStrings == true && rand.nextBoolean()) s = new String(s); // make a copy half of the time
            String interned = StringHelper.intern(s);
            String prevInterned = myInterned[idx];
            String otherInterned = internedStrings[idx];

            // test against other threads
            if (otherInterned != null && otherInterned != interned) {
              TestCase.fail();
            }
            internedStrings[idx] = interned;

            // test against local copy
            if (prevInterned != null && prevInterned != interned) {
              TestCase.fail();
            }
            myInterned[idx] = interned;
          }
        }
      };

      threads[i].start();
    }

    for (int i=0; i<nThreads; i++) {
      threads[i].join();
    }
  }
}