From 7f1319fdf0e1e92daaef5ddb23418d7d2ed24b51 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 5 Aug 2009 18:05:06 +0000 Subject: [PATCH] LUCENE-1607: String.intern() cache StringHelper.intern() git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@801344 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 6 ++ .../lucene/search/highlight/QueryScorer.java | 5 +- .../search/highlight/QueryTermExtractor.java | 3 +- .../highlight/WeightedSpanTermExtractor.java | 5 +- .../instantiated/InstantiatedIndexWriter.java | 3 +- .../lucene/index/FieldNormModifier.java | 3 +- .../lucene/index/TermVectorAccessor.java | 4 +- .../lucene/misc/LengthNormModifier.java | 3 +- .../lucene/search/spell/LuceneDictionary.java | 3 +- .../apache/lucene/document/AbstractField.java | 5 +- .../org/apache/lucene/document/Field.java | 9 +- .../org/apache/lucene/index/FieldInfos.java | 5 +- .../org/apache/lucene/index/FieldsReader.java | 3 +- src/java/org/apache/lucene/index/Term.java | 11 ++- .../apache/lucene/search/FieldCacheImpl.java | 13 +-- .../lucene/search/NumericRangeQuery.java | 3 +- .../org/apache/lucene/search/SortField.java | 5 +- .../lucene/search/TermRangeTermEnum.java | 3 +- .../lucene/util/SimpleStringInterner.java | 82 ++++++++++++++++ .../org/apache/lucene/util/StringHelper.java | 11 +++ .../apache/lucene/util/StringInterner.java | 37 ++++++++ .../lucene/index/TestStressIndexing2.java | 3 +- .../lucene/search/SampleComparable.java | 5 +- .../apache/lucene/util/TestStringIntern.java | 93 +++++++++++++++++++ 24 files changed, 287 insertions(+), 36 deletions(-) create mode 100755 src/java/org/apache/lucene/util/SimpleStringInterner.java create mode 100755 src/java/org/apache/lucene/util/StringInterner.java create mode 100755 src/test/org/apache/lucene/util/TestStringIntern.java diff --git a/CHANGES.txt b/CHANGES.txt index 60352713a66..456f0b08442 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -717,6 +717,12 @@ Optimizations optional scorers, or not enough optional scorers to satisfy minShouldMatch). (Shai Erera via Mike McCandless) +12. LUCENE-1607: To speed up string interning for commonly used + strings, the StringHelper.intern() interface was added with a + default implementation that uses a lockless cache. + (Earwin Burrfoot, yonik) + + Documentation Build diff --git a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java index 482023e91a0..8c602e6da4c 100644 --- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java +++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java @@ -11,6 +11,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; +import org.apache.lucene.util.StringHelper; /** * {@link Scorer} implementation which scores text fragments by the number of @@ -67,7 +68,7 @@ public class QueryScorer implements Scorer { */ public QueryScorer(Query query, IndexReader reader, String field, String defaultField) throws IOException { - this.defaultField = defaultField.intern(); + this.defaultField = StringHelper.intern(defaultField); init(query, field, reader, true); } @@ -75,7 +76,7 @@ public class QueryScorer implements Scorer { * @param defaultField - The default field for queries with the field name unspecified */ public QueryScorer(Query query, String field, String defaultField) { - this.defaultField = defaultField.intern(); + this.defaultField = StringHelper.intern(defaultField); init(query, field, null, true); } diff --git a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java index a5f595ce424..f8eb4183a25 100644 --- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java +++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java @@ -26,6 +26,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.StringHelper; /** * Utility class used to extract the terms used in a query, plus any weights. @@ -95,7 +96,7 @@ public final class QueryTermExtractor HashSet terms=new HashSet(); if(fieldName!=null) { - fieldName=fieldName.intern(); + fieldName= StringHelper.intern(fieldName); } getTerms(query,terms,prohibited,fieldName); return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]); diff --git a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 2ed0b472a3c..cc0cb8f4e10 100644 --- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -50,6 +50,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.util.StringHelper; /** * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether Terms from the query are contained in a supplied TokenStream. @@ -68,7 +69,7 @@ public class WeightedSpanTermExtractor { public WeightedSpanTermExtractor(String defaultField) { if (defaultField != null) { - this.defaultField = defaultField.intern(); + this.defaultField = StringHelper.intern(defaultField); } } @@ -362,7 +363,7 @@ public class WeightedSpanTermExtractor { public Map getWeightedSpanTerms(Query query, TokenStream tokenStream, String fieldName) throws IOException { if (fieldName != null) { - this.fieldName = fieldName.intern(); + this.fieldName = StringHelper.intern(fieldName); } Map terms = new PositionCheckingMap(); diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java index 4279163d547..350bcfd7094 100644 --- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java +++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java @@ -43,6 +43,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermVectorOffsetInfo; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; +import org.apache.lucene.util.StringHelper; /** * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism. @@ -458,7 +459,7 @@ public class InstantiatedIndexWriter { FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name()); if (fieldSetting == null) { fieldSetting = new FieldSetting(); - fieldSetting.fieldName = field.name().intern(); + fieldSetting.fieldName = StringHelper.intern(field.name()); fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting); fieldNameBuffer.add(fieldSetting.fieldName); } diff --git a/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java b/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java index c369c0bd137..9f25b36bb15 100644 --- a/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -23,6 +23,7 @@ import java.util.Date; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.StringHelper; /** * Given a directory and a list of fields, updates the fieldNorms in place for every document. @@ -103,7 +104,7 @@ public class FieldNormModifier { * @param field the field whose norms should be reset */ public void reSetNorms(String field) throws IOException { - String fieldName = field.intern(); + String fieldName = StringHelper.intern(field); int[] termCounts = new int[0]; byte[] fakeNorms = new byte[0]; diff --git a/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java b/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java index 765f7e572bd..314aba4ea75 100644 --- a/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java +++ b/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import org.apache.lucene.util.StringHelper; + import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -52,7 +54,7 @@ public class TermVectorAccessor { */ public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException { - fieldName = fieldName.intern(); + fieldName = StringHelper.intern(fieldName); decoratedMapper.decorated = mapper; decoratedMapper.termVectorStored = false; diff --git a/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java b/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java index 18ce975cf2f..9fdb725eeed 100644 --- a/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java +++ b/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.StringHelper; import java.io.File; import java.io.IOException; @@ -105,7 +106,7 @@ public class LengthNormModifier { * @param field the field whose norms should be reset */ public void reSetNorms(String field) throws IOException { - String fieldName = field.intern(); + String fieldName = StringHelper.intern(field); int[] termCounts = new int[0]; IndexReader reader = null; diff --git a/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java index eb87a5168fa..58396df7342 100755 --- a/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java +++ b/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java @@ -23,6 +23,7 @@ import java.util.Iterator; import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.util.StringHelper; import java.io.*; @@ -42,7 +43,7 @@ public class LuceneDictionary implements Dictionary { public LuceneDictionary(IndexReader reader, String field) { this.reader = reader; - this.field = field.intern(); + this.field = StringHelper.intern(field); } public final Iterator getWordsIterator() { diff --git a/src/java/org/apache/lucene/document/AbstractField.java b/src/java/org/apache/lucene/document/AbstractField.java index c92be7cb4fb..09309e6b3e4 100755 --- a/src/java/org/apache/lucene/document/AbstractField.java +++ b/src/java/org/apache/lucene/document/AbstractField.java @@ -17,7 +17,8 @@ package org.apache.lucene.document; import org.apache.lucene.search.PhraseQuery; // for javadocs import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.analysis.TokenStream; // for javadocs +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.StringHelper; // for javadocs /** @@ -54,7 +55,7 @@ public abstract class AbstractField implements Fieldable { protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { if (name == null) throw new NullPointerException("name cannot be null"); - this.name = name.intern(); // field names are interned + this.name = StringHelper.intern(name); // field names are interned if (store == Field.Store.YES){ this.isStored = true; diff --git a/src/java/org/apache/lucene/document/Field.java b/src/java/org/apache/lucene/document/Field.java index 487b9a17503..062e50b91b3 100644 --- a/src/java/org/apache/lucene/document/Field.java +++ b/src/java/org/apache/lucene/document/Field.java @@ -20,6 +20,7 @@ package org.apache.lucene.document; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexWriter; // for javadoc import org.apache.lucene.util.Parameter; +import org.apache.lucene.util.StringHelper; import java.io.Reader; import java.io.Serializable; @@ -337,7 +338,7 @@ public final class Field extends AbstractField implements Fieldable, Serializabl + "for a field that is not indexed"); if (internName) // field names are optionally interned - name = name.intern(); + name = StringHelper.intern(name); this.name = name; @@ -417,7 +418,7 @@ public final class Field extends AbstractField implements Fieldable, Serializabl if (reader == null) throw new NullPointerException("reader cannot be null"); - this.name = name.intern(); // field names are interned + this.name = StringHelper.intern(name); // field names are interned this.fieldsData = reader; this.isStored = false; @@ -464,7 +465,7 @@ public final class Field extends AbstractField implements Fieldable, Serializabl if (tokenStream == null) throw new NullPointerException("tokenStream cannot be null"); - this.name = name.intern(); // field names are interned + this.name = StringHelper.intern(name); // field names are interned this.fieldsData = null; this.tokenStream = tokenStream; @@ -509,7 +510,7 @@ public final class Field extends AbstractField implements Fieldable, Serializabl if (value == null) throw new IllegalArgumentException("value cannot be null"); - this.name = name.intern(); + this.name = StringHelper.intern(name); // field names are interned fieldsData = value; if (store == Store.YES) { diff --git a/src/java/org/apache/lucene/index/FieldInfos.java b/src/java/org/apache/lucene/index/FieldInfos.java index 443cb860329..b8fcf10a372 100644 --- a/src/java/org/apache/lucene/index/FieldInfos.java +++ b/src/java/org/apache/lucene/index/FieldInfos.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.StringHelper; import java.io.IOException; import java.util.*; @@ -248,7 +249,7 @@ final class FieldInfos { private FieldInfo addInternal(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) { - name = name.intern(); + name = StringHelper.intern(name); FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); byNumber.add(fi); @@ -352,7 +353,7 @@ final class FieldInfos { } for (int i = 0; i < size; i++) { - String name = input.readString().intern(); + String name = StringHelper.intern(input.readString()); byte bits = input.readByte(); boolean isIndexed = (bits & IS_INDEXED) != 0; boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java index 7b5ecece9c0..61c4a830c5b 100644 --- a/src/java/org/apache/lucene/index/FieldsReader.java +++ b/src/java/org/apache/lucene/index/FieldsReader.java @@ -24,6 +24,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.StringHelper; import java.io.IOException; import java.io.Reader; @@ -643,7 +644,7 @@ final class FieldsReader implements Cloneable { this.isTokenized = tokenize; - this.name = fi.name.intern(); + this.name = StringHelper.intern(fi.name); this.isIndexed = fi.isIndexed; this.omitNorms = fi.omitNorms; this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions; diff --git a/src/java/org/apache/lucene/index/Term.java b/src/java/org/apache/lucene/index/Term.java index 3a6cce85d3e..4551b66174a 100644 --- a/src/java/org/apache/lucene/index/Term.java +++ b/src/java/org/apache/lucene/index/Term.java @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import org.apache.lucene.util.StringHelper; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -33,7 +35,8 @@ public final class Term implements Comparable, java.io.Serializable { *

Note that a null field or null text value results in undefined * behavior for most Lucene APIs that accept a Term parameter. */ public Term(String fld, String txt) { - this(fld, txt, true); + field = StringHelper.intern(fld); + text = txt; } /** Constructs a Term with the given field and empty text. @@ -47,8 +50,8 @@ public final class Term implements Comparable, java.io.Serializable { } Term(String fld, String txt, boolean intern) { - field = intern ? fld.intern() : fld; // field names are interned - text = txt; // unless already known to be + field = intern ? StringHelper.intern(fld) : fld; // field names are interned + text = txt; // unless already known to be } /** Returns the field of this term, an interned string. The field indicates @@ -130,6 +133,6 @@ public final class Term implements Comparable, java.io.Serializable { throws java.io.IOException, ClassNotFoundException { in.defaultReadObject(); - field = field.intern(); + field = StringHelper.intern(field); } } diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java index 1e8140451ad..8b1e1da4e3b 100644 --- a/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.StringHelper; import java.io.IOException; import java.util.HashMap; @@ -103,7 +104,7 @@ class FieldCacheImpl implements ExtendedFieldCache { /** Creates one of these objects. */ Entry (String field, int type, Locale locale) { - this.field = field.intern(); + this.field = StringHelper.intern(field); this.type = type; this.custom = null; this.locale = locale; @@ -111,7 +112,7 @@ class FieldCacheImpl implements ExtendedFieldCache { /** Creates one of these objects for a custom comparator/parser. */ Entry (String field, Object custom) { - this.field = field.intern(); + this.field = StringHelper.intern(field); this.type = SortField.CUSTOM; this.custom = custom; this.locale = null; @@ -119,7 +120,7 @@ class FieldCacheImpl implements ExtendedFieldCache { /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. */ Entry (String field, int type, Parser parser) { - this.field = field.intern(); + this.field = StringHelper.intern(field); this.type = type; this.custom = parser; this.locale = null; @@ -463,7 +464,7 @@ class FieldCacheImpl implements ExtendedFieldCache { protected Object createValue(IndexReader reader, Object fieldKey) throws IOException { - String field = ((String) fieldKey).intern(); + String field = StringHelper.intern((String) fieldKey); final String[] retArray = new String[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field)); @@ -495,7 +496,7 @@ class FieldCacheImpl implements ExtendedFieldCache { protected Object createValue(IndexReader reader, Object fieldKey) throws IOException { - String field = ((String) fieldKey).intern(); + String field = StringHelper.intern((String) fieldKey); final int[] retArray = new int[reader.maxDoc()]; String[] mterms = new String[reader.maxDoc()+1]; TermDocs termDocs = reader.termDocs(); @@ -574,7 +575,7 @@ class FieldCacheImpl implements ExtendedFieldCache { protected Object createValue(IndexReader reader, Object fieldKey) throws IOException { - String field = ((String)fieldKey).intern(); + String field = StringHelper.intern((String) fieldKey); TermEnum enumerator = reader.terms (new Term (field)); try { Term term = enumerator.term(); diff --git a/src/java/org/apache/lucene/search/NumericRangeQuery.java b/src/java/org/apache/lucene/search/NumericRangeQuery.java index d2f94ec207a..4493fc870cc 100644 --- a/src/java/org/apache/lucene/search/NumericRangeQuery.java +++ b/src/java/org/apache/lucene/search/NumericRangeQuery.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.NumericTokenStream; // for javadocs import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -151,7 +152,7 @@ public final class NumericRangeQuery extends MultiTermQuery { assert (valSize == 32 || valSize == 64); if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); - this.field = field.intern(); + this.field = StringHelper.intern(field); this.precisionStep = precisionStep; this.valSize = valSize; this.min = min; diff --git a/src/java/org/apache/lucene/search/SortField.java b/src/java/org/apache/lucene/search/SortField.java index 356bf1f861f..7a2ef1f3aaa 100644 --- a/src/java/org/apache/lucene/search/SortField.java +++ b/src/java/org/apache/lucene/search/SortField.java @@ -25,6 +25,7 @@ import org.apache.lucene.document.NumericField; // javadocs import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.StringHelper; /** * Stores information about how to sort documents by terms in an individual @@ -272,7 +273,7 @@ implements Serializable { if (type != SCORE && type != DOC) throw new IllegalArgumentException("field can only be null when type is SCORE or DOC"); } else { - this.field = field.intern(); + this.field = StringHelper.intern(field); } } @@ -510,7 +511,7 @@ implements Serializable { * @deprecated */ static int detectFieldType(IndexReader reader, String fieldKey) throws IOException { - String field = fieldKey.intern(); + String field = StringHelper.intern(fieldKey); TermEnum enumerator = reader.terms(new Term(field)); try { Term term = enumerator.term(); diff --git a/src/java/org/apache/lucene/search/TermRangeTermEnum.java b/src/java/org/apache/lucene/search/TermRangeTermEnum.java index 9a3977627cb..2acc07a00d4 100644 --- a/src/java/org/apache/lucene/search/TermRangeTermEnum.java +++ b/src/java/org/apache/lucene/search/TermRangeTermEnum.java @@ -22,6 +22,7 @@ import java.text.Collator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.util.StringHelper; /** * Subclass of FilteredTermEnum for enumerating all terms that match the @@ -75,7 +76,7 @@ public class TermRangeTermEnum extends FilteredTermEnum { this.lowerTermText = lowerTermText; this.includeLower = includeLower; this.includeUpper = includeUpper; - this.field = field.intern(); + this.field = StringHelper.intern(field); // do a little bit of normalization... // open ended range queries should always be inclusive. diff --git a/src/java/org/apache/lucene/util/SimpleStringInterner.java b/src/java/org/apache/lucene/util/SimpleStringInterner.java new file mode 100755 index 00000000000..29200824d11 --- /dev/null +++ b/src/java/org/apache/lucene/util/SimpleStringInterner.java @@ -0,0 +1,82 @@ +package org.apache.lucene.util; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * Simple lockless and memory barrier free String intern cache that is guaranteed + * to return the same String instance as String.intern() does. + */ +public class SimpleStringInterner extends StringInterner { + + private static class Entry { + final private String str; + final private int hash; + private Entry next; + private Entry(String str, int hash, Entry next) { + this.str = str; + this.hash = hash; + this.next = next; + } + } + + private final Entry[] cache; + private final int maxChainLength; + + /** + * @param tableSize Size of the hash table, should be a power of two. + * @param maxChainLength Maximum length of each bucket, after which the oldest item inserted is dropped. + */ + public SimpleStringInterner(int tableSize, int maxChainLength) { + cache = new Entry[Math.max(1,BitUtil.nextHighestPowerOfTwo(tableSize))]; + this.maxChainLength = Math.max(2,maxChainLength); + } + + // @Override + public String intern(String s) { + int h = s.hashCode(); + // In the future, it may be worth augmenting the string hash + // if the lower bits need better distribution. + int slot = h & (cache.length-1); + + Entry first = this.cache[slot]; + Entry nextToLast = null; + + int chainLength = 0; + + for(Entry e=first; e!=null; e=e.next) { + if (e.hash == h && (e.str == s || e.str.compareTo(s)==0)) { + // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) { + return e.str; + } + + chainLength++; + if (e.next != null) { + nextToLast = e; + } + } + + // insertion-order cache: add new entry at head + s = s.intern(); + this.cache[slot] = new Entry(s, h, first); + if (chainLength >= maxChainLength) { + // prune last entry + nextToLast.next = null; + } + return s; + } +} \ No newline at end of file diff --git a/src/java/org/apache/lucene/util/StringHelper.java b/src/java/org/apache/lucene/util/StringHelper.java index 7ffdd92aacb..1ee8e3caf38 100644 --- a/src/java/org/apache/lucene/util/StringHelper.java +++ b/src/java/org/apache/lucene/util/StringHelper.java @@ -24,6 +24,17 @@ package org.apache.lucene.util; * $Id$ */ public abstract class StringHelper { + /** + * Expert: + * The StringInterner implementation used by Lucene. + * This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. + */ + public static StringInterner interner = new SimpleStringInterner(1024,8); + + /** Return the same string object for all equal strings */ + public static String intern(String s) { + return interner.intern(s); + } /** * Compares two byte[] arrays, element by element, and returns the diff --git a/src/java/org/apache/lucene/util/StringInterner.java b/src/java/org/apache/lucene/util/StringInterner.java new file mode 100755 index 00000000000..feffbff9330 --- /dev/null +++ b/src/java/org/apache/lucene/util/StringInterner.java @@ -0,0 +1,37 @@ +package org.apache.lucene.util; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Subclasses of StringInterner are required to + * return the same single String object for all equal strings. + * Depending on the implementation, this may not be + * the same object returned as String.intern(). + * + * This StringInterner base class simply delegates to String.intern(). + */ +public class StringInterner { + /** Returns a single object instance for each equal string. */ + public String intern(String s) { + return s.intern(); + } + + /** Returns a single object instance for each equal string. */ + public String intern(char[] arr, int offset, int len) { + return intern(new String(arr, offset, len)); + } +} diff --git a/src/test/org/apache/lucene/index/TestStressIndexing2.java b/src/test/org/apache/lucene/index/TestStressIndexing2.java index 9239cc551d5..719bc430184 100644 --- a/src/test/org/apache/lucene/index/TestStressIndexing2.java +++ b/src/test/org/apache/lucene/index/TestStressIndexing2.java @@ -20,6 +20,7 @@ import org.apache.lucene.analysis.*; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.search.TermQuery; import java.util.*; @@ -266,7 +267,7 @@ public class TestStressIndexing2 extends LuceneTestCase { TermDocs termDocs2 = r2.termDocs(); // create mapping from id2 space to id2 based on idField - idField = idField.intern(); + idField = StringHelper.intern(idField); TermEnum termEnum = r1.terms (new Term (idField, "")); do { Term term = termEnum.term(); diff --git a/src/test/org/apache/lucene/search/SampleComparable.java b/src/test/org/apache/lucene/search/SampleComparable.java index 3c5d636a66b..ed7440badea 100644 --- a/src/test/org/apache/lucene/search/SampleComparable.java +++ b/src/test/org/apache/lucene/search/SampleComparable.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.StringHelper; import java.io.IOException; import java.io.Serializable; @@ -69,7 +70,7 @@ implements Comparable, Serializable { return new SortComparatorSource () { public ScoreDocComparator newComparator (final IndexReader reader, String fieldname) throws IOException { - final String field = fieldname.intern (); + final String field = StringHelper.intern(fieldname); final TermEnum enumerator = reader.terms (new Term (fieldname, "")); try { return new ScoreDocComparator () { @@ -104,7 +105,7 @@ implements Comparable, Serializable { */ protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname) throws IOException { - final String field = fieldname.intern (); + final String field = StringHelper.intern(fieldname); Comparable[] retArray = new Comparable[reader.maxDoc ()]; if (retArray.length > 0) { TermDocs termDocs = reader.termDocs (); diff --git a/src/test/org/apache/lucene/util/TestStringIntern.java b/src/test/org/apache/lucene/util/TestStringIntern.java new file mode 100755 index 00000000000..521635ecb5e --- /dev/null +++ b/src/test/org/apache/lucene/util/TestStringIntern.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +import junit.framework.TestCase; + +import java.util.Random; + +public class TestStringIntern extends LuceneTestCase { + String[] testStrings; + String[] internedStrings; + Random r = newRandom(); + + private String randStr(int len) { + char[] arr = new char[len]; + for (int i=0; i