LUCENE-2943: fix thread-safety issues with ICU collation

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1075850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-03-01 15:47:14 +00:00
parent e71e682352
commit 2509d35c11
8 changed files with 119 additions and 4 deletions

View File

@ -149,6 +149,9 @@ Bug fixes
* LUCENE-2874: Highlighting overlapping tokens outputted doubled words. * LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
(Pierre Gossé via Robert Muir) (Pierre Gossé via Robert Muir)
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
(Robert Muir)
API Changes API Changes
* LUCENE-2867: Some contrib queryparser methods that receives CharSequence as * LUCENE-2867: Some contrib queryparser methods that receives CharSequence as

View File

@ -85,7 +85,9 @@ public final class CollationKeyFilter extends TokenFilter {
*/ */
public CollationKeyFilter(TokenStream input, Collator collator) { public CollationKeyFilter(TokenStream input, Collator collator) {
super(input); super(input);
this.collator = collator; // clone in case JRE doesnt properly sync,
// or to reduce contention in case they do
this.collator = (Collator) collator.clone();
} }
@Override @Override

View File

@ -34,7 +34,9 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
* @param collator Collation key generator * @param collator Collation key generator
*/ */
public CollatedTermAttributeImpl(Collator collator) { public CollatedTermAttributeImpl(Collator collator) {
this.collator = collator; // clone in case JRE doesnt properly sync,
// or to reduce contention in case they do
this.collator = (Collator) collator.clone();
} }
@Override @Override

View File

@ -21,6 +21,8 @@ package org.apache.lucene.collation;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -39,9 +41,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IndexableBinaryStringTools; import org.apache.lucene.util.IndexableBinaryStringTools;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import java.io.StringReader; import java.io.StringReader;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public abstract class CollationTestBase extends LuceneTestCase { public abstract class CollationTestBase extends LuceneTestCase {
@ -252,4 +257,77 @@ public abstract class CollationTestBase extends LuceneTestCase {
} }
assertEquals(expectedResult, buff.toString()); assertEquals(expectedResult, buff.toString());
} }
private String randomString() {
// ideally we could do this!
// return _TestUtil.randomUnicodeString(random);
//
// http://bugs.icu-project.org/trac/ticket/8060
// http://bugs.icu-project.org/trac/ticket/7732
// ...
//
// as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
int length = _TestUtil.nextInt(random, 0, 10);
char chars[] = new char[length];
for (int i = 0; i < length; i++) {
if (random.nextBoolean()) {
chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
} else {
chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
}
}
return new String(chars, 0, length);
}
public void assertThreadSafe(final Analyzer analyzer) throws Exception {
int numTestPoints = 1000 * RANDOM_MULTIPLIER;
int numThreads = _TestUtil.nextInt(random, 4, 8);
final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
BytesRef spare = new BytesRef();
// create a map<String,SortKey> up front.
// then with multiple threads, generate sort keys for all the keys in the map
// and ensure they are the same as the ones we produced in serial fashion.
for (int i = 0; i < numTestPoints; i++) {
String term = randomString();
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
bytes.toBytesRef(spare);
// ensure we make a copy of the actual bytes too
map.put(term, new BytesRef(spare));
}
Thread threads[] = new Thread[numThreads];
for (int i = 0; i < numThreads; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
BytesRef spare = new BytesRef();
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
String term = mapping.getKey();
BytesRef expected = mapping.getValue();
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
bytes.toBytesRef(spare);
assertEquals(expected, spare);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
for (int i = 0; i < numThreads; i++) {
threads[i].start();
}
for (int i = 0; i < numThreads; i++) {
threads[i].join();
}
}
} }

View File

@ -83,4 +83,14 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF"); oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
} }
public void testThreadSafe() throws Exception {
int iters = 20 * RANDOM_MULTIPLIER;
for (int i = 0; i < iters; i++) {
Locale locale = randomLocale(random);
Collator collator = Collator.getInstance(locale);
collator.setStrength(Collator.PRIMARY);
assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
}
}
} }

View File

@ -84,7 +84,12 @@ public final class ICUCollationKeyFilter extends TokenFilter {
*/ */
public ICUCollationKeyFilter(TokenStream input, Collator collator) { public ICUCollationKeyFilter(TokenStream input, Collator collator) {
super(input); super(input);
this.collator = collator; // clone the collator: see http://userguide.icu-project.org/collation/architecture
try {
this.collator = (Collator) collator.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
} }
@Override @Override

View File

@ -36,7 +36,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
* @param collator Collation key generator * @param collator Collation key generator
*/ */
public ICUCollatedTermAttributeImpl(Collator collator) { public ICUCollatedTermAttributeImpl(Collator collator) {
this.collator = collator; // clone the collator: see http://userguide.icu-project.org/collation/architecture
try {
this.collator = (Collator) collator.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
} }
@Override @Override

View File

@ -84,4 +84,14 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
"BFJHD", "ECAGI", "BJDFH", "BJDHF"); "BFJHD", "ECAGI", "BJDFH", "BJDHF");
} }
public void testThreadSafe() throws Exception {
int iters = 20 * RANDOM_MULTIPLIER;
for (int i = 0; i < iters; i++) {
Locale locale = randomLocale(random);
Collator collator = Collator.getInstance(locale);
collator.setStrength(Collator.IDENTICAL);
assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
}
}
} }