mirror of https://github.com/apache/lucene.git
LUCENE-2943: fix thread-safety issues with ICU collation
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1075850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e71e682352
commit
2509d35c11
|
@ -149,6 +149,9 @@ Bug fixes
|
|||
* LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
|
||||
(Pierre Gossé via Robert Muir)
|
||||
|
||||
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
|
||||
(Robert Muir)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2867: Some contrib queryparser methods that receives CharSequence as
|
||||
|
|
|
@ -85,7 +85,9 @@ public final class CollationKeyFilter extends TokenFilter {
|
|||
*/
|
||||
public CollationKeyFilter(TokenStream input, Collator collator) {
|
||||
super(input);
|
||||
this.collator = collator;
|
||||
// clone in case JRE doesnt properly sync,
|
||||
// or to reduce contention in case they do
|
||||
this.collator = (Collator) collator.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -34,7 +34,9 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
|
|||
* @param collator Collation key generator
|
||||
*/
|
||||
public CollatedTermAttributeImpl(Collator collator) {
|
||||
this.collator = collator;
|
||||
// clone in case JRE doesnt properly sync,
|
||||
// or to reduce contention in case they do
|
||||
this.collator = (Collator) collator.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,6 +21,8 @@ package org.apache.lucene.collation;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -39,9 +41,12 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IndexableBinaryStringTools;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class CollationTestBase extends LuceneTestCase {
|
||||
|
||||
|
@ -252,4 +257,77 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
|||
}
|
||||
assertEquals(expectedResult, buff.toString());
|
||||
}
|
||||
|
||||
private String randomString() {
|
||||
// ideally we could do this!
|
||||
// return _TestUtil.randomUnicodeString(random);
|
||||
//
|
||||
// http://bugs.icu-project.org/trac/ticket/8060
|
||||
// http://bugs.icu-project.org/trac/ticket/7732
|
||||
// ...
|
||||
//
|
||||
// as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
|
||||
int length = _TestUtil.nextInt(random, 0, 10);
|
||||
char chars[] = new char[length];
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (random.nextBoolean()) {
|
||||
chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
|
||||
} else {
|
||||
chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
|
||||
}
|
||||
}
|
||||
return new String(chars, 0, length);
|
||||
}
|
||||
|
||||
public void assertThreadSafe(final Analyzer analyzer) throws Exception {
|
||||
int numTestPoints = 1000 * RANDOM_MULTIPLIER;
|
||||
int numThreads = _TestUtil.nextInt(random, 4, 8);
|
||||
final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
|
||||
BytesRef spare = new BytesRef();
|
||||
|
||||
// create a map<String,SortKey> up front.
|
||||
// then with multiple threads, generate sort keys for all the keys in the map
|
||||
// and ensure they are the same as the ones we produced in serial fashion.
|
||||
|
||||
for (int i = 0; i < numTestPoints; i++) {
|
||||
String term = randomString();
|
||||
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
|
||||
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||
ts.reset();
|
||||
assertTrue(ts.incrementToken());
|
||||
bytes.toBytesRef(spare);
|
||||
// ensure we make a copy of the actual bytes too
|
||||
map.put(term, new BytesRef(spare));
|
||||
}
|
||||
|
||||
Thread threads[] = new Thread[numThreads];
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
threads[i] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
BytesRef spare = new BytesRef();
|
||||
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
|
||||
String term = mapping.getKey();
|
||||
BytesRef expected = mapping.getValue();
|
||||
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
|
||||
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||
ts.reset();
|
||||
assertTrue(ts.incrementToken());
|
||||
bytes.toBytesRef(spare);
|
||||
assertEquals(expected, spare);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
threads[i].start();
|
||||
}
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -83,4 +83,14 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
|
|||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
|
||||
public void testThreadSafe() throws Exception {
|
||||
int iters = 20 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < iters; i++) {
|
||||
Locale locale = randomLocale(random);
|
||||
Collator collator = Collator.getInstance(locale);
|
||||
collator.setStrength(Collator.PRIMARY);
|
||||
assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,7 +84,12 @@ public final class ICUCollationKeyFilter extends TokenFilter {
|
|||
*/
|
||||
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
|
||||
super(input);
|
||||
this.collator = collator;
|
||||
// clone the collator: see http://userguide.icu-project.org/collation/architecture
|
||||
try {
|
||||
this.collator = (Collator) collator.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -36,7 +36,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
|
|||
* @param collator Collation key generator
|
||||
*/
|
||||
public ICUCollatedTermAttributeImpl(Collator collator) {
|
||||
this.collator = collator;
|
||||
// clone the collator: see http://userguide.icu-project.org/collation/architecture
|
||||
try {
|
||||
this.collator = (Collator) collator.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -84,4 +84,14 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
|
|||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
|
||||
public void testThreadSafe() throws Exception {
|
||||
int iters = 20 * RANDOM_MULTIPLIER;
|
||||
for (int i = 0; i < iters; i++) {
|
||||
Locale locale = randomLocale(random);
|
||||
Collator collator = Collator.getInstance(locale);
|
||||
collator.setStrength(Collator.IDENTICAL);
|
||||
assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue