mirror of https://github.com/apache/lucene.git
LUCENE-2943: fix thread-safety issues with ICU collation
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1075850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e71e682352
commit
2509d35c11
|
@ -148,6 +148,9 @@ Bug fixes
|
||||||
|
|
||||||
* LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
|
* LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
|
||||||
(Pierre Gossé via Robert Muir)
|
(Pierre Gossé via Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,9 @@ public final class CollationKeyFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public CollationKeyFilter(TokenStream input, Collator collator) {
|
public CollationKeyFilter(TokenStream input, Collator collator) {
|
||||||
super(input);
|
super(input);
|
||||||
this.collator = collator;
|
// clone in case JRE doesnt properly sync,
|
||||||
|
// or to reduce contention in case they do
|
||||||
|
this.collator = (Collator) collator.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -34,7 +34,9 @@ public class CollatedTermAttributeImpl extends CharTermAttributeImpl {
|
||||||
* @param collator Collation key generator
|
* @param collator Collation key generator
|
||||||
*/
|
*/
|
||||||
public CollatedTermAttributeImpl(Collator collator) {
|
public CollatedTermAttributeImpl(Collator collator) {
|
||||||
this.collator = collator;
|
// clone in case JRE doesnt properly sync,
|
||||||
|
// or to reduce contention in case they do
|
||||||
|
this.collator = (Collator) collator.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,6 +21,8 @@ package org.apache.lucene.collation;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -39,9 +41,12 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IndexableBinaryStringTools;
|
import org.apache.lucene.util.IndexableBinaryStringTools;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
public abstract class CollationTestBase extends LuceneTestCase {
|
public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -252,4 +257,77 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
assertEquals(expectedResult, buff.toString());
|
assertEquals(expectedResult, buff.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String randomString() {
|
||||||
|
// ideally we could do this!
|
||||||
|
// return _TestUtil.randomUnicodeString(random);
|
||||||
|
//
|
||||||
|
// http://bugs.icu-project.org/trac/ticket/8060
|
||||||
|
// http://bugs.icu-project.org/trac/ticket/7732
|
||||||
|
// ...
|
||||||
|
//
|
||||||
|
// as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
|
||||||
|
int length = _TestUtil.nextInt(random, 0, 10);
|
||||||
|
char chars[] = new char[length];
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
|
||||||
|
} else {
|
||||||
|
chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new String(chars, 0, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertThreadSafe(final Analyzer analyzer) throws Exception {
|
||||||
|
int numTestPoints = 1000 * RANDOM_MULTIPLIER;
|
||||||
|
int numThreads = _TestUtil.nextInt(random, 4, 8);
|
||||||
|
final HashMap<String,BytesRef> map = new HashMap<String,BytesRef>();
|
||||||
|
BytesRef spare = new BytesRef();
|
||||||
|
|
||||||
|
// create a map<String,SortKey> up front.
|
||||||
|
// then with multiple threads, generate sort keys for all the keys in the map
|
||||||
|
// and ensure they are the same as the ones we produced in serial fashion.
|
||||||
|
|
||||||
|
for (int i = 0; i < numTestPoints; i++) {
|
||||||
|
String term = randomString();
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
|
||||||
|
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
|
ts.reset();
|
||||||
|
assertTrue(ts.incrementToken());
|
||||||
|
bytes.toBytesRef(spare);
|
||||||
|
// ensure we make a copy of the actual bytes too
|
||||||
|
map.put(term, new BytesRef(spare));
|
||||||
|
}
|
||||||
|
|
||||||
|
Thread threads[] = new Thread[numThreads];
|
||||||
|
for (int i = 0; i < numThreads; i++) {
|
||||||
|
threads[i] = new Thread() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
BytesRef spare = new BytesRef();
|
||||||
|
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
|
||||||
|
String term = mapping.getKey();
|
||||||
|
BytesRef expected = mapping.getValue();
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
|
||||||
|
TermToBytesRefAttribute bytes = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
|
ts.reset();
|
||||||
|
assertTrue(ts.incrementToken());
|
||||||
|
bytes.toBytesRef(spare);
|
||||||
|
assertEquals(expected, spare);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numThreads; i++) {
|
||||||
|
threads[i].start();
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numThreads; i++) {
|
||||||
|
threads[i].join();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,4 +83,14 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
|
||||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||||
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
|
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testThreadSafe() throws Exception {
|
||||||
|
int iters = 20 * RANDOM_MULTIPLIER;
|
||||||
|
for (int i = 0; i < iters; i++) {
|
||||||
|
Locale locale = randomLocale(random);
|
||||||
|
Collator collator = Collator.getInstance(locale);
|
||||||
|
collator.setStrength(Collator.PRIMARY);
|
||||||
|
assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,7 +84,12 @@ public final class ICUCollationKeyFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
|
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
|
||||||
super(input);
|
super(input);
|
||||||
this.collator = collator;
|
// clone the collator: see http://userguide.icu-project.org/collation/architecture
|
||||||
|
try {
|
||||||
|
this.collator = (Collator) collator.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -36,7 +36,12 @@ public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl {
|
||||||
* @param collator Collation key generator
|
* @param collator Collation key generator
|
||||||
*/
|
*/
|
||||||
public ICUCollatedTermAttributeImpl(Collator collator) {
|
public ICUCollatedTermAttributeImpl(Collator collator) {
|
||||||
this.collator = collator;
|
// clone the collator: see http://userguide.icu-project.org/collation/architecture
|
||||||
|
try {
|
||||||
|
this.collator = (Collator) collator.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -84,4 +84,14 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
|
||||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||||
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
|
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testThreadSafe() throws Exception {
|
||||||
|
int iters = 20 * RANDOM_MULTIPLIER;
|
||||||
|
for (int i = 0; i < iters; i++) {
|
||||||
|
Locale locale = randomLocale(random);
|
||||||
|
Collator collator = Collator.getInstance(locale);
|
||||||
|
collator.setStrength(Collator.IDENTICAL);
|
||||||
|
assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue