mirror of https://github.com/apache/lucene.git
LUCENE-1450: make sure RangeQuery/Filter check all terms in the index when using a Collator
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@713332 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9d8c882385
commit
4646692022
|
@ -70,7 +70,8 @@ public class RangeTermEnum extends FilteredTermEnum {
|
|||
this.includeUpper = true;
|
||||
}
|
||||
|
||||
setEnum(reader.terms(new Term(this.field, this.lowerTermText)));
|
||||
String startTermText = collator == null ? this.lowerTermText : "";
|
||||
setEnum(reader.terms(new Term(this.field, startTermText)));
|
||||
}
|
||||
|
||||
public float difference() {
|
||||
|
|
|
@ -564,4 +564,43 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
|
|||
assertEquals("The index Term should be included.", 1, result.length);
|
||||
search.close();
|
||||
}
|
||||
|
||||
public void testDanish() throws Exception {
|
||||
|
||||
/* build an index */
|
||||
RAMDirectory danishIndex = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
|
||||
// Danish collation orders the words below in the given order
|
||||
// (example taken from TestSort.testInternationalSort() ).
|
||||
String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
|
||||
for (int docnum = 0 ; docnum < words.length ; ++docnum) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("content", words[docnum],
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
doc.add(new Field("body", "body",
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(danishIndex);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
Collator c = Collator.getInstance(new Locale("da", "dk"));
|
||||
|
||||
// Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
|
||||
// but Danish collation does.
|
||||
ScoreDoc[] result = search.search
|
||||
(csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, result.length);
|
||||
|
||||
result = search.search
|
||||
(csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, result.length);
|
||||
search.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -376,4 +376,46 @@ public class TestRangeFilter extends BaseTestRangeFilter {
|
|||
assertEquals("The index Term should be included.", 1, result.length());
|
||||
search.close();
|
||||
}
|
||||
|
||||
public void testDanish() throws Exception {
|
||||
|
||||
/* build an index */
|
||||
RAMDirectory danishIndex = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter
|
||||
(danishIndex, new SimpleAnalyzer(), T,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
// Danish collation orders the words below in the given order
|
||||
// (example taken from TestSort.testInternationalSort() ).
|
||||
String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
|
||||
for (int docnum = 0 ; docnum < words.length ; ++docnum) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("content", words[docnum],
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
doc.add(new Field("body", "body",
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(danishIndex);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
Collator collator = Collator.getInstance(new Locale("da", "dk"));
|
||||
Query query = new RangeQuery
|
||||
("content", "H\u00D8T", "MAND", false, false, collator);
|
||||
|
||||
// Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
|
||||
// but Danish collation does.
|
||||
Hits result = search.search
|
||||
(q, new RangeFilter("content", "H\u00D8T", "MAND", F, F, collator));
|
||||
assertEquals("The index Term should be included.", 1, result.length());
|
||||
|
||||
result = search.search
|
||||
(q, new RangeFilter("content", "H\u00C5T", "MAND", F, F, collator));
|
||||
assertEquals
|
||||
("The index Term should not be included.", 0, result.length());
|
||||
search.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,6 +187,26 @@ public class TestRangeQuery extends LuceneTestCase {
|
|||
assertEquals("The index Term should be included.", 1, hits.length);
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
public void testDanish() throws Exception {
|
||||
Collator collator = Collator.getInstance(new Locale("da", "dk"));
|
||||
// Danish collation orders the words below in the given order (example taken
|
||||
// from TestSort.testInternationalSort() ).
|
||||
String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
|
||||
Query query = new RangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
|
||||
|
||||
// Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
|
||||
// but Danish collation does.
|
||||
initializeIndex(words);
|
||||
IndexSearcher searcher = new IndexSearcher(dir);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, hits.length);
|
||||
|
||||
query = new RangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, hits.length);
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
private void initializeIndex(String[] values) throws IOException {
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
|
|
Loading…
Reference in New Issue