mirror of https://github.com/apache/lucene.git
LUCENE-4806: change facet delim character to use 1 byte instead of 3 (in UTF-8)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451578 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9b6b4ec703
commit
ae89bfccb7
|
@ -79,6 +79,11 @@ Changes in backwards compatibility policy
|
||||||
* LUCENE-4748: A FacetRequest on a non-existent field now returns an
|
* LUCENE-4748: A FacetRequest on a non-existent field now returns an
|
||||||
empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless)
|
empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-4806: The default category delimiter character was changed
|
||||||
|
from U+F749 to U+001F, since the latter uses 1 byte vs 3 bytes for
|
||||||
|
the former. Existing facet indices must be reindexed. (Robert
|
||||||
|
Muir, Shai Erera, Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
|
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
|
||||||
|
|
|
@ -58,7 +58,7 @@ public class FacetIndexingParams {
|
||||||
* make sure that you return a character that's not found in any path
|
* make sure that you return a character that's not found in any path
|
||||||
* component.
|
* component.
|
||||||
*/
|
*/
|
||||||
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
|
public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F';
|
||||||
|
|
||||||
private final int partitionSize = Integer.MAX_VALUE;
|
private final int partitionSize = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
|
|
@ -36,9 +36,10 @@ abstract class Consts {
|
||||||
* <P>
|
* <P>
|
||||||
* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid
|
* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid
|
||||||
* unicode character) for this purpose. Recently, we switched to the
|
* unicode character) for this purpose. Recently, we switched to the
|
||||||
* "private-use" character \uF749.
|
* "private-use" character \uF749. Even more recently, we
|
||||||
|
* switched to \u001F (INFORMATION SEPARATOR ONE).
|
||||||
*/
|
*/
|
||||||
//static final char DEFAULT_DELIMITER = '\uFFFE';
|
//static final char DEFAULT_DELIMITER = '\uFFFE';
|
||||||
static final char DEFAULT_DELIMITER = '\uF749';
|
//static final char DEFAULT_DELIMITER = '\uF749';
|
||||||
|
static final char DEFAULT_DELIMITER = '\u001F';
|
||||||
}
|
}
|
||||||
|
|
|
@ -802,7 +802,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
te = terms.iterator(te);
|
te = terms.iterator(te);
|
||||||
while (te.next() != null) {
|
while (te.next() != null) {
|
||||||
String value = te.term().utf8ToString();
|
String value = te.term().utf8ToString();
|
||||||
CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER);
|
CategoryPath cp = new CategoryPath(value, delimiter);
|
||||||
final int ordinal = addCategory(cp);
|
final int ordinal = addCategory(cp);
|
||||||
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
|
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
|
||||||
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
|
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
|
||||||
|
|
Loading…
Reference in New Issue