diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 55381babcb3..da28e60294f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -79,6 +79,11 @@ Changes in backwards compatibility policy * LUCENE-4748: A FacetRequest on a non-existent field now returns an empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless) +* LUCENE-4806: The default category delimiter character was changed + from U+F749 to U+001F, since the latter uses 1 byte vs 3 bytes for + the former. Existing facet indices must be reindexed. (Robert + Muir, Shai Erera, Mike McCandless) + Optimizations * LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate diff --git a/lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java b/lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java index ed269f8fd23..344f4af07ac 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java @@ -58,7 +58,7 @@ public class FacetIndexingParams { * make sure that you return a character that's not found in any path * component. */ - public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749'; + public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F'; private final int partitionSize = Integer.MAX_VALUE; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java index c86431f65e4..b3d8850cb4f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java @@ -36,9 +36,10 @@ abstract class Consts { *

* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid * unicode character) for this purpose. Recently, we switched to the - * "private-use" character \uF749. + * "private-use" character \uF749. Even more recently, we + * switched to \U001F (INFORMATION_SEPARATOR). */ //static final char DEFAULT_DELIMITER = '\uFFFE'; - static final char DEFAULT_DELIMITER = '\uF749'; - + //static final char DEFAULT_DELIMITER = '\uF749'; + static final char DEFAULT_DELIMITER = '\u001F'; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index c74a3d4d0c3..62766d1a166 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -802,7 +802,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { te = terms.iterator(te); while (te.next() != null) { String value = te.term().utf8ToString(); - CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER); + CategoryPath cp = new CategoryPath(value, delimiter); final int ordinal = addCategory(cp); docs = te.docs(null, docs, DocsEnum.FLAG_NONE); ordinalMap.addMapping(docs.nextDoc() + base, ordinal);