LUCENE-4806: change facet delim character to use 1 byte instead of 3 (in UTF-8)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451578 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-03-01 12:51:21 +00:00
parent 9b6b4ec703
commit ae89bfccb7
4 changed files with 11 additions and 5 deletions

View File

@ -79,6 +79,11 @@ Changes in backwards compatibility policy
* LUCENE-4748: A FacetRequest on a non-existent field now returns an
empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless)
* LUCENE-4806: The default category delimiter character was changed
from U+F749 to U+001F, since the latter uses 1 byte vs 3 bytes for
the former. Existing facet indices must be reindexed. (Robert
Muir, Shai Erera, Mike McCandless)
Optimizations
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate

View File

@ -58,7 +58,7 @@ public class FacetIndexingParams {
* make sure that you return a character that's not found in any path
* component.
*/
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F';
private final int partitionSize = Integer.MAX_VALUE;

View File

@ -36,9 +36,10 @@ abstract class Consts {
* <P>
* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid
* unicode character) for this purpose. Recently, we switched to the
* "private-use" character \uF749.
* "private-use" character \uF749. Even more recently, we
* switched to \u001F (INFORMATION SEPARATOR ONE).
*/
//static final char DEFAULT_DELIMITER = '\uFFFE';
static final char DEFAULT_DELIMITER = '\uF749';
//static final char DEFAULT_DELIMITER = '\uF749';
static final char DEFAULT_DELIMITER = '\u001F';
}

View File

@ -802,7 +802,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
te = terms.iterator(te);
while (te.next() != null) {
String value = te.term().utf8ToString();
CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER);
CategoryPath cp = new CategoryPath(value, delimiter);
final int ordinal = addCategory(cp);
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);