mirror of https://github.com/apache/lucene.git
LUCENE-4806: change facet delim character to use 1 byte instead of 3 (in UTF-8)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451578 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9b6b4ec703
commit
ae89bfccb7
|
@ -79,6 +79,11 @@ Changes in backwards compatibility policy
|
|||
* LUCENE-4748: A FacetRequest on a non-existent field now returns an
|
||||
empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless)
|
||||
|
||||
* LUCENE-4806: The default category delimiter character was changed
|
||||
from U+F749 to U+001F, since the latter uses 1 byte vs 3 bytes for
|
||||
the former. Existing facet indices must be reindexed. (Robert
|
||||
Muir, Shai Erera, Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
|
||||
|
|
|
@ -58,7 +58,7 @@ public class FacetIndexingParams {
|
|||
* make sure that you return a character that's not found in any path
|
||||
* component.
|
||||
*/
|
||||
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
|
||||
public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F';
|
||||
|
||||
private final int partitionSize = Integer.MAX_VALUE;
|
||||
|
||||
|
|
|
@ -36,9 +36,10 @@ abstract class Consts {
|
|||
* <P>
|
||||
* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid
|
||||
* unicode character) for this purpose. Recently, we switched to the
|
||||
* "private-use" character \uF749.
|
||||
* "private-use" character \uF749. Even more recently, we
|
||||
* switched to \u001F (INFORMATION_SEPARATOR).
|
||||
*/
|
||||
//static final char DEFAULT_DELIMITER = '\uFFFE';
|
||||
static final char DEFAULT_DELIMITER = '\uF749';
|
||||
|
||||
//static final char DEFAULT_DELIMITER = '\uF749';
|
||||
static final char DEFAULT_DELIMITER = '\u001F';
|
||||
}
|
||||
|
|
|
@ -802,7 +802,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
te = terms.iterator(te);
|
||||
while (te.next() != null) {
|
||||
String value = te.term().utf8ToString();
|
||||
CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER);
|
||||
CategoryPath cp = new CategoryPath(value, delimiter);
|
||||
final int ordinal = addCategory(cp);
|
||||
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
|
||||
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
|
||||
|
|
Loading…
Reference in New Issue