mirror of https://github.com/apache/lucene.git
sorted bytes file format
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1407612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5910327656
commit
75b5ba898a
|
@ -62,6 +62,9 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
||||||
// used for bytes
|
// used for bytes
|
||||||
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
|
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
|
||||||
final static BytesRef LENGTH = new BytesRef("length ");
|
final static BytesRef LENGTH = new BytesRef("length ");
|
||||||
|
// used for sorted bytes
|
||||||
|
final static BytesRef NUMVALUES = new BytesRef(" numvalues");
|
||||||
|
final static BytesRef ORDPATTERN = new BytesRef(" ordpattern");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
|
@ -101,6 +104,28 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
|
||||||
* so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
|
* so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
|
||||||
* the extra 9 is 2 newlines, plus "length " itself.
|
* the extra 9 is 2 newlines, plus "length " itself.
|
||||||
*
|
*
|
||||||
|
* for sorted bytes this is a fixed-width file, for example:
|
||||||
|
* <pre>
|
||||||
|
* field myField
|
||||||
|
* numvalues 10
|
||||||
|
* maxlength 8
|
||||||
|
* pattern 0
|
||||||
|
* ordpattern 00
|
||||||
|
* length 6
|
||||||
|
* foobar[space][space]
|
||||||
|
* length 3
|
||||||
|
* baz[space][space][space][space][space]
|
||||||
|
* ...
|
||||||
|
* 03
|
||||||
|
* 06
|
||||||
|
* 01
|
||||||
|
* 09
|
||||||
|
* ...
|
||||||
|
* </pre>
|
||||||
|
* so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
|
||||||
|
* a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length)*docid (the extra 1 is the newline after each ord)
|
||||||
|
* an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
|
||||||
|
*
|
||||||
* the reader can just scan this file when it opens, skipping over the data blocks
|
* the reader can just scan this file when it opens, skipping over the data blocks
|
||||||
* and saving the offset/etc for each field.
|
* and saving the offset/etc for each field.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue