git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1410888 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-11-18 14:15:03 +00:00
parent f9bfb920c6
commit 8354b41253
15 changed files with 264 additions and 42 deletions

View File

@ -109,7 +109,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
* baz[space][space][space][space][space]
* ...
* </pre>
* so an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
* so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*doc
* the extra 9 is 2 newlines, plus "length " itself.
*
* for sorted bytes this is a fixed-width file, for example:
@ -214,8 +214,10 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
SimpleTextUtil.write(data, encoder.format(value.length), scratch);
SimpleTextUtil.writeNewline(data);
// write bytes
SimpleTextUtil.write(data, value);
// write bytes -- don't use SimpleText.write
// because it escapes:
data.writeBytes(value.bytes, value.offset, value.length);
// pad to fit
for (int i = value.length; i < maxLength; i++) {
data.writeByte((byte)' ');
@ -278,8 +280,10 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
SimpleTextUtil.write(data, encoder.format(value.length), scratch);
SimpleTextUtil.writeNewline(data);
// write bytes
SimpleTextUtil.write(data, value);
// write bytes -- don't use SimpleText.write
// because it escapes:
data.writeBytes(value.bytes, value.offset, value.length);
// pad to fit
for (int i = value.length; i < maxLength; i++) {
data.writeByte((byte)' ');
@ -351,6 +355,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
SimpleTextDocValuesReader(FieldInfos fieldInfos, Directory dir, SegmentInfo si, IOContext context) throws IOException {
super(si.getDocCount());
System.out.println("dir=" + dir + " seg=" + si.name);
data = dir.openInput(IndexFileNames.segmentFileName(si.name, "", "dat"), context);
maxDoc = si.getDocCount();
while(true) {
@ -360,6 +365,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
}
assert startsWith(FIELD) : scratch.utf8ToString();
String fieldName = stripPrefix(FIELD);
//System.out.println(" field=" + fieldName);
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
assert fieldInfo != null;
@ -370,7 +376,7 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
DocValues.Type dvType = fieldInfo.getDocValuesType();
assert dvType != null;
if (DocValues.isNumber(dvType)) {
if (DocValues.isNumber(dvType) || DocValues.isFloat(dvType)) {
readLine();
assert startsWith(MINVALUE);
field.minValue = Long.parseLong(stripPrefix(MINVALUE));
@ -388,7 +394,6 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
break;
} else if (DocValues.isSortedBytes(dvType)) {
readLine();
assert startsWith(NUMVALUES);
@ -405,8 +410,6 @@ public class SimpleTextSimpleDocValuesFormat extends SimpleDocValuesFormat {
field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * field.numValues + (1+field.ordPattern.length())*maxDoc);
// nocommit: we need to seek past the data section!!!!
} else if (DocValues.isFloat(dvType)) {
// nocommit
} else {
throw new AssertionError();
}

View File

@ -38,8 +38,10 @@ public abstract class BinaryDocValuesConsumer {
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
// nocommit what if this is null...? need default source?
final BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (source == null) {
source = BinaryDocValues.DEFAULT;
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {

View File

@ -36,8 +36,10 @@ public abstract class NumericDocValuesConsumer {
for (AtomicReader reader : mergeState.readers) {
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
// nocommit what if this is null...? need default source?
final NumericDocValues source = reader.getNumericDocValues(mergeState.fieldInfo.name);
NumericDocValues source = reader.getNumericDocValues(mergeState.fieldInfo.name);
if (source == null) {
source = NumericDocValues.DEFAULT;
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {
add(source.get(i));

View File

@ -21,11 +21,13 @@ import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -80,16 +82,14 @@ public abstract class SimpleDVConsumer implements Closeable {
for (AtomicReader reader : mergeState.readers) {
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
DocValues docValues = reader.docValues(mergeState.fieldInfo.name);
final Source source;
//System.out.println("merge field=" + mergeState.fieldInfo.name);
NumericDocValues docValues = reader.getNumericDocValues(mergeState.fieldInfo.name);
if (docValues == null) {
source = DocValues.getDefaultSource(mergeState.fieldInfo.getDocValuesType());
} else {
source = docValues.getDirectSource();
docValues = NumericDocValues.DEFAULT;
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {
long val = source.getInt(i);
long val = docValues.get(i);
minValue = Math.min(val, minValue);
maxValue = Math.min(val, maxValue);
}
@ -110,16 +110,13 @@ public abstract class SimpleDVConsumer implements Closeable {
for (AtomicReader reader : mergeState.readers) {
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
DocValues docValues = reader.docValues(mergeState.fieldInfo.name);
final Source source;
BinaryDocValues docValues = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (docValues == null) {
source = DocValues.getDefaultSource(mergeState.fieldInfo.getDocValuesType());
} else {
source = docValues.getDirectSource();
docValues = BinaryDocValues.DEFAULT;
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {
source.getBytes(i, bytes);
docValues.get(i, bytes);
if (maxLength == -1) {
maxLength = bytes.length;
} else {

View File

@ -105,6 +105,10 @@ public abstract class SortedDocValuesConsumer {
SegmentState state = new SegmentState();
state.reader = reader;
state.values = reader.getSortedDocValues(mergeState.fieldInfo.name);
if (state.values == null) {
state.values = SortedDocValues.DEFAULT;
}
segStates.add(state);
assert state.values.getValueCount() < Integer.MAX_VALUE;
if (reader.hasDeletions()) {

View File

@ -42,9 +42,7 @@ public class DoubleDocValuesField extends StoredField {
*/
public static final FieldType TYPE = new FieldType();
static {
// nocommit kinda messy ... if user calls .numericValue
// they get back strange int ... hmmm
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_64);
TYPE.setDocValueType(DocValues.Type.FLOAT_64);
TYPE.freeze();
}
@ -56,8 +54,6 @@ public class DoubleDocValuesField extends StoredField {
*/
public DoubleDocValuesField(String name, double value) {
super(name, TYPE);
// nocommit kinda messy ... if user calls .numericValue
// they get back strange int ... hmmm
fieldsData = Double.doubleToRawLongBits(value);
fieldsData = value;
}
}

View File

@ -41,9 +41,7 @@ public class FloatDocValuesField extends StoredField {
*/
public static final FieldType TYPE = new FieldType();
static {
// nocommit kinda messy ... if user calls .numericValue
// they get back strange int ... hmmm
TYPE.setDocValueType(DocValues.Type.FIXED_INTS_32);
TYPE.setDocValueType(DocValues.Type.FLOAT_32);
TYPE.freeze();
}
@ -55,8 +53,6 @@ public class FloatDocValuesField extends StoredField {
*/
public FloatDocValuesField(String name, float value) {
super(name, TYPE);
// nocommit kinda messy ... if user calls .numericValue
// they get back strange int ... hmmm
fieldsData = Float.floatToRawIntBits(value);
fieldsData = value;
}
}

View File

@ -23,4 +23,11 @@ import org.apache.lucene.util.BytesRef;
public abstract class BinaryDocValues {
// nocommit throws IOE or not?
public abstract void get(int docID, BytesRef result);
public static final BinaryDocValues DEFAULT = new BinaryDocValues() {
@Override
public void get(int docID, BytesRef ret) {
ret.length = 0;
}
};
}

View File

@ -325,6 +325,12 @@ final class DocFieldProcessor extends DocConsumer {
case FIXED_INTS_64:
fp.addNumberDVField(docState.docID, field.numericValue());
break;
case FLOAT_32:
fp.addFloatDVField(docState.docID, field.numericValue());
break;
case FLOAT_64:
fp.addDoubleDVField(docState.docID, field.numericValue());
break;
default:
break;
}

View File

@ -74,6 +74,22 @@ final class DocFieldProcessorPerField {
numberDVWriter.addValue(docID, value.longValue());
}
// nocommit make this generic chain through consumer?
public void addFloatDVField(int docID, Number value) {
if (numberDVWriter == null) {
numberDVWriter = new NumberDVWriter(fieldInfo, bytesUsed);
}
numberDVWriter.addValue(docID, Float.floatToRawIntBits(value.floatValue()));
}
// nocommit make this generic chain through consumer?
public void addDoubleDVField(int docID, Number value) {
if (numberDVWriter == null) {
numberDVWriter = new NumberDVWriter(fieldInfo, bytesUsed);
}
numberDVWriter.addValue(docID, Double.doubleToRawLongBits(value.doubleValue()));
}
public void addField(IndexableField field) {
if (fieldCount == fields.length) {
int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);

View File

@ -62,7 +62,6 @@ class NumberDVWriter {
pending.add(value);
updateBytesUsed();
//System.out.println("ADD: " + value);
}
private void updateBytesUsed() {
@ -87,7 +86,8 @@ class NumberDVWriter {
AppendingLongBuffer.Iterator it = pending.iterator();
for(int docID=0;docID<bufferedDocCount;docID++) {
assert it.hasNext();
consumer.add(it.next());
long v = it.next();
consumer.add(v);
}
assert !it.hasNext();
final int maxDoc = state.segmentInfo.getDocCount();

View File

@ -21,4 +21,11 @@ package org.apache.lucene.index;
public abstract class NumericDocValues {
// nocommit throws IOE or not?
public abstract long get(int docID);
public static final NumericDocValues DEFAULT = new NumericDocValues() {
@Override
public long get(int docID) {
return 0;
}
};
}

View File

@ -163,11 +163,16 @@ final class SegmentCoreReaders {
// Field was not indexed with doc values
return null;
}
if (!DocValues.isNumber(fi.getDocValuesType())) {
if (!DocValues.isNumber(fi.getDocValuesType()) && !DocValues.isFloat(fi.getDocValuesType())) {
// DocValues were not numeric
return null;
}
// nocommit change to assert != null!!
if (simpleDVProducer == null) {
return null;
}
return simpleDVProducer.getNumeric(fi);
}
@ -187,6 +192,11 @@ final class SegmentCoreReaders {
return null;
}
// nocommit change to assert != null!!
if (simpleDVProducer == null) {
return null;
}
return simpleDVProducer.getBinary(fi);
}
@ -206,6 +216,11 @@ final class SegmentCoreReaders {
return null;
}
// nocommit change to assert != null!!
if (simpleDVProducer == null) {
return null;
}
return simpleDVProducer.getSorted(fi);
}

View File

@ -31,4 +31,25 @@ public abstract class SortedDocValues {
public abstract int getValueCount();
// nocommit binary search lookup?
public static final SortedDocValues DEFAULT = new SortedDocValues() {
@Override
public int getOrd(int docID) {
return 0;
}
@Override
public void lookupOrd(int ord, BytesRef ret) {
if (ord != 0) {
throw new IllegalArgumentException("ord should be 0");
}
ret.length = 0;
}
@Override
public int getValueCount() {
return 1;
}
};
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
@ -89,6 +90,128 @@ public class TestDemoDocValue extends LuceneTestCase {
ireader.close();
directory.close();
}
public void testDemoFloat() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new FloatDocValuesField("dv", 5.7f));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(Float.floatToRawIntBits(5.7f), dv.get(hits.scoreDocs[i].doc));
}
ireader.close();
directory.close();
}
public void testDemoTwoFieldsNumber() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new PackedLongDocValuesField("dv1", 5));
doc.add(new PackedLongDocValuesField("dv2", 17));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(5, dv.get(hits.scoreDocs[i].doc));
dv = ireader.leaves().get(0).reader().getNumericDocValues("dv2");
assertEquals(17, dv.get(hits.scoreDocs[i].doc));
}
ireader.close();
directory.close();
}
public void testDemoTwoFieldsMixed() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new PackedLongDocValuesField("dv1", 5));
doc.add(new StraightBytesDocValuesField("dv2", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(5, dv.get(hits.scoreDocs[i].doc));
BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
dv2.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
public void testTwoDocumentsNumeric() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
@ -254,7 +377,7 @@ public class TestDemoDocValue extends LuceneTestCase {
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new StraightBytesDocValuesField("dv", new BytesRef("hello world 2")));
doc.add(new StraightBytesDocValuesField("dv", new BytesRef("hello 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
@ -270,7 +393,7 @@ public class TestDemoDocValue extends LuceneTestCase {
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello world 2";
expected = "hello 2";
}
dv.get(i, scratch);
assertEquals(expected, scratch.utf8ToString());
@ -398,4 +521,31 @@ public class TestDemoDocValue extends LuceneTestCase {
directory.close();
}
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
// Store the index in memory:
Directory directory = newDirectory();
// To store an index on disk, use this instead:
// Directory directory = FSDirectory.open(new File("/tmp/testindex"));
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new StraightBytesDocValuesField("dv", new BytesRef("hello\nworld\r1")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals(new BytesRef("hello\nworld\r1"), scratch);
ireader.close();
directory.close();
}
}