Applied patch #29370 supplied by Drew Farris and

Bernhard Messer.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christoph Goller 2004-09-15 12:50:23 +00:00
parent fd4cd859c5
commit a6fad246dd
5 changed files with 239 additions and 90 deletions

View File

@ -144,13 +144,15 @@ public final class Document implements java.io.Serializable {
/** Returns the string value of the field with the given name if any exist in /** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this * this document, or null. If multiple fields exist with this name, this
* method returns the first value added. * method returns the first value added. If only binary fields with this name
* exist, returns null.
*/ */
public final String get(String name) { public final String get(String name) {
Field field = getField(name); for (int i = 0; i < fields.size(); i++) {
if (field != null) Field field = (Field)fields.get(i);
if (field.name().equals(name) && (!field.isBinary()))
return field.stringValue(); return field.stringValue();
else }
return null; return null;
} }
@ -189,14 +191,57 @@ public final class Document implements java.io.Serializable {
* @return a <code>String[]</code> of field values * @return a <code>String[]</code> of field values
*/ */
public final String[] getValues(String name) { public final String[] getValues(String name) {
Field[] namedFields = getFields(name); List result = new ArrayList();
if (namedFields == null) for (int i = 0; i < fields.size(); i++) {
return null; Field field = (Field)fields.get(i);
String[] values = new String[namedFields.length]; if (field.name().equals(name) && (!field.isBinary()))
for (int i = 0; i < namedFields.length; i++) { result.add(field.stringValue());
values[i] = namedFields[i].stringValue();
} }
return values;
if (result.size() == 0)
return null;
return (String[])result.toArray(new String[result.size()]);
}
/**
 * Collects the binary values of all binary fields that carry the given name,
 * in the order in which those fields appear in this document's field list.
 * Non-binary fields with the same name are ignored.
 *
 * @param name the name of the field
 * @return a <code>byte[][]</code> holding one entry per matching binary field,
 *         or <code>null</code> if no binary field with that name is present
 */
public final byte[][] getBinaryValues(String name) {
  List binaryValues = new ArrayList();

  for (int pos = 0; pos < fields.size(); pos++) {
    Field candidate = (Field) fields.get(pos);
    // Skip anything that has a different name or is not a binary field.
    if (!candidate.name().equals(name) || !candidate.isBinary()) {
      continue;
    }
    binaryValues.add(candidate.binaryValue());
  }

  if (binaryValues.isEmpty()) {
    return null;
  }

  byte[][] target = new byte[binaryValues.size()][];
  return (byte[][]) binaryValues.toArray(target);
}
/**
* Returns an array of bytes for the first (or only) binary field that has the
* name specified as the method parameter. This method will return
* <code>null</code> if no binary fields with the specified name are available.
* There may be non-binary fields with the same name; those are skipped.
*
* @param name the name of the field.
* @return a <code>byte[]</code> containing the binary field value, or
*         <code>null</code> if no binary field with that name exists.
*/
public final byte[] getBinaryValue(String name) {
// Walk the field list front to back so the first matching binary field wins.
for (int i=0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
// A field qualifies only if the name matches AND it is flagged as binary.
if (field.name().equals(name) && (field.isBinary()))
return field.binaryValue();
}
// No binary field with the requested name was found.
return null;
} }
/** Prints the fields of a document for human consumption. */ /** Prints the fields of a document for human consumption. */

View File

@ -18,9 +18,10 @@ package org.apache.lucene.document;
import java.io.Reader; import java.io.Reader;
import java.util.Date; import java.util.Date;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.search.Similarity; // for javadoc import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits; // for javadoc import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Similarity;
/** /**
A field is a section of a Document. Each field has two parts, a name and a A field is a section of a Document. Each field has two parts, a name and a
@ -33,11 +34,14 @@ import org.apache.lucene.search.Hits; // for javadoc
public final class Field implements java.io.Serializable { public final class Field implements java.io.Serializable {
private String name = "body"; private String name = "body";
private String stringValue = null; private String stringValue = null;
private boolean storeTermVector = false;
private Reader readerValue = null; private Reader readerValue = null;
private byte[] binaryValue = null;
private boolean storeTermVector = false;
private boolean isStored = false; private boolean isStored = false;
private boolean isIndexed = true; private boolean isIndexed = true;
private boolean isTokenized = true; private boolean isTokenized = true;
private boolean isBinary = false;
private float boost = 1.0f; private float boost = 1.0f;
@ -224,6 +228,10 @@ public final class Field implements java.io.Serializable {
/** The value of the field as a Reader, or null. If null, the String value /** The value of the field as a Reader, or null. If null, the String value
is used. Exactly one of stringValue() and readerValue() must be set. */ is used. Exactly one of stringValue() and readerValue() must be set. */
public Reader readerValue() { return readerValue; } public Reader readerValue() { return readerValue; }
/**
 * The value of the field as a byte array, or null. If null, the Reader or
 * String value is used. Exactly one of stringValue(), readerValue() and
 * binaryValue() must be set.
 *
 * @return the array held by this field (returned as is, not copied), or null
 */
public byte[] binaryValue() {
  return this.binaryValue;
}
/** /**
* Create a field by specifying its name, value and how it will * Create a field by specifying its name, value and how it will
@ -340,6 +348,23 @@ public final class Field implements java.io.Serializable {
this(name, string, store, index, token, false); this(name, string, store, index, token, false);
} }
/**
 * Creates a binary field. The resulting field is flagged as binary and
 * stored; it is not indexed, not tokenized and stores no term vector.
 *
 * @param name the name of the field; it is interned before being kept
 * @param value the binary value; the array is kept by reference, not copied
 * @throws IllegalArgumentException if <code>name</code> or <code>value</code>
 *         is <code>null</code>
 */
public Field(String name, byte[] value) {
  if (null == name) {
    throw new IllegalArgumentException("name cannot be null");
  }
  if (null == value) {
    throw new IllegalArgumentException("value cannot be null");
  }

  // Payload of the field.
  this.binaryValue = value;
  this.name = name.intern();

  // Flags: a binary field is stored only.
  this.isStored = true;
  this.isBinary = true;
  this.storeTermVector = false;
  this.isTokenized = false;
  this.isIndexed = false;
}
/** /**
* *
* @param name The name of the field * @param name The name of the field
@ -402,6 +427,9 @@ public final class Field implements java.io.Serializable {
*/ */
public final boolean isTermVectorStored() { return storeTermVector; } public final boolean isTermVectorStored() { return storeTermVector; }
/**
 * Tells whether this field holds a binary value.
 *
 * @return true iff the value of the field is stored as binary
 */
public final boolean isBinary() {
  return this.isBinary;
}
/** Prints a Field for human consumption. */ /** Prints a Field for human consumption. */
public final String toString() { public final String toString() {
StringBuffer result = new StringBuffer(); StringBuffer result = new StringBuffer();
@ -422,6 +450,12 @@ public final class Field implements java.io.Serializable {
result.append(","); result.append(",");
result.append("termVector"); result.append("termVector");
} }
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
result.append('<'); result.append('<');
result.append(name); result.append(name);
result.append(':'); result.append(':');

View File

@ -18,10 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
/** /**
* Class responsible for access to stored document fields. * Class responsible for access to stored document fields.
@ -67,6 +67,12 @@ final class FieldsReader {
byte bits = fieldsStream.readByte(); byte bits = fieldsStream.readByte();
if ((bits & 2) != 0) {
final byte[] b = new byte[fieldsStream.readVInt()];
fieldsStream.readBytes(b, 0, b.length);
doc.add(new Field(fi.name, b));
}
else {
Field.Index index; Field.Index index;
boolean tokenize = (bits & 1) != 0; boolean tokenize = (bits & 1) != 0;
if (fi.isIndexed && tokenize) if (fi.isIndexed && tokenize)
@ -80,7 +86,9 @@ final class FieldsReader {
Field.Store.YES, index, Field.Store.YES, index,
fi.storeTermVector ? Field.TermVector.YES : Field.TermVector.NO)); fi.storeTermVector ? Field.TermVector.YES : Field.TermVector.NO));
} }
}
return doc; return doc;
} }
} }

View File

@ -3,34 +3,36 @@ package org.apache.lucene.index;
/** /**
* Copyright 2004 The Apache Software Foundation * Copyright 2004 The Apache Software Foundation
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License"); you may not
* you may not use this file except in compliance with the License. * use this file except in compliance with the License. You may obtain a copy of
* You may obtain a copy of the License at * the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* See the License for the specific language governing permissions and * License for the specific language governing permissions and limitations under
* limitations under the License. * the License.
*/ */
import java.util.Enumeration;
import java.io.IOException; import java.io.IOException;
import java.util.Enumeration;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
final class FieldsWriter { final class FieldsWriter
{
private FieldInfos fieldInfos; private FieldInfos fieldInfos;
private OutputStream fieldsStream; private OutputStream fieldsStream;
private OutputStream indexStream; private OutputStream indexStream;
FieldsWriter(Directory d, String segment, FieldInfos fn) FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
throws IOException {
fieldInfos = fn; fieldInfos = fn;
fieldsStream = d.createFile(segment + ".fdt"); fieldsStream = d.createFile(segment + ".fdt");
indexStream = d.createFile(segment + ".fdx"); indexStream = d.createFile(segment + ".fdx");
@ -47,7 +49,7 @@ final class FieldsWriter {
int storedCount = 0; int storedCount = 0;
Enumeration fields = doc.fields(); Enumeration fields = doc.fields();
while (fields.hasMoreElements()) { while (fields.hasMoreElements()) {
Field field = (Field)fields.nextElement(); Field field = (Field) fields.nextElement();
if (field.isStored()) if (field.isStored())
storedCount++; storedCount++;
} }
@ -55,17 +57,27 @@ final class FieldsWriter {
fields = doc.fields(); fields = doc.fields();
while (fields.hasMoreElements()) { while (fields.hasMoreElements()) {
Field field = (Field)fields.nextElement(); Field field = (Field) fields.nextElement();
if (field.isStored()) { if (field.isStored()) {
fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name())); fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
byte bits = 0; byte bits = 0;
if (field.isTokenized()) if (field.isTokenized())
bits |= 1; bits |= 1;
if (field.isBinary())
bits |= 2;
fieldsStream.writeByte(bits); fieldsStream.writeByte(bits);
if (field.isBinary()) {
byte[] data = field.binaryValue();
final int len = data.length;
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, len);
} else {
fieldsStream.writeString(field.stringValue()); fieldsStream.writeString(field.stringValue());
} }
} }
} }
}
} }

View File

@ -39,6 +39,56 @@ import org.apache.lucene.search.Hits;
public class TestDocument extends TestCase public class TestDocument extends TestCase
{ {
String binaryVal = "this text will be stored as a byte array in the index";
String binaryVal2 = "this text will be also stored as a byte array in the index";
/**
 * Tests binary fields on a brand new Document: the flags set by the binary
 * {@link Field} constructor, retrieval through
 * {@link Document#getBinaryValue(String)} and
 * {@link Document#getBinaryValues(String)}, and removal of string and binary
 * fields by name.
 *
 * @throws Exception on error
 */
public void testBinaryField()
  throws Exception
{
  Document doc = new Document();
  Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
  Field binaryFld = new Field("binary", binaryVal.getBytes());
  Field binaryFld2 = new Field("binary", binaryVal2.getBytes());

  doc.add(stringFld);
  doc.add(binaryFld);

  assertEquals(2, doc.fields.size());

  // A binary field is stored, but neither indexed nor tokenized.
  assertTrue(binaryFld.isBinary());
  assertTrue(binaryFld.isStored());
  assertFalse(binaryFld.isIndexed());
  assertFalse(binaryFld.isTokenized());

  // assertEquals (rather than assertTrue(a.equals(b))) reports both values
  // when the comparison fails.
  String binaryTest = new String(doc.getBinaryValue("binary"));
  assertEquals(binaryVal, binaryTest);

  String stringTest = doc.get("string");
  assertEquals(stringTest, binaryTest);

  // A second binary field under the same name must be returned as well,
  // after the first one.
  doc.add(binaryFld2);

  assertEquals(3, doc.fields.size());

  byte[][] binaryTests = doc.getBinaryValues("binary");

  assertEquals(2, binaryTests.length);

  binaryTest = new String(binaryTests[0]);
  String binaryTest2 = new String(binaryTests[1]);

  assertFalse(binaryTest.equals(binaryTest2));

  assertEquals(binaryVal, binaryTest);
  assertEquals(binaryVal2, binaryTest2);

  doc.removeField("string");
  assertEquals(2, doc.fields.size());

  doc.removeFields("binary");
  assertEquals(0, doc.fields.size());
}
/** /**
* Tests {@link Document#removeField(String)} method for a brand new Document * Tests {@link Document#removeField(String)} method for a brand new Document
* that has not been indexed yet. * that has not been indexed yet.