diff --git a/src/java/org/apache/lucene/document/Document.java b/src/java/org/apache/lucene/document/Document.java
index ea0a42686bb..5fb0ce5e693 100644
--- a/src/java/org/apache/lucene/document/Document.java
+++ b/src/java/org/apache/lucene/document/Document.java
@@ -144,14 +144,16 @@ public final class Document implements java.io.Serializable {
/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
- * method returns the first value added.
+ * method returns the first value added. If only binary fields with this name
+ * exist, returns null.
*/
public final String get(String name) {
- Field field = getField(name);
- if (field != null)
- return field.stringValue();
- else
- return null;
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (!field.isBinary()))
+ return field.stringValue();
+ }
+ return null;
}
/** Returns an Enumeration of all the fields in a document. */
@@ -189,16 +191,59 @@ public final class Document implements java.io.Serializable {
* @return a String[]
of field values
*/
public final String[] getValues(String name) {
- Field[] namedFields = getFields(name);
- if (namedFields == null)
- return null;
- String[] values = new String[namedFields.length];
- for (int i = 0; i < namedFields.length; i++) {
- values[i] = namedFields[i].stringValue();
+ List result = new ArrayList();
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (!field.isBinary()))
+ result.add(field.stringValue());
}
- return values;
+
+ if (result.size() == 0)
+ return null;
+
+ return (String[])result.toArray(new String[result.size()]);
}
+ /**
+ * Returns an array of byte arrays for of the fields that have the name specified
+ * as the method parameter. This method will return null
if no
+ * binary fields with the specified name are available.
+ *
+ * @param name the name of the field
+ * @return a byte[][]
of binary field values.
+ */
+ public final byte[][] getBinaryValues(String name) {
+ List result = new ArrayList();
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (field.isBinary()))
+ result.add(field.binaryValue());
+ }
+
+ if (result.size() == 0)
+ return null;
+
+ return (byte[][])result.toArray(new byte[result.size()][]);
+ }
+
+ /**
+ * Returns an array of bytes for the first (or only) field that has the name
+ * specified as the method parameter. This method will return null
+ * if no binary fields with the specified name are available.
+ * There may be non-binary fields with the same name.
+ *
+ * @param name the name of the field.
+ * @return a byte[]
containing the binary field value.
+ */
+ public final byte[] getBinaryValue(String name) {
+ for (int i=0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (field.isBinary()))
+ return field.binaryValue();
+ }
+ return null;
+ }
+
/** Prints the fields of a document for human consumption. */
public final String toString() {
StringBuffer buffer = new StringBuffer();
diff --git a/src/java/org/apache/lucene/document/Field.java b/src/java/org/apache/lucene/document/Field.java
index 5890df6a25c..90fe3fb822a 100644
--- a/src/java/org/apache/lucene/document/Field.java
+++ b/src/java/org/apache/lucene/document/Field.java
@@ -18,9 +18,10 @@ package org.apache.lucene.document;
import java.io.Reader;
import java.util.Date;
-import org.apache.lucene.index.IndexReader; // for javadoc
-import org.apache.lucene.search.Similarity; // for javadoc
-import org.apache.lucene.search.Hits; // for javadoc
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Similarity;
/**
A field is a section of a Document. Each field has two parts, a name and a
@@ -33,12 +34,15 @@ import org.apache.lucene.search.Hits; // for javadoc
public final class Field implements java.io.Serializable {
private String name = "body";
private String stringValue = null;
- private boolean storeTermVector = false;
private Reader readerValue = null;
+ private byte[] binaryValue = null;
+
+ private boolean storeTermVector = false;
private boolean isStored = false;
private boolean isIndexed = true;
private boolean isTokenized = true;
-
+ private boolean isBinary = false;
+
private float boost = 1.0f;
public static final class Store {
@@ -101,7 +105,7 @@ public final class Field implements java.io.Serializable {
* of the document's terms and their number of occurences in that document. */
public static final TermVector YES = new TermVector("YES");
}
-
+
/** Sets the boost factor hits on this field. This value will be
* multiplied into the score of all hits on this this field of this
* document.
@@ -213,7 +217,7 @@ public final class Field implements java.io.Serializable {
f.storeTermVector = storeTermVector;
return f;
}
-
+
/** The name of the field (e.g., "date", "title", "body", ...)
as an interned string. */
public String name() { return name; }
@@ -224,7 +228,11 @@ public final class Field implements java.io.Serializable {
/** The value of the field as a Reader, or null. If null, the String value
is used. Exactly one of stringValue() and readerValue() must be set. */
public Reader readerValue() { return readerValue; }
-
+ /** The value of the field in Binary, or null. If null, the Reader or
+ String value is used. Exactly one of stringValue(), readerValue() and
+ binaryValue() must be set. */
+ public byte[] binaryValue() { return binaryValue; }
+
/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
@@ -240,7 +248,7 @@ public final class Field implements java.io.Serializable {
public Field(String name, String value, Store store, Index index) {
this(name, value, store, index, TermVector.NO);
}
-
+
/**
* Create a field by specifying its name, value and how it will
* be saved in the index.
@@ -340,6 +348,23 @@ public final class Field implements java.io.Serializable {
this(name, string, store, index, token, false);
}
+ public Field(String name, byte[] value) {
+ if (name == null)
+ throw new IllegalArgumentException("name cannot be null");
+ if (value == null)
+ throw new IllegalArgumentException("value cannot be null");
+
+ this.name = name.intern();
+ this.binaryValue = value;
+
+ this.isBinary = true;
+ this.isStored = true;
+
+ this.isIndexed = false;
+ this.isTokenized = false;
+ this.storeTermVector = false;
+ }
+
/**
*
* @param name The name of the field
@@ -402,6 +427,9 @@ public final class Field implements java.io.Serializable {
*/
public final boolean isTermVectorStored() { return storeTermVector; }
+ /** True iff the value of the filed is stored as binary */
+ public final boolean isBinary() { return isBinary; }
+
/** Prints a Field for human consumption. */
public final String toString() {
StringBuffer result = new StringBuffer();
@@ -422,6 +450,12 @@ public final class Field implements java.io.Serializable {
result.append(",");
result.append("termVector");
}
+ if (isBinary) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("binary");
+ }
+
result.append('<');
result.append(name);
result.append(':');
diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java
index 145c4c96b0e..08cfa712d84 100644
--- a/src/java/org/apache/lucene/index/FieldsReader.java
+++ b/src/java/org/apache/lucene/index/FieldsReader.java
@@ -18,10 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.InputStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
/**
* Class responsible for access to stored document fields.
@@ -67,20 +67,28 @@ final class FieldsReader {
byte bits = fieldsStream.readByte();
- Field.Index index;
- boolean tokenize = (bits & 1) != 0;
- if (fi.isIndexed && tokenize)
- index = Field.Index.TOKENIZED;
- else if (fi.isIndexed && !tokenize)
- index = Field.Index.UN_TOKENIZED;
- else
- index = Field.Index.NO;
- doc.add(new Field(fi.name, // name
- fieldsStream.readString(), // read value
- Field.Store.YES, index,
- fi.storeTermVector ? Field.TermVector.YES : Field.TermVector.NO));
+ if ((bits & 2) != 0) {
+ final byte[] b = new byte[fieldsStream.readVInt()];
+ fieldsStream.readBytes(b, 0, b.length);
+ doc.add(new Field(fi.name, b));
+ }
+ else {
+ Field.Index index;
+ boolean tokenize = (bits & 1) != 0;
+ if (fi.isIndexed && tokenize)
+ index = Field.Index.TOKENIZED;
+ else if (fi.isIndexed && !tokenize)
+ index = Field.Index.UN_TOKENIZED;
+ else
+ index = Field.Index.NO;
+ doc.add(new Field(fi.name, // name
+ fieldsStream.readString(), // read value
+ Field.Store.YES, index,
+ fi.storeTermVector ? Field.TermVector.YES : Field.TermVector.NO));
+ }
}
return doc;
}
+
}
diff --git a/src/java/org/apache/lucene/index/FieldsWriter.java b/src/java/org/apache/lucene/index/FieldsWriter.java
index 9cef98eba02..dfb17827db6 100644
--- a/src/java/org/apache/lucene/index/FieldsWriter.java
+++ b/src/java/org/apache/lucene/index/FieldsWriter.java
@@ -2,70 +2,82 @@ package org.apache.lucene.index;
/**
* Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
*/
-import java.util.Enumeration;
import java.io.IOException;
+import java.util.Enumeration;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.OutputStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
-final class FieldsWriter {
- private FieldInfos fieldInfos;
- private OutputStream fieldsStream;
- private OutputStream indexStream;
-
- FieldsWriter(Directory d, String segment, FieldInfos fn)
- throws IOException {
- fieldInfos = fn;
- fieldsStream = d.createFile(segment + ".fdt");
- indexStream = d.createFile(segment + ".fdx");
- }
+final class FieldsWriter
+{
+ private FieldInfos fieldInfos;
- final void close() throws IOException {
- fieldsStream.close();
- indexStream.close();
- }
+ private OutputStream fieldsStream;
- final void addDocument(Document doc) throws IOException {
- indexStream.writeLong(fieldsStream.getFilePointer());
-
- int storedCount = 0;
- Enumeration fields = doc.fields();
- while (fields.hasMoreElements()) {
- Field field = (Field)fields.nextElement();
- if (field.isStored())
- storedCount++;
+ private OutputStream indexStream;
+
+ FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+ fieldInfos = fn;
+ fieldsStream = d.createFile(segment + ".fdt");
+ indexStream = d.createFile(segment + ".fdx");
}
- fieldsStream.writeVInt(storedCount);
-
- fields = doc.fields();
- while (fields.hasMoreElements()) {
- Field field = (Field)fields.nextElement();
- if (field.isStored()) {
- fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
- byte bits = 0;
- if (field.isTokenized())
- bits |= 1;
- fieldsStream.writeByte(bits);
-
- fieldsStream.writeString(field.stringValue());
- }
+ final void close() throws IOException {
+ fieldsStream.close();
+ indexStream.close();
}
- }
-}
+
+ final void addDocument(Document doc) throws IOException {
+ indexStream.writeLong(fieldsStream.getFilePointer());
+
+ int storedCount = 0;
+ Enumeration fields = doc.fields();
+ while (fields.hasMoreElements()) {
+ Field field = (Field) fields.nextElement();
+ if (field.isStored())
+ storedCount++;
+ }
+ fieldsStream.writeVInt(storedCount);
+
+ fields = doc.fields();
+ while (fields.hasMoreElements()) {
+ Field field = (Field) fields.nextElement();
+ if (field.isStored()) {
+ fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
+
+ byte bits = 0;
+ if (field.isTokenized())
+ bits |= 1;
+ if (field.isBinary())
+ bits |= 2;
+ fieldsStream.writeByte(bits);
+
+ if (field.isBinary()) {
+ byte[] data = field.binaryValue();
+ final int len = data.length;
+ fieldsStream.writeVInt(len);
+ fieldsStream.writeBytes(data, len);
+ } else {
+ fieldsStream.writeString(field.stringValue());
+ }
+ }
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/src/test/org/apache/lucene/document/TestDocument.java b/src/test/org/apache/lucene/document/TestDocument.java
index 0f1c96e663a..ff0c1aaa8a5 100644
--- a/src/test/org/apache/lucene/document/TestDocument.java
+++ b/src/test/org/apache/lucene/document/TestDocument.java
@@ -39,6 +39,56 @@ import org.apache.lucene.search.Hits;
public class TestDocument extends TestCase
{
+ String binaryVal = "this text will be stored as a byte array in the index";
+ String binaryVal2 = "this text will be also stored as a byte array in the index";
+
+ public void testBinaryField()
+ throws Exception
+ {
+ Document doc = new Document();
+ Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
+ Field binaryFld = new Field("binary", binaryVal.getBytes());
+ Field binaryFld2 = new Field("binary", binaryVal2.getBytes());
+
+ doc.add(stringFld);
+ doc.add(binaryFld);
+
+ assertEquals(2, doc.fields.size());
+
+ assertTrue(binaryFld.isBinary());
+ assertTrue(binaryFld.isStored());
+ assertFalse(binaryFld.isIndexed());
+ assertFalse(binaryFld.isTokenized());
+
+ String binaryTest = new String(doc.getBinaryValue("binary"));
+ assertTrue(binaryTest.equals(binaryVal));
+
+ String stringTest = doc.get("string");
+ assertTrue(binaryTest.equals(stringTest));
+
+ doc.add(binaryFld2);
+
+ assertEquals(3, doc.fields.size());
+
+ byte[][] binaryTests = doc.getBinaryValues("binary");
+
+ assertEquals(2, binaryTests.length);
+
+ binaryTest = new String(binaryTests[0]);
+ String binaryTest2 = new String(binaryTests[1]);
+
+ assertFalse(binaryTest.equals(binaryTest2));
+
+ assertTrue(binaryTest.equals(binaryVal));
+ assertTrue(binaryTest2.equals(binaryVal2));
+
+ doc.removeField("string");
+ assertEquals(2, doc.fields.size());
+
+ doc.removeFields("binary");
+ assertEquals(0, doc.fields.size());
+ }
+
/**
* Tests {@link Document#removeField(String)} method for a brand new Document
* that has not been indexed yet.