LUCENE-964: remove DocumentWriter

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@560378 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2007-07-27 20:50:19 +00:00
parent 98fa2d898d
commit 1bc5a68549
9 changed files with 100 additions and 658 deletions
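Every use of DocumentWriter below is replaced by an IndexWriter that buffers the document, flushes it as its own segment, and hands back that segment's SegmentInfo. A minimal sketch of that pattern, assuming the Lucene 2.x test-side APIs visible in this diff (the wrapper class name is illustrative, not part of the commit):

package org.apache.lucene.index;  // segmentInfos and SegmentInfo are package-private

import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;

class SingleDocSegmentSketch {
  // Mirrors the new DocHelper.writeDoc / TestDoc.indexDoc pattern shown below.
  static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException {
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.addDocument(doc);                  // buffer the document
    writer.flush();                           // force it out as a new segment
    SegmentInfo info =
        writer.segmentInfos.info(writer.segmentInfos.size() - 1);  // newest segment
    writer.close();
    return info;                              // callers open SegmentReader.get(info)
  }
}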

View File

@@ -1,556 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
final class DocumentWriter {
private Analyzer analyzer;
private Directory directory;
private Similarity similarity;
private FieldInfos fieldInfos;
private int maxFieldLength;
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private PrintStream infoStream;
/** This ctor is used by test code only.
*
* @param directory The directory to write the document information to
* @param analyzer The analyzer to use for the document
* @param similarity The Similarity function
* @param maxFieldLength The maximum number of tokens a field may have
*/
DocumentWriter(Directory directory, Analyzer analyzer,
Similarity similarity, int maxFieldLength) {
this.directory = directory;
this.analyzer = analyzer;
this.similarity = similarity;
this.maxFieldLength = maxFieldLength;
}
DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
this.directory = directory;
this.analyzer = analyzer;
this.similarity = writer.getSimilarity();
this.maxFieldLength = writer.getMaxFieldLength();
this.termIndexInterval = writer.getTermIndexInterval();
}
final void addDocument(String segment, Document doc)
throws CorruptIndexException, IOException {
// create field infos
fieldInfos = new FieldInfos();
fieldInfos.add(doc);
// invert doc into postingTable
postingTable.clear(); // clear postingTable
fieldLengths = new int[fieldInfos.size()]; // init fieldLengths
fieldPositions = new int[fieldInfos.size()]; // init fieldPositions
fieldOffsets = new int[fieldInfos.size()]; // init fieldOffsets
fieldStoresPayloads = new BitSet(fieldInfos.size());
fieldBoosts = new float[fieldInfos.size()]; // init fieldBoosts
Arrays.fill(fieldBoosts, doc.getBoost());
try {
// Before we write the FieldInfos we invert the Document. The reason is that
* during inversion the TokenStreams of tokenized fields are being processed
// and we might encounter tokens that have payloads associated with them. In
// this case we have to update the FieldInfo of the particular field.
invertDocument(doc);
// sort postingTable into an array
Posting[] postings = sortPostingTable();
// write field infos
fieldInfos.write(directory, segment + ".fnm");
// write field values
FieldsWriter fieldsWriter =
new FieldsWriter(directory, segment, fieldInfos);
try {
fieldsWriter.addDocument(doc);
} finally {
fieldsWriter.close();
}
/*
for (int i = 0; i < postings.length; i++) {
Posting posting = postings[i];
System.out.print(posting.term);
System.out.print(" freq=" + posting.freq);
System.out.print(" pos=");
System.out.print(posting.positions[0]);
for (int j = 1; j < posting.freq; j++)
System.out.print("," + posting.positions[j]);
System.out.println("");
}
*/
// write postings
writePostings(postings, segment);
// write norms of indexed fields
writeNorms(segment);
} finally {
// close TokenStreams
IOException ex = null;
Iterator it = openTokenStreams.iterator();
while (it.hasNext()) {
try {
((TokenStream) it.next()).close();
} catch (IOException e) {
if (ex == null) {
ex = e;
}
}
}
openTokenStreams.clear();
if (ex != null) {
throw ex;
}
}
}
// Keys are Terms, values are Postings.
// Used to buffer a document before it is written to the index.
private final Hashtable postingTable = new Hashtable();
private int[] fieldLengths;
private int[] fieldPositions;
private int[] fieldOffsets;
private float[] fieldBoosts;
// If any of the tokens of a particular field carry a payload
// then we enable payloads for that field.
private BitSet fieldStoresPayloads;
// Keep references of the token streams. We must close them after
// the postings are written to the segment.
private List openTokenStreams = new LinkedList();
// Tokenizes the fields of a document into Postings.
private final void invertDocument(Document doc)
throws IOException {
Iterator fieldIterator = doc.getFields().iterator();
while (fieldIterator.hasNext()) {
Fieldable field = (Fieldable) fieldIterator.next();
String fieldName = field.name();
int fieldNumber = fieldInfos.fieldNumber(fieldName);
int length = fieldLengths[fieldNumber]; // length of field
int position = fieldPositions[fieldNumber]; // position in field
if (length>0) position+=analyzer.getPositionIncrementGap(fieldName);
int offset = fieldOffsets[fieldNumber]; // offset field
if (field.isIndexed()) {
if (!field.isTokenized()) { // un-tokenized field
String stringValue = field.stringValue();
if(field.isStoreOffsetWithTermVector())
addPosition(fieldName, stringValue, position++, null, new TermVectorOffsetInfo(offset, offset + stringValue.length()));
else
addPosition(fieldName, stringValue, position++, null, null);
offset += stringValue.length();
length++;
} else
{ // tokenized field
TokenStream stream = field.tokenStreamValue();
// the field does not have a TokenStream,
// so we have to obtain one from the analyzer
if (stream == null) {
Reader reader; // find or make Reader
if (field.readerValue() != null)
reader = field.readerValue();
else if (field.stringValue() != null)
reader = new StringReader(field.stringValue());
else
throw new IllegalArgumentException
("field must have either String or Reader value");
// Tokenize field and add to postingTable
stream = analyzer.tokenStream(fieldName, reader);
}
// remember this TokenStream, we must close it later
openTokenStreams.add(stream);
// reset the TokenStream to the first token
stream.reset();
Token lastToken = null;
for (Token t = stream.next(); t != null; t = stream.next()) {
position += (t.getPositionIncrement() - 1);
Payload payload = t.getPayload();
if (payload != null) {
// enable payloads for this field
fieldStoresPayloads.set(fieldNumber);
}
TermVectorOffsetInfo termVectorOffsetInfo;
if (field.isStoreOffsetWithTermVector()) {
termVectorOffsetInfo = new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset());
} else {
termVectorOffsetInfo = null;
}
addPosition(fieldName, t.termText(), position++, payload, termVectorOffsetInfo);
lastToken = t;
if (++length >= maxFieldLength) {
if (infoStream != null)
infoStream.println("maxFieldLength " +maxFieldLength+ " reached, ignoring following tokens");
break;
}
}
if(lastToken != null)
offset += lastToken.endOffset() + 1;
}
fieldLengths[fieldNumber] = length; // save field length
fieldPositions[fieldNumber] = position; // save field position
fieldBoosts[fieldNumber] *= field.getBoost();
fieldOffsets[fieldNumber] = offset;
}
}
// update fieldInfos for all fields that have one or more tokens with payloads
for (int i = fieldStoresPayloads.nextSetBit(0); i >= 0; i = fieldStoresPayloads.nextSetBit(i+1)) {
fieldInfos.fieldInfo(i).storePayloads = true;
}
}
private final Term termBuffer = new Term("", ""); // avoid consing
private final void addPosition(String field, String text, int position, Payload payload, TermVectorOffsetInfo offset) {
termBuffer.set(field, text);
//System.out.println("Offset: " + offset);
Posting ti = (Posting) postingTable.get(termBuffer);
if (ti != null) { // word seen before
int freq = ti.freq;
if (ti.positions.length == freq) { // positions array is full
int[] newPositions = new int[freq * 2]; // double size
int[] positions = ti.positions;
System.arraycopy(positions, 0, newPositions, 0, freq);
ti.positions = newPositions;
if (ti.payloads != null) {
// the current field stores payloads
Payload[] newPayloads = new Payload[freq * 2]; // grow payloads array
Payload[] payloads = ti.payloads;
System.arraycopy(payloads, 0, newPayloads, 0, payloads.length);
ti.payloads = newPayloads;
}
}
ti.positions[freq] = position; // add new position
if (payload != null) {
if (ti.payloads == null) {
// lazily allocate payload array
ti.payloads = new Payload[ti.positions.length];
}
ti.payloads[freq] = payload;
}
if (offset != null) {
if (ti.offsets.length == freq){
TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];
TermVectorOffsetInfo [] offsets = ti.offsets;
System.arraycopy(offsets, 0, newOffsets, 0, freq);
ti.offsets = newOffsets;
}
ti.offsets[freq] = offset;
}
ti.freq = freq + 1; // update frequency
} else { // word not seen before
Term term = new Term(field, text, false);
postingTable.put(term, new Posting(term, position, payload, offset));
}
}
private final Posting[] sortPostingTable() {
// copy postingTable into an array
Posting[] array = new Posting[postingTable.size()];
Enumeration postings = postingTable.elements();
for (int i = 0; postings.hasMoreElements(); i++)
array[i] = (Posting) postings.nextElement();
// sort the array
quickSort(array, 0, array.length - 1);
return array;
}
private static final void quickSort(Posting[] postings, int lo, int hi) {
if (lo >= hi)
return;
int mid = (lo + hi) >>> 1;
if (postings[lo].term.compareTo(postings[mid].term) > 0) {
Posting tmp = postings[lo];
postings[lo] = postings[mid];
postings[mid] = tmp;
}
if (postings[mid].term.compareTo(postings[hi].term) > 0) {
Posting tmp = postings[mid];
postings[mid] = postings[hi];
postings[hi] = tmp;
if (postings[lo].term.compareTo(postings[mid].term) > 0) {
Posting tmp2 = postings[lo];
postings[lo] = postings[mid];
postings[mid] = tmp2;
}
}
int left = lo + 1;
int right = hi - 1;
if (left >= right)
return;
Term partition = postings[mid].term;
for (; ;) {
while (postings[right].term.compareTo(partition) > 0)
--right;
while (left < right && postings[left].term.compareTo(partition) <= 0)
++left;
if (left < right) {
Posting tmp = postings[left];
postings[left] = postings[right];
postings[right] = tmp;
--right;
} else {
break;
}
}
quickSort(postings, lo, left);
quickSort(postings, left + 1, hi);
}
private final void writePostings(Posting[] postings, String segment)
throws CorruptIndexException, IOException {
IndexOutput freq = null, prox = null;
TermInfosWriter tis = null;
TermVectorsWriter termVectorWriter = null;
try {
//open files for inverse index storage
freq = directory.createOutput(segment + ".frq");
prox = directory.createOutput(segment + ".prx");
tis = new TermInfosWriter(directory, segment, fieldInfos,
termIndexInterval);
TermInfo ti = new TermInfo();
String currentField = null;
boolean currentFieldHasPayloads = false;
for (int i = 0; i < postings.length; i++) {
Posting posting = postings[i];
// check to see if we switched to a new field
String termField = posting.term.field();
if (currentField != termField) {
// changing field - see if there is something to save
currentField = termField;
FieldInfo fi = fieldInfos.fieldInfo(currentField);
currentFieldHasPayloads = fi.storePayloads;
if (fi.storeTermVector) {
if (termVectorWriter == null) {
termVectorWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
termVectorWriter.openDocument();
}
termVectorWriter.openField(currentField);
} else if (termVectorWriter != null) {
termVectorWriter.closeField();
}
}
// add an entry to the dictionary with pointers to prox and freq files
ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
tis.add(posting.term, ti);
// add an entry to the freq file
int postingFreq = posting.freq;
if (postingFreq == 1) // optimize freq=1
freq.writeVInt(1); // set low bit of doc num.
else {
freq.writeVInt(0); // the document number
freq.writeVInt(postingFreq); // frequency in doc
}
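// (In this single-document segment the delta-coded doc number is 0, so
// writeVInt(1) is (0 << 1) | 1, i.e. the low bit marks freq == 1; otherwise
// 0 is written followed by the explicit frequency.)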
int lastPosition = 0; // write positions
int[] positions = posting.positions;
Payload[] payloads = posting.payloads;
int lastPayloadLength = -1;
// The following encoding is being used for positions and payloads:
// Case 1: current field does not store payloads
// Positions -> <PositionDelta>^freq
// PositionDelta -> VInt
// The PositionDelta is the difference between the current
// and the previous position
// Case 2: current field stores payloads
// Positions -> <PositionDelta, Payload>^freq
// Payload -> <PayloadLength?, PayloadData>
// PositionDelta -> VInt
// PayloadLength -> VInt
// PayloadData -> byte^PayloadLength
// In this case PositionDelta/2 is the difference between
// the current and the previous position. If PositionDelta
// is odd, then a PayloadLength encoded as VInt follows,
// if PositionDelta is even, then it is assumed that the
// length of the current Payload equals the length of the
// previous Payload.
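// Illustrative trace (an added example, not part of the original comment):
// with payloads enabled and positions 3 and 7 whose payloads are both
// 2 bytes long, the first entry writes VInt(3*2+1)=7 followed by VInt(2)
// and the 2 payload bytes (new payload length); the second entry writes
// VInt((7-3)*2)=8 followed only by its 2 payload bytes, since the length
// is unchanged.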
for (int j = 0; j < postingFreq; j++) { // use delta-encoding
int position = positions[j];
int delta = position - lastPosition;
if (currentFieldHasPayloads) {
int payloadLength = 0;
Payload payload = null;
if (payloads != null) {
payload = payloads[j];
if (payload != null) {
payloadLength = payload.length;
}
}
if (payloadLength == lastPayloadLength) {
// the length of the current payload equals the length
// of the previous one. So we do not have to store the length
// again and we only shift the position delta by one bit
prox.writeVInt(delta * 2);
} else {
// the length of the current payload is different from the
// previous one. We shift the position delta, set the lowest
// bit and store the current payload length as VInt.
prox.writeVInt(delta * 2 + 1);
prox.writeVInt(payloadLength);
lastPayloadLength = payloadLength;
}
if (payloadLength > 0) {
// write current payload
prox.writeBytes(payload.data, payload.offset, payload.length);
}
} else {
// field does not store payloads, just write position delta as VInt
prox.writeVInt(delta);
}
lastPosition = position;
}
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
}
}
if (termVectorWriter != null)
termVectorWriter.closeDocument();
} finally {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOException keep = null;
if (freq != null) try { freq.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (prox != null) try { prox.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (tis != null) try { tis.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (termVectorWriter != null) try { termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (keep != null) throw (IOException) keep.fillInStackTrace();
}
}
private final void writeNorms(String segment) throws IOException {
for(int n = 0; n < fieldInfos.size(); n++){
FieldInfo fi = fieldInfos.fieldInfo(n);
if(fi.isIndexed && !fi.omitNorms){
float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
IndexOutput norms = directory.createOutput(segment + ".f" + n);
try {
norms.writeByte(Similarity.encodeNorm(norm));
} finally {
norms.close();
}
}
}
}
/** If non-null, a message will be printed to this if maxFieldLength is reached.
*/
void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
int getNumFields() {
return fieldInfos.size();
}
}
final class Posting { // info about a Term in a doc
Term term; // the Term
int freq; // its frequency in doc
int[] positions; // positions it occurs at
Payload[] payloads; // the payloads of the terms
TermVectorOffsetInfo [] offsets;
Posting(Term t, int position, Payload payload, TermVectorOffsetInfo offset) {
term = t;
freq = 1;
positions = new int[1];
positions[0] = position;
if (payload != null) {
payloads = new Payload[1];
payloads[0] = payload;
} else
payloads = null;
if(offset != null){
offsets = new TermVectorOffsetInfo[1];
offsets[0] = offset;
} else
offsets = null;
}
}

View File

@@ -207,55 +207,38 @@ class DocHelper {
}
/**
* Writes the document to the directory using a segment named "test"
* Writes the document to the directory using a segment
* named "test"; returns the SegmentInfo describing the new
* segment
* @param dir
* @param doc
* @throws IOException
*/
public static void writeDoc(Directory dir, Document doc) throws IOException
public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
{
writeDoc(dir, "test", doc);
return writeDoc(dir, new WhitespaceAnalyzer(), Similarity.getDefault(), doc);
}
/**
* Writes the document to the directory in the given segment
* @param dir
* @param segment
* @param doc
* @throws IOException
*/
public static void writeDoc(Directory dir, String segment, Document doc) throws IOException
{
Similarity similarity = Similarity.getDefault();
writeDoc(dir, new WhitespaceAnalyzer(), similarity, segment, doc);
}
/**
* Writes the document to the directory segment named "test" using the specified analyzer and similarity
* Writes the document to the directory using the analyzer
* and the similarity score; returns the SegmentInfo
* describing the new segment
* @param dir
* @param analyzer
* @param similarity
* @param doc
* @throws IOException
*/
public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException
public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException
{
writeDoc(dir, analyzer, similarity, "test", doc);
}
/**
* Writes the document to the directory segment using the analyzer and the similarity score
* @param dir
* @param analyzer
* @param similarity
* @param segment
* @param doc
* @throws IOException
*/
public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc) throws IOException
{
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
writer.addDocument(segment, doc);
IndexWriter writer = new IndexWriter(dir, analyzer);
writer.setSimilarity(similarity);
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.flush();
SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
writer.close();
return info;
}
public static int numFields(Document doc) {
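The tests updated below consume this helper by opening a SegmentReader directly on the SegmentInfo it returns; a hedged usage sketch (the wrapper class is illustrative, not from the commit):

package org.apache.lucene.index;  // DocHelper, SegmentInfo and SegmentReader live here

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.RAMDirectory;

class WriteDocUsageSketch {
  static Document writeAndReadBack(Document doc) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    SegmentInfo info = DocHelper.writeDoc(dir, doc);  // one doc, flushed as a segment
    SegmentReader reader = SegmentReader.get(info);
    try {
      return reader.document(0);                      // read the stored fields back
    } finally {
      reader.close();
    }
  }
}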

View File

@@ -105,14 +105,16 @@ public class TestDoc extends TestCase {
StringWriter sw = new StringWriter();
PrintWriter out = new PrintWriter(sw, true);
Directory directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
Directory directory = FSDirectory.getDirectory(indexDir);
IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
SegmentInfo si1 = indexDoc("one", "test.txt");
SegmentInfo si1 = indexDoc(writer, "test.txt");
printSegment(out, si1);
SegmentInfo si2 = indexDoc("two", "test2.txt");
SegmentInfo si2 = indexDoc(writer, "test2.txt");
printSegment(out, si2);
writer.close();
directory.close();
SegmentInfo siMerge = merge(si1, si2, "merge", false);
printSegment(out, siMerge);
@@ -131,14 +133,16 @@ public class TestDoc extends TestCase {
sw = new StringWriter();
out = new PrintWriter(sw, true);
directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
directory = FSDirectory.getDirectory(indexDir);
writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
si1 = indexDoc("one", "test.txt");
si1 = indexDoc(writer, "test.txt");
printSegment(out, si1);
si2 = indexDoc("two", "test2.txt");
si2 = indexDoc(writer, "test2.txt");
printSegment(out, si2);
writer.close();
directory.close();
siMerge = merge(si1, si2, "merge", true);
printSegment(out, siMerge);
@@ -157,21 +161,14 @@ public class TestDoc extends TestCase {
}
private SegmentInfo indexDoc(String segment, String fileName)
private SegmentInfo indexDoc(IndexWriter writer, String fileName)
throws Exception
{
Directory directory = FSDirectory.getDirectory(indexDir, false);
Analyzer analyzer = new SimpleAnalyzer();
DocumentWriter writer =
new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);
File file = new File(workDir, fileName);
Document doc = FileDocument.Document(file);
writer.addDocument(segment, doc);
directory.close();
return new SegmentInfo(segment, 1, directory, false, false);
writer.addDocument(doc);
writer.flush();
return writer.segmentInfos.info(writer.segmentInfos.size()-1);
}

View File

@@ -32,6 +32,8 @@ import org.apache.lucene.store.RAMDirectory;
import java.io.Reader;
import java.io.IOException;
import java.util.Arrays;
public class TestDocumentWriter extends TestCase {
private RAMDirectory dir;
@@ -57,11 +59,13 @@ public class TestDocumentWriter extends TestCase {
DocHelper.setupDoc(testDoc);
Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
String segName = "test";
writer.addDocument(segName, testDoc);
IndexWriter writer = new IndexWriter(dir, analyzer, true);
writer.addDocument(testDoc);
writer.flush();
SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
writer.close();
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
SegmentReader reader = SegmentReader.get(info);
assertTrue(reader != null);
Document doc = reader.document(0);
assertTrue(doc != null);
@@ -89,14 +93,14 @@ public class TestDocumentWriter extends TestCase {
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));
// test that the norm file is not present if omitNorms is true
// test that the norms are not present in the segment if
// omitNorms is true
for (int i = 0; i < reader.fieldInfos.size(); i++) {
FieldInfo fi = reader.fieldInfos.fieldInfo(i);
if (fi.isIndexed) {
assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
assertTrue(fi.omitNorms == !reader.hasNorms(fi.name));
}
}
}
public void testPositionIncrementGap() throws IOException {
@@ -111,14 +115,17 @@ public class TestDocumentWriter extends TestCase {
};
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
IndexWriter writer = new IndexWriter(dir, analyzer, true);
Document doc = new Document();
doc.add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.TOKENIZED));
String segName = "test";
writer.addDocument(segName, doc);
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
writer.addDocument(doc);
writer.flush();
SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
writer.close();
SegmentReader reader = SegmentReader.get(info);
TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
assertTrue(termPositions.next());
@@ -130,7 +137,7 @@ public class TestDocumentWriter extends TestCase {
public void testPreAnalyzedField() throws IOException {
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, new SimpleAnalyzer(), similarity, 50);
IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
Document doc = new Document();
doc.add(new Field("preanalyzed", new TokenStream() {
@@ -147,9 +154,11 @@ public class TestDocumentWriter extends TestCase {
}, TermVector.NO));
String segName = "test";
writer.addDocument(segName, doc);
SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
writer.addDocument(doc);
writer.flush();
SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
writer.close();
SegmentReader reader = SegmentReader.get(info);
TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
assertTrue(termPositions.next());

View File

@@ -35,6 +35,8 @@ public class TestFieldsReader extends TestCase {
private Document testDoc = new Document();
private FieldInfos fieldInfos = null;
private final static String TEST_SEGMENT_NAME = "_0";
public TestFieldsReader(String s) {
super(s);
}
@@ -43,16 +45,16 @@ public class TestFieldsReader extends TestCase {
fieldInfos = new FieldInfos();
DocHelper.setupDoc(testDoc);
fieldInfos.add(testDoc);
DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
writer.addDocument("test", testDoc);
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
writer.setUseCompoundFile(false);
writer.addDocument(testDoc);
writer.close();
}
public void test() throws IOException {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc = reader.doc(0, null);
@@ -82,7 +84,7 @@ public class TestFieldsReader extends TestCase {
public void testLazyFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Set loadFieldNames = new HashSet();
@@ -137,7 +139,7 @@ public class TestFieldsReader extends TestCase {
public void testLazyFieldsAfterClose() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Set loadFieldNames = new HashSet();
@@ -167,7 +169,7 @@ public class TestFieldsReader extends TestCase {
public void testLoadFirst() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
@@ -200,10 +202,12 @@ public class TestFieldsReader extends TestCase {
_TestUtil.rmDir(file);
FSDirectory tmpDir = FSDirectory.getDirectory(file);
assertTrue(tmpDir != null);
DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
writer.addDocument("test", testDoc);
IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true);
writer.setUseCompoundFile(false);
writer.addDocument(testDoc);
writer.close();
assertTrue(fieldInfos != null);
FieldsReader reader;
long lazyTime = 0;
@@ -214,7 +218,7 @@ public class TestFieldsReader extends TestCase {
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
for (int i = 0; i < length; i++) {
reader = new FieldsReader(tmpDir, "test", fieldInfos);
reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
@@ -238,7 +242,7 @@ public class TestFieldsReader extends TestCase {
doc = null;
//Hmmm, are we still in cache???
System.gc();
reader = new FieldsReader(tmpDir, "test", fieldInfos);
reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
doc = reader.doc(0, fieldSelector);
field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is not lazy", field.isLazy() == true);
@@ -256,7 +260,7 @@ public class TestFieldsReader extends TestCase {
}
public void testLoadSize() throws IOException {
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
Document doc;
doc = reader.doc(0, new FieldSelector(){

View File

@@ -43,15 +43,20 @@ public class TestMultiReader extends TestCase {
protected void setUp() throws IOException {
DocHelper.setupDoc(doc1);
DocHelper.setupDoc(doc2);
DocHelper.writeDoc(dir, "seg-1", doc1);
DocHelper.writeDoc(dir, "seg-2", doc2);
SegmentInfo info1 = DocHelper.writeDoc(dir, doc1);
SegmentInfo info2 = DocHelper.writeDoc(dir, doc2);
sis.write(dir);
reader1 = SegmentReader.get(new SegmentInfo("seg-1", 1, dir));
reader2 = SegmentReader.get(new SegmentInfo("seg-2", 1, dir));
openReaders();
}
private void openReaders() throws IOException {
sis.read(dir);
reader1 = SegmentReader.get(sis.info(0));
reader2 = SegmentReader.get(sis.info(1));
readers[0] = reader1;
readers[1] = reader2;
}
public void test() {
assertTrue(dir != null);
assertTrue(reader1 != null);
@@ -88,6 +93,7 @@ public class TestMultiReader extends TestCase {
reader.commit();
reader.close();
sis.read(dir);
openReaders();
reader = new MultiSegmentReader(dir, sis, false, readers);
assertEquals( 2, reader.numDocs() );

View File

@@ -32,12 +32,10 @@ public class TestSegmentMerger extends TestCase {
//First segment to be merged
private Directory merge1Dir = new RAMDirectory();
private Document doc1 = new Document();
private String merge1Segment = "test-1";
private SegmentReader reader1 = null;
//Second Segment to be merged
private Directory merge2Dir = new RAMDirectory();
private Document doc2 = new Document();
private String merge2Segment = "test-2";
private SegmentReader reader2 = null;
@@ -47,11 +45,11 @@ public class TestSegmentMerger extends TestCase {
protected void setUp() throws IOException {
DocHelper.setupDoc(doc1);
DocHelper.writeDoc(merge1Dir, merge1Segment, doc1);
SegmentInfo info1 = DocHelper.writeDoc(merge1Dir, doc1);
DocHelper.setupDoc(doc2);
DocHelper.writeDoc(merge2Dir, merge2Segment, doc2);
reader1 = SegmentReader.get(new SegmentInfo(merge1Segment, 1, merge1Dir));
reader2 = SegmentReader.get(new SegmentInfo(merge2Segment, 1, merge2Dir));
SegmentInfo info2 = DocHelper.writeDoc(merge2Dir, doc2);
reader1 = SegmentReader.get(info1);
reader2 = SegmentReader.get(info2);
}
public void test() {

View File

@@ -41,8 +41,8 @@ public class TestSegmentReader extends TestCase {
//TODO: Setup the reader w/ multiple documents
protected void setUp() throws IOException {
DocHelper.setupDoc(testDoc);
DocHelper.writeDoc(dir, testDoc);
reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
SegmentInfo info = DocHelper.writeDoc(dir, testDoc);
reader = SegmentReader.get(info);
}
protected void tearDown() {
@@ -75,8 +75,8 @@ public class TestSegmentReader extends TestCase {
public void testDelete() throws IOException {
Document docToDelete = new Document();
DocHelper.setupDoc(docToDelete);
DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
SegmentReader deleteReader = SegmentReader.get(new SegmentInfo("seg-to-delete", 1, dir));
SegmentInfo info = DocHelper.writeDoc(dir, docToDelete);
SegmentReader deleteReader = SegmentReader.get(info);
assertTrue(deleteReader != null);
assertTrue(deleteReader.numDocs() == 1);
deleteReader.deleteDocument(0);

View File

@@ -29,6 +29,7 @@ import java.io.IOException;
public class TestSegmentTermDocs extends TestCase {
private Document testDoc = new Document();
private Directory dir = new RAMDirectory();
private SegmentInfo info;
public TestSegmentTermDocs(String s) {
super(s);
@@ -36,7 +37,7 @@ public class TestSegmentTermDocs extends TestCase {
protected void setUp() throws IOException {
DocHelper.setupDoc(testDoc);
DocHelper.writeDoc(dir, testDoc);
info = DocHelper.writeDoc(dir, testDoc);
}
@@ -50,7 +51,7 @@ public class TestSegmentTermDocs extends TestCase {
public void testTermDocs() throws IOException {
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
SegmentReader reader = SegmentReader.get(info);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -68,7 +69,7 @@ public class TestSegmentTermDocs extends TestCase {
public void testBadSeek() throws IOException {
{
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
SegmentReader reader = SegmentReader.get(info);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -78,7 +79,7 @@ public class TestSegmentTermDocs extends TestCase {
}
{
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
SegmentReader reader = SegmentReader.get(info);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);