mirror of https://github.com/apache/lucene.git
LUCENE-2739: Refactor TestIndexWriter, move out term vectors writing tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1031496 13f79535-47bb-0310-9956-ffa450edef68
commit a8fd6068ed
parent d29a64d182
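For orientation, here is a condensed sketch (not part of the commit) of the term-vector round trip the relocated tests exercise: index one analyzed field with WITH_POSITIONS_OFFSETS, then read its character offsets back. It assumes the trunk-era test API used throughout the diff below (LuceneTestCase's newDirectory()/newField() helpers, MockAnalyzer, TEST_VERSION_CURRENT); the method name is hypothetical.

  // Hypothetical sketch, not part of the commit; uses only APIs that appear in the diff below.
  public void testOffsetsRoundTrip() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    // Record positions and offsets in the term vector for a single analyzed token.
    doc.add(newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS));
    w.addDocument(doc);
    w.close();

    // Read the vector back and verify the token's character offsets.
    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = (TermPositionVector) r.getTermFreqVector(0, "field");
    TermVectorOffsetInfo[] offsets = tpv.getOffsets(0);
    assertEquals(1, offsets.length);
    assertEquals(0, offsets[0].getStartOffset());
    assertEquals(4, offsets[0].getEndOffset());
    r.close();
    dir.close();
  }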
@@ -1050,64 +1050,6 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  // LUCENE-1008
  public void testNoTermVectorAfterTermVector() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document document = new Document();
    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    document = new Document();
    document.add(newField("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.commit();

    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.commit();

    iw.optimize();
    iw.close();
    dir.close();
  }

  // LUCENE-1010
  public void testNoTermVectorAfterTermVectorMerge() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document document = new Document();
    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    iw.commit();

    document = new Document();
    document.add(newField("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.commit();

    iw.optimize();

    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.commit();
    iw.optimize();

    iw.close();
    dir.close();
  }

  // LUCENE-1036
  public void testMaxThreadPriority() throws IOException {
    int pri = Thread.currentThread().getPriority();
@@ -1376,139 +1318,6 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption() throws IOException {

    Directory dir = newDirectory();
    for(int iter=0;iter<2;iter++) {
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMaxBufferedDocs(2).setRAMBufferSizeMB(
              IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
              new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Document document = new Document();

      Field storedField = newField("stored", "stored", Field.Store.YES,
          Field.Index.NO);
      document.add(storedField);
      writer.addDocument(document);
      writer.addDocument(document);

      document = new Document();
      document.add(storedField);
      Field termVectorField = newField("termVector", "termVector",
          Field.Store.NO, Field.Index.NOT_ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS);

      document.add(termVectorField);
      writer.addDocument(document);
      writer.optimize();
      writer.close();

      IndexReader reader = IndexReader.open(dir, true);
      for(int i=0;i<reader.numDocs();i++) {
        reader.document(i);
        reader.getTermFreqVectors(i);
      }
      reader.close();

      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
          new MockAnalyzer()).setMaxBufferedDocs(2)
          .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Directory[] indexDirs = {new MockDirectoryWrapper(new RAMDirectory(dir))};
      writer.addIndexes(indexDirs);
      writer.optimize();
      writer.close();
    }
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption2() throws IOException {
    Directory dir = newDirectory();
    for(int iter=0;iter<2;iter++) {
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMaxBufferedDocs(2).setRAMBufferSizeMB(
              IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
              new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Document document = new Document();

      Field storedField = newField("stored", "stored", Field.Store.YES,
          Field.Index.NO);
      document.add(storedField);
      writer.addDocument(document);
      writer.addDocument(document);

      document = new Document();
      document.add(storedField);
      Field termVectorField = newField("termVector", "termVector",
          Field.Store.NO, Field.Index.NOT_ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS);
      document.add(termVectorField);
      writer.addDocument(document);
      writer.optimize();
      writer.close();

      IndexReader reader = IndexReader.open(dir, true);
      assertTrue(reader.getTermFreqVectors(0)==null);
      assertTrue(reader.getTermFreqVectors(1)==null);
      assertTrue(reader.getTermFreqVectors(2)!=null);
      reader.close();
    }
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption3() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer())
        .setMaxBufferedDocs(2).setRAMBufferSizeMB(
            IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
            new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));

    Document document = new Document();

    document = new Document();
    Field storedField = newField("stored", "stored", Field.Store.YES,
        Field.Index.NO);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector",
        Field.Store.NO, Field.Index.NOT_ANALYZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.add(termVectorField);
    for(int i=0;i<10;i++)
      writer.addDocument(document);
    writer.close();

    writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
        new MockAnalyzer()).setMaxBufferedDocs(2)
        .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(
            new LogDocMergePolicy()));
    for(int i=0;i<6;i++)
      writer.addDocument(document);

    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    for(int i=0;i<10;i++) {
      reader.getTermFreqVectors(i);
      reader.document(i);
    }
    reader.close();
    dir.close();
  }

  // LUCENE-1084: test user-specified field length
  public void testUserSpecifiedMaxFieldLength() throws IOException {
    Directory dir = newDirectory();
@@ -2186,223 +1995,6 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  // LUCENE-1442
  public void testDoubleOffsetCounting() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    Field f2 = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f2);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);

    // Token "" occurred once
    assertEquals(1, termOffsets.length);
    assertEquals(8, termOffsets[0].getStartOffset());
    assertEquals(8, termOffsets[0].getEndOffset());

    // Token "abcd" occurred three times
    termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(1);
    assertEquals(3, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(4, termOffsets[1].getStartOffset());
    assertEquals(8, termOffsets[1].getEndOffset());
    assertEquals(8, termOffsets[2].getStartOffset());
    assertEquals(12, termOffsets[2].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1442
  public void testDoubleOffsetCounting2() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(5, termOffsets[1].getStartOffset());
    assertEquals(9, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionCharAnalyzer() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(8, termOffsets[1].getStartOffset());
    assertEquals(12, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(8, termOffsets[1].getStartOffset());
    assertEquals(12, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStopFilter() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
    Document doc = new Document();
    Field f = newField("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(9, termOffsets[1].getStartOffset());
    assertEquals(13, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandard() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd the ", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(11, termOffsets[0].getStartOffset());
    assertEquals(17, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(2);
    assertEquals(18, termOffsets[0].getStartOffset());
    assertEquals(21, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandardEmptyField() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(1, termOffsets[0].getStartOffset());
    assertEquals(7, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(8, termOffsets[0].getStartOffset());
    assertEquals(11, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandardEmptyField2() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();

    Field f = newField("field", "abcd", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

    Field f2 = newField("field", "crunch", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f2);

    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(6, termOffsets[0].getStartOffset());
    assertEquals(12, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-2529
  public void testPositionIncrementGapEmptyField() throws Exception {
@@ -0,0 +1,446 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

/** tests for writing term vectors */
public class TestTermVectorsWriter extends LuceneTestCase {
  // LUCENE-1442
  public void testDoubleOffsetCounting() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    Field f2 = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f2);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);

    // Token "" occurred once
    assertEquals(1, termOffsets.length);
    assertEquals(8, termOffsets[0].getStartOffset());
    assertEquals(8, termOffsets[0].getEndOffset());

    // Token "abcd" occurred three times
    termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(1);
    assertEquals(3, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(4, termOffsets[1].getStartOffset());
    assertEquals(8, termOffsets[1].getEndOffset());
    assertEquals(8, termOffsets[2].getStartOffset());
    assertEquals(12, termOffsets[2].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1442
  public void testDoubleOffsetCounting2() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(5, termOffsets[1].getStartOffset());
    assertEquals(9, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionCharAnalyzer() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(8, termOffsets[1].getStartOffset());
    assertEquals(12, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(8, termOffsets[1].getStartOffset());
    assertEquals(12, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStopFilter() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
    Document doc = new Document();
    Field f = newField("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0);
    assertEquals(2, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    assertEquals(9, termOffsets[1].getStartOffset());
    assertEquals(13, termOffsets[1].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandard() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "abcd the ", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(11, termOffsets[0].getStartOffset());
    assertEquals(17, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(2);
    assertEquals(18, termOffsets[0].getStartOffset());
    assertEquals(21, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandardEmptyField() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();
    Field f = newField("field", "", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(1, termOffsets[0].getStartOffset());
    assertEquals(7, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(8, termOffsets[0].getStartOffset());
    assertEquals(11, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1448
  public void testEndOffsetPositionStandardEmptyField2() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document doc = new Document();

    Field f = newField("field", "abcd", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

    Field f2 = newField("field", "crunch", Field.Store.NO,
        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f2);

    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
    assertEquals(1, termOffsets.length);
    assertEquals(0, termOffsets[0].getStartOffset());
    assertEquals(4, termOffsets[0].getEndOffset());
    termOffsets = tpv.getOffsets(1);
    assertEquals(6, termOffsets[0].getStartOffset());
    assertEquals(12, termOffsets[0].getEndOffset());
    r.close();
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption() throws IOException {

    Directory dir = newDirectory();
    for(int iter=0;iter<2;iter++) {
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMaxBufferedDocs(2).setRAMBufferSizeMB(
              IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
              new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Document document = new Document();

      Field storedField = newField("stored", "stored", Field.Store.YES,
          Field.Index.NO);
      document.add(storedField);
      writer.addDocument(document);
      writer.addDocument(document);

      document = new Document();
      document.add(storedField);
      Field termVectorField = newField("termVector", "termVector",
          Field.Store.NO, Field.Index.NOT_ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS);

      document.add(termVectorField);
      writer.addDocument(document);
      writer.optimize();
      writer.close();

      IndexReader reader = IndexReader.open(dir, true);
      for(int i=0;i<reader.numDocs();i++) {
        reader.document(i);
        reader.getTermFreqVectors(i);
      }
      reader.close();

      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
          new MockAnalyzer()).setMaxBufferedDocs(2)
          .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Directory[] indexDirs = {new MockDirectoryWrapper(new RAMDirectory(dir))};
      writer.addIndexes(indexDirs);
      writer.optimize();
      writer.close();
    }
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption2() throws IOException {
    Directory dir = newDirectory();
    for(int iter=0;iter<2;iter++) {
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer())
          .setMaxBufferedDocs(2).setRAMBufferSizeMB(
              IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
              new SerialMergeScheduler()).setMergePolicy(
              new LogDocMergePolicy()));

      Document document = new Document();

      Field storedField = newField("stored", "stored", Field.Store.YES,
          Field.Index.NO);
      document.add(storedField);
      writer.addDocument(document);
      writer.addDocument(document);

      document = new Document();
      document.add(storedField);
      Field termVectorField = newField("termVector", "termVector",
          Field.Store.NO, Field.Index.NOT_ANALYZED,
          Field.TermVector.WITH_POSITIONS_OFFSETS);
      document.add(termVectorField);
      writer.addDocument(document);
      writer.optimize();
      writer.close();

      IndexReader reader = IndexReader.open(dir, true);
      assertTrue(reader.getTermFreqVectors(0)==null);
      assertTrue(reader.getTermFreqVectors(1)==null);
      assertTrue(reader.getTermFreqVectors(2)!=null);
      reader.close();
    }
    dir.close();
  }

  // LUCENE-1168
  public void testTermVectorCorruption3() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer())
        .setMaxBufferedDocs(2).setRAMBufferSizeMB(
            IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(
            new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));

    Document document = new Document();

    document = new Document();
    Field storedField = newField("stored", "stored", Field.Store.YES,
        Field.Index.NO);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector",
        Field.Store.NO, Field.Index.NOT_ANALYZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.add(termVectorField);
    for(int i=0;i<10;i++)
      writer.addDocument(document);
    writer.close();

    writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
        new MockAnalyzer()).setMaxBufferedDocs(2)
        .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(
            new LogDocMergePolicy()));
    for(int i=0;i<6;i++)
      writer.addDocument(document);

    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    for(int i=0;i<10;i++) {
      reader.getTermFreqVectors(i);
      reader.document(i);
    }
    reader.close();
    dir.close();
  }

  // LUCENE-1008
  public void testNoTermVectorAfterTermVector() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document document = new Document();
    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    document = new Document();
    document.add(newField("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.commit();

    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.commit();

    iw.optimize();
    iw.close();
    dir.close();
  }

  // LUCENE-1010
  public void testNoTermVectorAfterTermVectorMerge() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer()));
    Document document = new Document();
    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    iw.commit();

    document = new Document();
    document.add(newField("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.NO));
    iw.addDocument(document);
    // Make first segment
    iw.commit();

    iw.optimize();

    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
        Field.TermVector.YES));
    iw.addDocument(document);
    // Make 2nd segment
    iw.commit();
    iw.optimize();

    iw.close();
    dir.close();
  }
}