diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
index d3c1ec497aa..5cf6f3dacd5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
@@ -252,6 +252,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
       // and the numbers aren't that much smaller anyways.
       int offsetDelta = startOffset - lastOffset;
       int offsetLength = endOffset - startOffset;
+      assert offsetDelta >= 0 && offsetLength >= 0 : "startOffset=" + startOffset + ",lastOffset=" + lastOffset + ",endOffset=" + endOffset;
       if (offsetLength != lastOffsetLength) {
         proxOut.writeVInt(offsetDelta << 1 | 1);
         proxOut.writeVInt(offsetLength);
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
index 69c829cea15..f6ce4beec81 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -148,15 +148,16 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
     postings.lastPositions[termID] = fieldState.position;
   }
 
-  void writeOffsets(final int termID, int prevOffset) {
+  void writeOffsets(final int termID, int offsetAccum) {
     assert hasOffsets;
-    final int startOffset = offsetAttribute.startOffset();
-    final int endOffset = offsetAttribute.endOffset();
+    final int startOffset = offsetAccum + offsetAttribute.startOffset();
+    final int endOffset = offsetAccum + offsetAttribute.endOffset();
     //System.out.println("writeOffsets termID=" + termID + " prevOffset=" + prevOffset + " startOff=" + startOffset + " endOff=" + endOffset);
-    termsHashPerField.writeVInt(1, startOffset - prevOffset);
+    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
+    assert startOffset - postings.lastOffsets[termID] >= 0;
+    termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
     termsHashPerField.writeVInt(1, endOffset - startOffset);
-    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
     postings.lastOffsets[termID] = startOffset;
   }
 
@@ -224,6 +225,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
       if (hasProx) {
         writeProx(termID, fieldState.position);
         if (hasOffsets) {
+          postings.lastOffsets[termID] = 0;
           writeOffsets(termID, fieldState.offset);
         }
       } else {
@@ -236,7 +238,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
         writeProx(termID, fieldState.position-postings.lastPositions[termID]);
       }
       if (hasOffsets) {
-        writeOffsets(termID, postings.lastOffsets[termID]);
+        writeOffsets(termID, fieldState.offset);
       }
     }
   }
@@ -523,14 +525,15 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
           if (readOffsets) {
             final int startOffset = offset + prox.readVInt();
             final int endOffset = startOffset + prox.readVInt();
-            offset = startOffset;
             if (writePositions) {
               if (writeOffsets) {
+                assert startOffset >=0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
                 postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
               } else {
                 postingsConsumer.addPosition(position, thisPayload, -1, -1);
               }
             }
+            offset = startOffset;
           } else if (writePositions) {
             postingsConsumer.addPosition(position, thisPayload, -1, -1);
           }
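The FreqProxTermsWriterPerField change above is the core of the fix: writeOffsets now receives the accumulated field offset (fieldState.offset) and delta-encodes against postings.lastOffsets[termID], instead of treating each field instance's raw token offsets as absolute. Below is a minimal plain-Java sketch of that bookkeeping; it is illustration only (the whitespace "tokenizer", class and variable names are made up), not the Lucene indexing chain. As long as the base keeps growing across field instances, the start-offset delta can never go negative, which is what the new asserts enforce.

// Sketch: token offsets restart at 0 for every field instance, so an accumulated
// base (fieldState.offset in the real code) is added before delta-encoding against
// the last absolute start offset written for the term.
class OffsetSketch {
  static void encode(String[] fieldInstances) {
    int base = 0;        // grows across field instances (offsetAccum)
    int lastStart = 0;   // last absolute start offset written (lastOffsets[termID])
    for (String value : fieldInstances) {
      int tokenStart = 0;                              // per-instance token offsets
      for (String token : value.split(" ")) {
        int absStart = base + tokenStart;              // offsetAccum + startOffset()
        int absEnd = absStart + token.length();        // offsetAccum + endOffset()
        int delta = absStart - lastStart;
        assert delta >= 0 && absEnd >= absStart;       // what the new asserts check
        System.out.println(token + " -> startDelta=" + delta + " length=" + (absEnd - absStart));
        lastStart = absStart;
        tokenStart += token.length() + 1;
      }
      base += value.length() + 1;                      // analogous to the offset gap
    }
  }

  public static void main(String[] args) {
    encode(new String[] {"aaa aaa aaa", "aaa aaa aaa"});
  }
}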
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java
index 29313b0e83a..5c7a07f2400 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java
@@ -385,6 +385,22 @@ public class TestPostingsOffsets extends LuceneTestCase {
     dir.close();
   }
 
+  public void testAddFieldTwice() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
+    customType3.setStoreTermVectors(true);
+    customType3.setStoreTermVectorPositions(true);
+    customType3.setStoreTermVectorOffsets(true);
+    customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    doc.add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
+    doc.add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
+    iw.addDocument(doc);
+    iw.close();
+    dir.close(); // checkindex
+  }
+
   // NOTE: the next two tests aren't that good as we need an EvilToken...
   public void testNegativeOffsets() throws Exception {
     try {
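testAddFieldTwice adds the same offsets-enabled field twice; before the fix the second instance's first start offset was smaller than the last offset recorded for the first instance, so a negative delta was written. The sketch below (plain Java with a generic varint, not Lucene's DataOutput) shows why that corrupts the index that CheckIndex, run when the test's directory is closed, then rejects: the negative delta round-trips through the byte stream and the reconstructed absolute offsets move backwards.

import java.io.ByteArrayOutputStream;

class VIntSketch {
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) {          // same wire idea as a standard varint
      out.write((i & 0x7F) | 0x80);
      i >>>= 7;
    }
    out.write(i);
  }

  static int readVInt(byte[] buf, int[] pos) {
    byte b = buf[pos[0]++];
    int i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = buf[pos[0]++];
      i |= (b & 0x7F) << shift;
    }
    return i;
  }

  public static void main(String[] args) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // second field instance restarts at small offsets: 26 - 36 = -10 before the fix
    writeVInt(out, 26 - 36);
    int[] pos = {0};
    int delta = readVInt(out.toByteArray(), pos);
    int previousStart = 36;
    System.out.println("decoded start offset = " + (previousStart + delta)); // 26 < 36: went backwards
  }
}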
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
index b9a1165ac51..05e359c58d5 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -26,12 +26,22 @@ import java.io.StringReader;
 import java.io.StringWriter;
 import java.io.Writer;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.Set;
 
 import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.IOUtils;
@@ -384,13 +394,14 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     final boolean useCharFilter;
     final boolean simple;
     final boolean offsetsAreCorrect;
+    final RandomIndexWriter iw;
 
     // NOTE: not volatile because we don't want the tests to
     // add memory barriers (ie alter how threads
     // interact)... so this is just "best effort":
     public boolean failed;
 
-    AnalysisThread(long seed, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect) {
+    AnalysisThread(long seed, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) {
       this.seed = seed;
       this.a = a;
       this.iterations = iterations;
@@ -398,6 +409,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       this.useCharFilter = useCharFilter;
       this.simple = simple;
       this.offsetsAreCorrect = offsetsAreCorrect;
+      this.iw = iw;
     }
 
     @Override
@@ -406,7 +418,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       try {
         // see the part in checkRandomData where it replays the same text again
         // to verify reproducability/reuse: hopefully this would catch thread hazards.
-        checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect);
+        checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
         success = true;
       } catch (IOException e) {
         Rethrow.rethrow(e);
@@ -423,34 +435,88 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
     long seed = random.nextLong();
     boolean useCharFilter = random.nextBoolean();
-    checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect);
-    // now test with multiple threads: note we do the EXACT same thing we did before in each thread,
-    // so this should only really fail from another thread if its an actual thread problem
-    int numThreads = _TestUtil.nextInt(random, 2, 4);
-    AnalysisThread threads[] = new AnalysisThread[numThreads];
-    for (int i = 0; i < threads.length; i++) {
-      threads[i] = new AnalysisThread(seed, a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect);
+    Directory dir = null;
+    RandomIndexWriter iw = null;
+    if (rarely(random)) {
+      dir = newFSDirectory(_TestUtil.getTempDir("bttc"));
+      iw = new RandomIndexWriter(new Random(seed), dir, a);
     }
-    for (int i = 0; i < threads.length; i++) {
-      threads[i].start();
-    }
-    for (int i = 0; i < threads.length; i++) {
-      try {
-        threads[i].join();
-      } catch (InterruptedException e) {
-        throw new RuntimeException(e);
+    boolean success = false;
+    try {
+      checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
+      // now test with multiple threads: note we do the EXACT same thing we did before in each thread,
+      // so this should only really fail from another thread if its an actual thread problem
+      int numThreads = _TestUtil.nextInt(random, 2, 4);
+      AnalysisThread threads[] = new AnalysisThread[numThreads];
+      for (int i = 0; i < threads.length; i++) {
+        threads[i] = new AnalysisThread(seed, a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
      }
-    }
-    for (int i = 0; i < threads.length; i++) {
-      if (threads[i].failed) {
-        throw new RuntimeException("some thread(s) failed");
+      for (int i = 0; i < threads.length; i++) {
+        threads[i].start();
+      }
+      for (int i = 0; i < threads.length; i++) {
+        try {
+          threads[i].join();
+        } catch (InterruptedException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      for (int i = 0; i < threads.length; i++) {
+        if (threads[i].failed) {
+          throw new RuntimeException("some thread(s) failed");
+        }
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(iw, dir);
+      } else {
+        IOUtils.closeWhileHandlingException(iw, dir); // checkindex
      }
    }
  }
 
-  private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect) throws IOException {
+  static final Set<String> doesntSupportOffsets = new HashSet<String>() {{
+    add("Lucene3x");
+    add("MockFixedIntBlock");
+    add("MockVariableIntBlock");
+    add("MockSep");
+    add("MockRandom");
+  }};
+
+  private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException {
     final LineFileDocs docs = new LineFileDocs(random);
+    Document doc = null;
+    Field field = null, currentField = null;
+    StringReader bogus = new StringReader("");
+    if (iw != null) {
+      doc = new Document();
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      if (random.nextBoolean()) {
+        ft.setStoreTermVectors(true);
+        ft.setStoreTermVectorOffsets(random.nextBoolean());
+        ft.setStoreTermVectorPositions(random.nextBoolean());
+      }
+      if (random.nextBoolean()) {
+        ft.setOmitNorms(true);
+      }
+      String pf = _TestUtil.getPostingsFormat("dummy");
+      boolean supportsOffsets = !doesntSupportOffsets.contains(pf);
+      switch(random.nextInt(4)) {
+        case 0: ft.setIndexOptions(IndexOptions.DOCS_ONLY); break;
+        case 1: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
+        case 2: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
+        default:
+          if (supportsOffsets && offsetsAreCorrect) {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+          } else {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+          }
+      }
+      currentField = field = new Field("dummy", bogus, ft);
+      doc.add(currentField);
+    }
 
     try {
       for (int i = 0; i < iterations; i++) {
@@ -481,7 +547,23 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       }
 
       try {
-        checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect);
+        checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
+        if (iw != null) {
+          if (random.nextInt(7) == 0) {
+            // pile up a multivalued field
+            FieldType ft = field.fieldType();
+            currentField = new Field("dummy", bogus, ft);
+            doc.add(currentField);
+          } else {
+            iw.addDocument(doc);
+            if (doc.getFields().size() > 1) {
+              // back to 1 field
+              currentField = field;
+              doc.removeFields("dummy");
+              doc.add(currentField);
+            }
+          }
+        }
       } catch (Throwable t) {
         // TODO: really we should pass a random seed to
         // checkAnalysisConsistency then print it here too:
@@ -528,6 +610,10 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   }
 
   public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text, boolean offsetsAreCorrect) throws IOException {
+    checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, null);
+  }
+
+  private static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text, boolean offsetsAreCorrect, Field field) throws IOException {
     if (VERBOSE) {
       System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
     }
@@ -649,6 +735,8 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     }
 
     reader = new StringReader(text);
+    long seed = random.nextLong();
+    random = new Random(seed);
     if (random.nextInt(30) == 7) {
       if (VERBOSE) {
         System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: using spoon-feed reader");
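The last hunk above captures a seed just before the "spoon-feed reader" decision so that checkAnalysisConsistency can replay exactly the same decision when it later hands the text to the indexed field (final hunk below). A small sketch of that replay pattern follows; maybeWrap is a made-up stand-in for the MockReaderWrapper wrapping, used only to make the decision visible.

import java.io.Reader;
import java.io.StringReader;
import java.util.Random;

class SeedReplaySketch {
  // Stand-in for the random "spoon-feed reader" wrapping decision.
  static Reader maybeWrap(Random r, Reader in) {
    return r.nextInt(30) == 7 ? new java.io.BufferedReader(in) : in;
  }

  public static void main(String[] args) {
    Random random = new Random();
    long seed = random.nextLong();
    random = new Random(seed);
    Reader checkPass = maybeWrap(random, new StringReader("text"));            // consistency pass
    Reader indexPass = maybeWrap(new Random(seed), new StringReader("text"));  // same decision replayed
    System.out.println(checkPass.getClass() == indexPass.getClass());          // always true
  }
}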
@@ -718,6 +806,20 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       assertTokenStreamContents(ts, tokens.toArray(new String[tokens.size()]));
     }
+
+    if (field != null) {
+      reader = new StringReader(text);
+      random = new Random(seed);
+      if (random.nextInt(30) == 7) {
+        if (VERBOSE) {
+          System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: indexing using spoon-feed reader");
+        }
+
+        reader = new MockReaderWrapper(random, reader);
+      }
+
+      field.setReaderValue(useCharFilter ? new MockCharFilter(reader, remainder) : reader);
+    }
   }
 
   private static String randomAnalysisString(Random random, int maxLength, boolean simple) {
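Putting the framework pieces together: checkRandomData now (rarely) opens a RandomIndexWriter over the analyzer under test, and checkAnalysisConsistency feeds each verified text to a reusable Field via setReaderValue, so the same token streams that pass the consistency checks also run through the postings and term-vector writers. The following is a condensed, assumed sketch of that pipeline using only APIs that appear in the patch; it is not the framework code itself.

import java.io.StringReader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;

public class IndexAnalyzerOutputSketch extends LuceneTestCase {
  // Index each text through the analyzer under test; closing the directory returned
  // by newDirectory() (a checking wrapper) is where offset bugs typically surface.
  static void indexTexts(Analyzer analyzer, Random random, String... texts) throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random, dir, analyzer);
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field field = new Field("dummy", new StringReader(""), ft);
    Document doc = new Document();
    doc.add(field);
    for (String text : texts) {
      field.setReaderValue(new StringReader(text)); // reuse the Field, swap the Reader
      iw.addDocument(doc);
    }
    IOUtils.close(iw, dir);
  }
}

Reusing one Field and swapping its Reader per document mirrors what the patch does with field.setReaderValue and keeps allocation low inside the randomized test loop.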