LUCENE-5675: delete docs on flush

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596091 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-05-19 22:59:48 +00:00
parent 4e0b7974b6
commit d6968c3924
15 changed files with 420 additions and 185 deletions

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@ -71,11 +72,9 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (lastDocID != -1) {
// nocommit need test
throw new IllegalArgumentException("term appears in more than one document");
}
if (termDocFreq != 1) {
// nocommit need test
throw new IllegalArgumentException("term appears more than once in the document");
}
@ -86,16 +85,13 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
if (lastPosition != -1) {
// nocommit need test
throw new IllegalArgumentException("term appears more than once in document");
}
lastPosition = position;
if (payload == null) {
// nocommit need test
throw new IllegalArgumentException("token doens't have a payload");
}
if (payload.length != 8) {
// nocommit need test
throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
}
@ -108,7 +104,6 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@Override
public void finishDoc() throws IOException {
if (lastPosition == -1) {
// nocommit need test
throw new IllegalArgumentException("missing addPosition");
}
}
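
In practice this writer enforces that each ID is indexed as a single token, in a single document, carrying its version packed into an exactly 8-byte payload. A minimal sketch of such an encoding, assuming a helper of this shape (the helper name and big-endian byte order are assumptions, not part of this diff):

import org.apache.lucene.util.BytesRef;

final class VersionPayloads {
  // Hypothetical helper: pack a long version into the 8-byte payload that
  // IDVersionPostingsWriter requires (payload.length must be exactly 8).
  static BytesRef longToPayload(long version) {
    byte[] bytes = new byte[8];
    for (int i = 0; i < 8; i++) {
      // Most significant byte first (byte order here is an assumption).
      bytes[i] = (byte) (version >>> (8 * (7 - i)));
    }
    return new BytesRef(bytes);
  }
}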

View File

@ -53,9 +53,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
boolean termExists;
final VersionFieldReader fr;
// nocommit make this public "for casting" and add a getVersion method?
// nocommit unused?
private int targetBeforeCurrentLength;
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
@ -228,6 +225,11 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
}
/** Only valid if we are positioned. */
public long getVersion() {
return ((IDVersionTermState) currentFrame.state).idVersion;
}
/** Returns false if the term does not exist, or it exists but its version is too old (&lt; minIDVersion). */
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
@ -357,11 +359,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
currentFrame = lastFrame;
currentFrame.rewind();
// nocommit put this back to BT also?
//term.length = targetUpto;
// nocommit put this back???
//termExists = false;
} else {
// Target is exactly the same as current term
assert term.length == target.length;
@ -559,7 +556,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
if (currentFrame.maxIDVersion < minIDVersion) {
// The max version for all terms in this block is lower than the minVersion
// nocommit need same logic here as above?
termExists = false;
term.length = targetUpto;
return false;

View File

@ -0,0 +1,104 @@
package org.apache.lucene.codecs.idversion;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
// nocommit can we take a BytesRef token instead?
/** Produces a single String token from the provided value, with the provided payload. */
class StringAndPayloadField extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setIndexed(true);
TYPE.setOmitNorms(true);
TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
TYPE.setTokenized(true);
TYPE.freeze();
}
private final BytesRef payload;
public StringAndPayloadField(String name, String value, BytesRef payload) {
super(name, value, TYPE);
this.payload = payload;
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
SingleTokenWithPayloadTokenStream ts;
if (reuse instanceof SingleTokenWithPayloadTokenStream) {
ts = (SingleTokenWithPayloadTokenStream) reuse;
} else {
ts = new SingleTokenWithPayloadTokenStream();
}
ts.setValue((String) fieldsData, payload);
return ts;
}
private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
private boolean used = false;
private String value = null;
private BytesRef payload;
/** Sets the string value. */
void setValue(String value, BytesRef payload) {
this.value = value;
this.payload = payload;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
clearAttributes();
termAttribute.append(value);
payloadAttribute.setPayload(payload);
used = true;
return true;
}
@Override
public void reset() {
used = false;
}
@Override
public void close() {
value = null;
payload = null;
}
}
}
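
A hedged usage sketch of the field above, assuming a longToPayload-style helper like the one sketched earlier; the real tests below use a makeIDField helper that this diff does not show:

// 'writer' is an open IndexWriter whose codec uses IDVersionPostingsFormat (see the tests below).
Document doc = new Document();
// One token "doc-1" in the "id" field, carrying its version in an 8-byte payload:
doc.add(new StringAndPayloadField("id", "doc-1", VersionPayloads.longToPayload(17L)));
writer.addDocument(doc);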

View File

@ -36,15 +36,18 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -262,6 +265,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
System.out.println(" lookup exact version (should be found)");
}
assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
assertEquals(expectedVersion.longValue(), lookup.getVersion());
} else {
if (VERBOSE) {
System.out.println(" lookup version+1 (should not be found)");
@ -281,6 +285,8 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
super(r, field);
}
long lastVersion;
/** Returns docID if found, else -1. */
public int lookup(BytesRef id, long version) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
@ -291,6 +297,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
lastVersion = ((IDVersionSegmentTermsEnum) termsEnums[seg]).getVersion();
return docBases[seg] + docID;
}
assert hasDeletions;
@ -299,83 +306,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
return -1;
}
}
/** Produces a single token from the provided value, with the provided payload. */
private static class StringAndPayloadField extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setIndexed(true);
TYPE.setOmitNorms(true);
TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
TYPE.setTokenized(true);
TYPE.freeze();
}
private final BytesRef payload;
public StringAndPayloadField(String name, String value, BytesRef payload) {
super(name, value, TYPE);
this.payload = payload;
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
SingleTokenWithPayloadTokenStream ts;
if (reuse instanceof SingleTokenWithPayloadTokenStream) {
ts = (SingleTokenWithPayloadTokenStream) reuse;
} else {
ts = new SingleTokenWithPayloadTokenStream();
}
ts.setValue((String) fieldsData, payload);
return ts;
}
}
private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
private boolean used = false;
private String value = null;
private BytesRef payload;
/** Creates a new TokenStream that returns a String+payload as single token.
* <p>Warning: Does not initialize the value, you must call
* {@link #setValue(String)} afterwards!
*/
SingleTokenWithPayloadTokenStream() {
}
/** Sets the string value. */
void setValue(String value, BytesRef payload) {
this.value = value;
this.payload = payload;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
clearAttributes();
termAttribute.append(value);
payloadAttribute.setPayload(payload);
used = true;
return true;
}
@Override
public void reset() {
used = false;
}
@Override
public void close() {
value = null;
payload = null;
/** Only valid if lookup returned a valid docID. */
public long getVersion() {
return lastVersion;
}
}
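
Taken together, the lookup helper added to this test can be used roughly like this; the snippet is illustrative only, with 'lookup' standing for an instance of the PerThreadPKLookup subclass above:

int docID = lookup.lookup(new BytesRef("some-id"), /* minVersion= */ 17L);
if (docID != -1) {
  // Only valid immediately after a successful lookup:
  long version = lookup.getVersion();
}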
@ -394,8 +328,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
*/
}
/*
// Invalid
public void testMoreThanOneDocPerIDOneSegment() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
@ -412,14 +344,138 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
fail("didn't hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
iae.printStackTrace();
}
w.close();
dir.close();
}
// Invalid
public void testMoreThanOneDocPerIDTwoSegments() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
@Override
protected void handleMergeException(Throwable exc) {
assertTrue(exc instanceof IllegalArgumentException);
}
});
}
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(makeIDField("id", 17));
try {
w.addDocument(doc);
w.commit();
w.forceMerge(1);
fail("didn't hit exception");
} catch (IllegalArgumentException iae) {
// expected: SMS will hit this
} catch (IOException ioe) {
// expected
assertTrue(ioe.getCause() instanceof IllegalArgumentException);
}
w.w.close();
dir.close();
}
public void testMoreThanOneDocPerIDWithUpdates() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
doc = new Document();
doc.add(makeIDField("id", 17));
// Replaces the doc we just indexed:
w.updateDocument(new Term("id", "id"), doc);
w.commit();
w.close();
dir.close();
}
public void testMoreThanOneDocPerIDWithDeletes() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
w.deleteDocuments(new Term("id", "id"));
doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
w.commit();
w.close();
dir.close();
}
public void testMissingPayload() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(newTextField("id", "id", Field.Store.NO));
try {
w.addDocument(doc);
w.commit();
fail("didn't hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
w.close();
dir.close();
}
public void testMissingPositions() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(newStringField("id", "id", Field.Store.NO));
try {
w.addDocument(doc);
w.commit();
fail("didn't hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
w.close();
dir.close();
}
public void testInvalidPayload() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
try {
w.addDocument(doc);
w.commit();
fail("didn't hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
w.close();
dir.close();
}
public void testMoreThanOneDocPerIDWithDeletesAcrossSegments() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
@ -430,15 +486,29 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
w.commit();
doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
w.commit();
// Replaces the doc we just indexed:
w.updateDocument(new Term("id", "id"), doc);
w.forceMerge(1);
w.close();
dir.close();
}
public void testMoreThanOneDocPerIDWithDeletes() {
public void testMoreThanOnceInSingleDoc() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
doc.add(makeIDField("id", 17));
try {
w.addDocument(doc);
w.commit();
fail("didn't hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
w.close();
dir.close();
}
*/
}

View File

@ -613,7 +613,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
System.out.println("writeBlocks count=" + count);
// nocommit nuke the prefixLength == 0 case, but testVaryingTermsPerSegment fails!!
if (count <= maxItemsInBlock) {
// Easy case: not floor block. Eg, prefix is "foo",
// and we found 30 terms/sub-blocks starting w/ that

View File

@ -1642,6 +1642,12 @@ public class CheckIndex {
// Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j);
if (doStats == false) {
// nocommit is it OK to stop verifying deleted docs?
continue;
}
if (doStats) {
status.docCount++;
}

View File

@ -37,6 +37,8 @@ import org.apache.lucene.util.BytesRef;
class FreqProxFields extends Fields {
final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<>();
private Bits liveDocs;
public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
// NOTE: fields are already sorted by field name
for(FreqProxTermsWriterPerField field : fieldList) {
@ -44,6 +46,10 @@ class FreqProxFields extends Fields {
}
}
public void setLiveDocs(Bits liveDocs) {
this.liveDocs = liveDocs;
}
public Iterator<String> iterator() {
return fields.keySet().iterator();
}
@ -51,7 +57,7 @@ class FreqProxFields extends Fields {
@Override
public Terms terms(String field) throws IOException {
FreqProxTermsWriterPerField perField = fields.get(field);
return perField == null ? null : new FreqProxTerms(perField);
return perField == null ? null : new FreqProxTerms(perField, liveDocs);
}
@Override
@ -62,9 +68,11 @@ class FreqProxFields extends Fields {
private static class FreqProxTerms extends Terms {
final FreqProxTermsWriterPerField terms;
final Bits liveDocs;
public FreqProxTerms(FreqProxTermsWriterPerField terms) {
public FreqProxTerms(FreqProxTermsWriterPerField terms, Bits liveDocs) {
this.terms = terms;
this.liveDocs = liveDocs;
}
@Override
@ -72,8 +80,9 @@ class FreqProxFields extends Fields {
FreqProxTermsEnum termsEnum;
if (reuse instanceof FreqProxTermsEnum && ((FreqProxTermsEnum) reuse).terms == this.terms) {
termsEnum = (FreqProxTermsEnum) reuse;
assert termsEnum.liveDocs == this.liveDocs;
} else {
termsEnum = new FreqProxTermsEnum(terms);
termsEnum = new FreqProxTermsEnum(terms, liveDocs);
}
termsEnum.reset();
return termsEnum;
@ -136,11 +145,13 @@ class FreqProxFields extends Fields {
final FreqProxPostingsArray postingsArray;
final BytesRef scratch = new BytesRef();
final int numTerms;
final Bits liveDocs;
int ord;
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms, Bits liveDocs) {
this.terms = terms;
this.numTerms = terms.bytesHash.size();
this.liveDocs = liveDocs;
sortedTermIDs = terms.sortedTermIDs;
assert sortedTermIDs != null;
postingsArray = (FreqProxPostingsArray) terms.postingsArray;
@ -228,8 +239,8 @@ class FreqProxFields extends Fields {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
if (liveDocs != null) {
public DocsEnum docs(Bits liveDocsIn, DocsEnum reuse, int flags) {
if (liveDocsIn != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
@ -244,18 +255,20 @@ class FreqProxFields extends Fields {
if (reuse instanceof FreqProxDocsEnum) {
docsEnum = (FreqProxDocsEnum) reuse;
if (docsEnum.postingsArray != postingsArray) {
docsEnum = new FreqProxDocsEnum(terms, postingsArray);
docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
} else {
assert docsEnum.liveDocs == liveDocs;
}
} else {
docsEnum = new FreqProxDocsEnum(terms, postingsArray);
docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
}
docsEnum.reset(sortedTermIDs[ord]);
return docsEnum;
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
if (liveDocs != null) {
public DocsAndPositionsEnum docsAndPositions(Bits liveDocsIn, DocsAndPositionsEnum reuse, int flags) {
if (liveDocsIn != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
FreqProxDocsAndPositionsEnum posEnum;
@ -275,10 +288,12 @@ class FreqProxFields extends Fields {
if (reuse instanceof FreqProxDocsAndPositionsEnum) {
posEnum = (FreqProxDocsAndPositionsEnum) reuse;
if (posEnum.postingsArray != postingsArray) {
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
} else {
assert posEnum.liveDocs == liveDocs;
}
} else {
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
}
posEnum.reset(sortedTermIDs[ord]);
return posEnum;
@ -311,15 +326,17 @@ class FreqProxFields extends Fields {
final FreqProxPostingsArray postingsArray;
final ByteSliceReader reader = new ByteSliceReader();
final boolean readTermFreq;
final Bits liveDocs;
int docID;
int freq;
boolean ended;
int termID;
public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
this.terms = terms;
this.postingsArray = postingsArray;
this.readTermFreq = terms.hasFreq;
this.liveDocs = liveDocs;
}
public void reset(int termID) {
@ -347,33 +364,39 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
if (readTermFreq) {
freq = postingsArray.termFreqs[termID];
}
}
} else {
int code = reader.readVInt();
if (!readTermFreq) {
docID += code;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
while (true) {
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
freq = reader.readVInt();
ended = true;
docID = postingsArray.lastDocIDs[termID];
if (readTermFreq) {
freq = postingsArray.termFreqs[termID];
}
}
} else {
int code = reader.readVInt();
if (!readTermFreq) {
docID += code;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
}
assert docID != postingsArray.lastDocIDs[termID];
}
assert docID != postingsArray.lastDocIDs[termID];
}
if (liveDocs != null && liveDocs.get(docID) == false) {
continue;
}
return docID;
return docID;
}
}
@Override
@ -394,6 +417,7 @@ class FreqProxFields extends Fields {
final ByteSliceReader reader = new ByteSliceReader();
final ByteSliceReader posReader = new ByteSliceReader();
final boolean readOffsets;
final Bits liveDocs;
int docID;
int freq;
int pos;
@ -405,10 +429,11 @@ class FreqProxFields extends Fields {
boolean hasPayload;
BytesRef payload = new BytesRef();
public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
this.terms = terms;
this.postingsArray = postingsArray;
this.readOffsets = terms.hasOffsets;
this.liveDocs = liveDocs;
assert terms.hasProx;
assert terms.hasFreq;
}
@ -434,34 +459,40 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
while (posLeft != 0) {
nextPosition();
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
freq = postingsArray.termFreqs[termID];
}
} else {
int code = reader.readVInt();
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
while (true) {
while (posLeft != 0) {
nextPosition();
}
assert docID != postingsArray.lastDocIDs[termID];
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
freq = postingsArray.termFreqs[termID];
}
} else {
int code = reader.readVInt();
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
posLeft = freq;
pos = 0;
startOffset = 0;
return docID;
assert docID != postingsArray.lastDocIDs[termID];
}
posLeft = freq;
pos = 0;
startOffset = 0;
if (liveDocs != null && liveDocs.get(docID) == false) {
continue;
}
return docID;
}
}
@Override
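
Both enums now follow the same pattern: decode the next docID exactly as before, then skip it when liveDocs marks it deleted. As a self-contained illustration of that filtering idea (plain Java, not Lucene code):

import java.util.PrimitiveIterator;
import java.util.function.IntPredicate;

final class SkipDeletedDocs {
  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  // Returns the next id accepted by isLive, or NO_MORE_DOCS once the source is exhausted.
  static int nextLive(PrimitiveIterator.OfInt docs, IntPredicate isLive) {
    while (docs.hasNext()) {
      int docID = docs.nextInt();
      if (isLive.test(docID) == false) {
        continue; // deleted: keep decoding
      }
      return docID;
    }
    return NO_MORE_DOCS;
  }
}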

View File

@ -34,6 +34,8 @@ final class FreqProxTermsWriter extends TermsHash {
}
private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
System.out.println("applyDeletes segUpdates=" + state.segUpdates);
// Process any pending Term deletes for this newly
// flushed segment:
if (state.segUpdates != null && state.segUpdates.terms.size() > 0) {
@ -98,10 +100,16 @@ final class FreqProxTermsWriter extends TermsHash {
// Sort by field name
CollectionUtil.introSort(allFields);
Fields fields = new FreqProxFields(allFields);
FreqProxFields fields = new FreqProxFields(allFields);
applyDeletes(state, fields);
if (state.liveDocs != null) {
fields.setLiveDocs(state.liveDocs);
}
System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {

View File

@ -105,5 +105,6 @@ public class SegmentWriteState {
this.segmentSuffix = segmentSuffix;
segUpdates = state.segUpdates;
delCountOnFlush = state.delCountOnFlush;
liveDocs = state.liveDocs;
}
}

View File

@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
writer.shutdown();
IndexReader reader = DirectoryReader.open(dir);
final Term t = new Term("content", "aa");
assertEquals(3, reader.docFreq(t));
assertEquals(2, reader.docFreq(t));
// Make sure the doc that hit the exception was marked
// as deleted:
@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
IndexReader reader = DirectoryReader.open(dir);
if (i == 0) {
int expected = 5;
assertEquals(expected, reader.docFreq(new Term("contents", "here")));
assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.maxDoc());
int numDel = 0;
final Bits liveDocs = MultiFields.getLiveDocs(reader);
@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
IndexReader reader = DirectoryReader.open(dir);
int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.maxDoc());
assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
assertEquals("i=" + i, expected, reader.maxDoc());
int numDel = 0;
final Bits liveDocs = MultiFields.getLiveDocs(reader);
assertNotNull(liveDocs);

View File

@ -123,14 +123,18 @@ public class TestMultiFields extends LuceneTestCase {
}
DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
assertNotNull(docsEnum);
for(int docID : docs.get(term)) {
if (!deleted.contains(docID)) {
assertEquals(docID, docsEnum.nextDoc());
if (docsEnum == null) {
for(int docID : docs.get(term)) {
assert deleted.contains(docID);
}
} else {
for(int docID : docs.get(term)) {
if (!deleted.contains(docID)) {
assertEquals(docID, docsEnum.nextDoc());
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
}
reader.close();

View File

@ -891,9 +891,8 @@ public class TestTermsEnum extends LuceneTestCase {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Set<String> terms = new HashSet<String>();
// nocommit
String prefix = TestUtil.randomSimpleString(random(), 1, 20);
//String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
//String prefix = TestUtil.randomSimpleString(random(), 1, 20);
String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
int numTerms = atLeast(1000);
if (VERBOSE) {
System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);

View File

@ -381,7 +381,7 @@ public class ToParentBlockJoinQuery extends Query {
@Override
public int advance(int parentTarget) throws IOException {
//System.out.println("Q.advance parentTarget=" + parentTarget);
// System.out.println("Q.advance parentTarget=" + parentTarget);
if (parentTarget == NO_MORE_DOCS) {
return parentDoc = NO_MORE_DOCS;
}
@ -398,13 +398,13 @@ public class ToParentBlockJoinQuery extends Query {
prevParentDoc = parentBits.prevSetBit(parentTarget-1);
//System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
// System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
assert prevParentDoc >= parentDoc;
if (prevParentDoc > nextChildDoc) {
nextChildDoc = childScorer.advance(prevParentDoc);
// System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
//} else {
//System.out.println(" skip childScorer advance");
} else {
// System.out.println(" skip childScorer advance");
}
// Parent & child docs are supposed to be orthogonal:
@ -413,15 +413,21 @@ public class ToParentBlockJoinQuery extends Query {
}
final int nd = nextDoc();
//System.out.println(" return nextParentDoc=" + nd);
// System.out.println(" return nextParentDoc=" + nd);
return nd;
}
public Explanation explain(int docBase) throws IOException {
int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
int start = prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
if (acceptDocs != null) {
// Skip deleted docs:
while (acceptDocs.get(start) == false) {
start++;
}
}
int end = parentDoc - 1; // -1 b/c parentDoc is parent doc
return new ComplexExplanation(
true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end)
true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", docBase+start, docBase+end)
);
}
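
As an illustration of this change: with docBase = 100, prevParentDoc = 5, parentDoc = 9, and child doc 6 deleted, the explanation now reports "from 107 to 108" instead of the previous "from 106 to 108".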

View File

@ -623,8 +623,14 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println("TEST: reader=" + r);
System.out.println("TEST: joinReader=" + joinR);
Bits liveDocs = MultiFields.getLiveDocs(joinR);
for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) {
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX));
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
}
DocsEnum parents = MultiFields.getTermDocsEnum(joinR, null, "isParent", new BytesRef("x"));
System.out.println("parent docIDs:");
while (parents.nextDoc() != parents.NO_MORE_DOCS) {
System.out.println(" " + parents.docID());
}
}
@ -823,6 +829,7 @@ public class TestBlockJoin extends LuceneTestCase {
Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
StoredDocument document = joinS.doc(hit.doc - 1);
int childId = Integer.parseInt(document.get("childID"));
//System.out.println(" hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
assertTrue(explanation.isMatch());
assertEquals(hit.score, explanation.getValue(), 0.0f);
assertEquals(String.format(Locale.ROOT, "Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription());

View File

@ -315,11 +315,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/
writer.shutdown();
// compare
@ -379,11 +382,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/
// compare per-segment
DirectoryReader ir = writer.getReader();
@ -443,11 +449,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/
// merge some segments and ensure that at least one of them has more than
// 256 values