Merge remote-tracking branch 'origin/master' into gradle-master

Dawid Weiss 2020-01-02 10:49:19 +01:00
commit 1a690d95ad
23 changed files with 497 additions and 250 deletions

lucene/CHANGES.txt

@@ -83,7 +83,10 @@ Other
API Changes
---------------------
(No changes)
* LUCENE-9093: Not an API change but a change in behavior of the UnifiedHighlighter's LengthGoalBreakIterator that will
yield Passages sized a little differently, because the sizing pivot is now the center of the first match and
not its left edge.
New Features
---------------------
@@ -104,6 +107,12 @@ Improvements
* LUCENE-9105: UniformSplit postings format detects corrupted index and better handles IO exceptions. (Bruno Roustant)
* LUCENE-9106: UniformSplit postings format allows extension of block/line serializers. (Bruno Roustant)
* LUCENE-9093: UnifiedHighlighter's LengthGoalBreakIterator has a new fragmentAlignment option to better center the
first match in the passage. Also, the sizing point now pivots at the center of the first match term rather than its left
edge, so the resulting Passages won't be identical to those produced before. (Nándor Mátravölgyi, David Smiley)
Optimizations
---------------------
(No changes)
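
The new option is passed through LengthGoalBreakIterator's factory methods. A minimal hedged sketch of wiring it into a UnifiedHighlighter, mirroring the test code further down in this diff (the fragsize and alignment values are invented for illustration; analyzer, query and content are assumed to exist):

UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
// Target passages of roughly 70 chars, with the first match centered (fragmentAlignment = 0.5f).
highlighter.setBreakIterator(() ->
    LengthGoalBreakIterator.createClosestToLength(new CustomSeparatorBreakIterator('.'), 70, 0.5f));
String snippets = highlighter.highlightWithoutSearcher("body", query, content, 1).toString();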

BlockHeader.java

@@ -134,43 +134,49 @@ public class BlockHeader implements Accountable {
return basePayloadsFP;
}
public void write(DataOutput output) throws IOException {
assert linesCount > 0 : "block header does not seem to be initialized";
output.writeVInt(linesCount);
output.writeVLong(baseDocsFP);
output.writeVLong(basePositionsFP);
output.writeVLong(basePayloadsFP);
output.writeVInt(termStatesBaseOffset);
output.writeVInt(middleLineOffset);
}
public static BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
int linesCount = input.readVInt();
if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
}
long baseDocsFP = input.readVLong();
long basePositionsFP = input.readVLong();
long basePayloadsFP = input.readVLong();
int termStatesBaseOffset = input.readVInt();
if (termStatesBaseOffset < 0) {
throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
}
int middleTermOffset = input.readVInt();
if (middleTermOffset < 0) {
throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
}
BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
}
@Override
public long ramBytesUsed() {
return RAM_USAGE;
}
/**
* Reads/writes block header.
*/
public static class Serializer {
public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
assert blockHeader.linesCount > 0 : "Block header is not initialized";
output.writeVInt(blockHeader.linesCount);
output.writeVLong(blockHeader.baseDocsFP);
output.writeVLong(blockHeader.basePositionsFP);
output.writeVLong(blockHeader.basePayloadsFP);
output.writeVInt(blockHeader.termStatesBaseOffset);
output.writeVInt(blockHeader.middleLineOffset);
}
public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
int linesCount = input.readVInt();
if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
throw new CorruptIndexException("Illegal number of lines in block: " + linesCount, input);
}
long baseDocsFP = input.readVLong();
long basePositionsFP = input.readVLong();
long basePayloadsFP = input.readVLong();
int termStatesBaseOffset = input.readVInt();
if (termStatesBaseOffset < 0) {
throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
}
int middleTermOffset = input.readVInt();
if (middleTermOffset < 0) {
throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
}
BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
}
}
}
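
Since read and write are now instance methods on BlockHeader.Serializer rather than statics, custom postings formats can subclass it. A hedged sketch (CountingBlockHeaderSerializer is an invented name, not part of this commit):

public static class CountingBlockHeaderSerializer extends BlockHeader.Serializer {
  private long writeCount; // illustrative bookkeeping only

  @Override
  public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
    writeCount++; // custom hook before delegating to the default wire format
    super.write(output, blockHeader);
  }
}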

BlockLine.java

@@ -107,7 +107,7 @@ public class BlockLine implements Accountable {
}
/**
* Reads block lines with terms encoded incrementally inside a block.
* Reads/writes block lines with terms encoded incrementally inside a block.
* This class keeps a state of the previous term read to decode the next term.
*/
public static class Serializer implements Accountable {
@@ -149,7 +149,7 @@ public class BlockLine implements Accountable {
* the incremental encoding. {@code true} for the first
* and middle term, {@code false} for other terms.
*/
public static void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
public void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
int termStateRelativeOffset, boolean isIncrementalEncodingSeed) throws IOException {
blockOutput.writeVInt(termStateRelativeOffset);
writeIncrementallyEncodedTerm(line.getTermBytes(), previousLine == null ? null : previousLine.getTermBytes(),
@@ -161,13 +161,13 @@ public class BlockLine implements Accountable {
*
* @param termStatesOutput The output pointing to the details region.
*/
protected static void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
protected void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
FieldInfo fieldInfo, DeltaBaseTermStateSerializer encoder) throws IOException {
assert line.termState != null;
encoder.writeTermState(termStatesOutput, fieldInfo, line.termState);
}
protected static void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
protected void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
boolean isIncrementalEncodingSeed, DataOutput blockOutput) throws IOException {
BytesRef term = termBytes.getTerm();
assert term.offset == 0;
@@ -240,7 +240,7 @@ public class BlockLine implements Accountable {
* Reads {@code length} bytes from the given {@link DataInput} and stores
* them at {@code offset} in {@code bytes.bytes}.
*/
protected static void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
assert bytes.offset == 0;
bytes.length = offset + length;
bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length);
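
As the Serializer javadoc above notes, terms are encoded incrementally against the previously decoded term. A small worked illustration with invented values:

// previous term: "abcdef", current term: "abcxyz"
// shared prefix length = 3, so the block line only needs to record (3, "xyz");
// the reader then rebuilds "abc" + "xyz" from the term it decoded just before.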

BlockReader.java

@@ -60,6 +60,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
protected final FieldMetadata fieldMetadata;
protected final BlockDecoder blockDecoder;
protected BlockHeader.Serializer blockHeaderReader;
protected BlockLine.Serializer blockLineReader;
/**
* In-memory read buffer for the current block.
@@ -406,14 +407,27 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
protected void initializeBlockReadLazily() throws IOException {
if (blockStartFP == -1) {
blockInput = blockInput.clone();
blockLineReader = new BlockLine.Serializer();
blockHeaderReader = createBlockHeaderSerializer();
blockLineReader = createBlockLineSerializer();
blockReadBuffer = new ByteArrayDataInput();
termStatesReadBuffer = new ByteArrayDataInput();
termStateSerializer = new DeltaBaseTermStateSerializer();
termStateSerializer = createDeltaBaseTermStateSerializer();
scratchBlockBytes = new BytesRef();
}
}
protected BlockHeader.Serializer createBlockHeaderSerializer() {
return new BlockHeader.Serializer();
}
protected BlockLine.Serializer createBlockLineSerializer() {
return new BlockLine.Serializer();
}
protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
return new DeltaBaseTermStateSerializer();
}
/**
* Reads the block header.
* Sets {@link #blockHeader}.
@@ -428,7 +442,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
BytesRef blockBytesRef = decodeBlockBytesIfNeeded(numBlockBytes);
blockReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
termStatesReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
return blockHeader = BlockHeader.read(blockReadBuffer, blockHeader);
return blockHeader = blockHeaderReader.read(blockReadBuffer, blockHeader);
}
protected BytesRef decodeBlockBytesIfNeeded(int numBlockBytes) throws IOException {

BlockWriter.java

@@ -60,6 +60,8 @@ public class BlockWriter {
protected final ByteBuffersDataOutput blockLinesWriteBuffer;
protected final ByteBuffersDataOutput termStatesWriteBuffer;
protected final BlockHeader.Serializer blockHeaderWriter;
protected final BlockLine.Serializer blockLineWriter;
protected final DeltaBaseTermStateSerializer termStateSerializer;
protected final BlockEncoder blockEncoder;
protected final ByteBuffersDataOutput blockWriteBuffer;
@@ -81,7 +83,9 @@ public class BlockWriter {
this.blockEncoder = blockEncoder;
this.blockLines = new ArrayList<>(targetNumBlockLines);
this.termStateSerializer = new DeltaBaseTermStateSerializer();
this.blockHeaderWriter = createBlockHeaderSerializer();
this.blockLineWriter = createBlockLineSerializer();
this.termStateSerializer = createDeltaBaseTermStateSerializer();
this.blockLinesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
this.termStatesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
@@ -91,6 +95,18 @@ public class BlockWriter {
this.scratchBytesRef = new BytesRef();
}
protected BlockHeader.Serializer createBlockHeaderSerializer() {
return new BlockHeader.Serializer();
}
protected BlockLine.Serializer createBlockLineSerializer() {
return new BlockLine.Serializer();
}
protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
return new DeltaBaseTermStateSerializer();
}
/**
* Adds a new {@link BlockLine} term for the current field.
* <p>
@@ -196,7 +212,7 @@ public class BlockWriter {
reusableBlockHeader.reset(blockLines.size(), termStateSerializer.getBaseDocStartFP(), termStateSerializer.getBasePosStartFP(),
termStateSerializer.getBasePayStartFP(), Math.toIntExact(blockLinesWriteBuffer.size()), middleOffset);
reusableBlockHeader.write(blockWriteBuffer);
blockHeaderWriter.write(blockWriteBuffer, reusableBlockHeader);
blockLinesWriteBuffer.copyTo(blockWriteBuffer);
termStatesWriteBuffer.copyTo(blockWriteBuffer);
@@ -236,8 +252,8 @@ public class BlockWriter {
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
assert fieldMetadata != null;
BlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
BlockLine.Serializer.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
blockLineWriter.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
}
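
The create*Serializer factory methods above are the new extension hooks. A hedged sketch of overriding one of them in a BlockWriter (or BlockReader) subclass; CountingBlockHeaderSerializer is the invented subclass from the BlockHeader sketch earlier:

@Override
protected BlockHeader.Serializer createBlockHeaderSerializer() {
  // Swap in a custom header serializer instead of the default implementation.
  return new CountingBlockHeaderSerializer();
}
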
/**

FieldMetadata.java

@@ -194,88 +194,99 @@ public class FieldMetadata implements Accountable {
+ (docsSeen == null ? 0 : docsSeen.ramBytesUsed());
}
public static FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
int fieldId = input.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
if (fieldInfo == null) {
throw new CorruptIndexException("Illegal field id= " + fieldId, input);
}
FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
/**
* Reads/writes field metadata.
*/
public static class Serializer {
fieldMetadata.numTerms = input.readVInt();
if (fieldMetadata.numTerms <= 0) {
throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
}
/**
* Stateless singleton.
*/
public static final Serializer INSTANCE = new Serializer();
fieldMetadata.sumDocFreq = input.readVInt();
fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
fieldMetadata.sumTotalTermFreq += input.readVInt();
if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
// #positions must be >= #postings.
throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
+ " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException {
assert fieldMetadata.dictionaryStartFP >= 0;
assert fieldMetadata.firstBlockStartFP >= 0;
assert fieldMetadata.lastBlockStartFP >= 0;
assert fieldMetadata.numTerms > 0 : "There should be at least one term for field " + fieldMetadata.fieldInfo.name + ": " + fieldMetadata.numTerms;
assert fieldMetadata.firstBlockStartFP <= fieldMetadata.lastBlockStartFP : "start: " + fieldMetadata.firstBlockStartFP + " end: " + fieldMetadata.lastBlockStartFP;
assert fieldMetadata.lastTerm != null : "you must set the last term";
output.writeVInt(fieldMetadata.fieldInfo.number);
output.writeVInt(fieldMetadata.numTerms);
output.writeVInt(fieldMetadata.sumDocFreq);
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
assert fieldMetadata.sumTotalTermFreq >= fieldMetadata.sumDocFreq : "sumTotalFQ: " + fieldMetadata.sumTotalTermFreq + " sumDocFQ: " + fieldMetadata.sumDocFreq;
output.writeVInt(fieldMetadata.sumTotalTermFreq - fieldMetadata.sumDocFreq);
}
output.writeVInt(fieldMetadata.getDocCount());
output.writeVLong(fieldMetadata.dictionaryStartFP);
output.writeVLong(fieldMetadata.firstBlockStartFP);
output.writeVLong(fieldMetadata.lastBlockStartFP);
if (fieldMetadata.lastTerm.length > 0) {
output.writeVInt(fieldMetadata.lastTerm.length);
output.writeBytes(fieldMetadata.lastTerm.bytes, fieldMetadata.lastTerm.offset, fieldMetadata.lastTerm.length);
} else {
output.writeVInt(0);
}
}
fieldMetadata.docCount = input.readVInt();
if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
// #docs with field must be <= #docs.
throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
+ " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
}
if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
// #postings must be >= #docs with field.
throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
+ " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
}
public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
int fieldId = input.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
if (fieldInfo == null) {
throw new CorruptIndexException("Illegal field id= " + fieldId, input);
}
FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
fieldMetadata.dictionaryStartFP = input.readVLong();
fieldMetadata.firstBlockStartFP = input.readVLong();
fieldMetadata.lastBlockStartFP = input.readVLong();
fieldMetadata.numTerms = input.readVInt();
if (fieldMetadata.numTerms <= 0) {
throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
}
int lastTermLength = input.readVInt();
BytesRef lastTerm = new BytesRef(lastTermLength);
if (lastTermLength > 0) {
input.readBytes(lastTerm.bytes, 0, lastTermLength);
lastTerm.length = lastTermLength;
} else if (lastTermLength < 0) {
throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
}
fieldMetadata.setLastTerm(lastTerm);
fieldMetadata.sumDocFreq = input.readVInt();
fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
fieldMetadata.sumTotalTermFreq += input.readVInt();
if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
// #positions must be >= #postings.
throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
+ " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
}
}
return fieldMetadata;
}
fieldMetadata.docCount = input.readVInt();
if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
// #docs with field must be <= #docs.
throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
+ " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
}
if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
// #postings must be >= #docs with field.
throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
+ " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
}
public void write(DataOutput output) throws IOException {
assert dictionaryStartFP >= 0;
assert firstBlockStartFP >= 0;
assert lastBlockStartFP >= 0;
assert numTerms > 0 : "There should be at least one term for field " + fieldInfo.name + ": " + numTerms;
assert firstBlockStartFP <= lastBlockStartFP : "start: " + firstBlockStartFP + " end: " + lastBlockStartFP;
assert lastTerm != null : "you must set the last term";
fieldMetadata.dictionaryStartFP = input.readVLong();
fieldMetadata.firstBlockStartFP = input.readVLong();
fieldMetadata.lastBlockStartFP = input.readVLong();
output.writeVInt(fieldInfo.number);
int lastTermLength = input.readVInt();
BytesRef lastTerm = new BytesRef(lastTermLength);
if (lastTermLength > 0) {
input.readBytes(lastTerm.bytes, 0, lastTermLength);
lastTerm.length = lastTermLength;
} else if (lastTermLength < 0) {
throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
}
fieldMetadata.setLastTerm(lastTerm);
output.writeVInt(numTerms);
output.writeVInt(sumDocFreq);
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
assert sumTotalTermFreq >= sumDocFreq : "sumTotalFQ: " + sumTotalTermFreq + " sumDocFQ: " + sumDocFreq;
output.writeVInt(sumTotalTermFreq - sumDocFreq);
}
output.writeVInt(getDocCount());
output.writeVLong(dictionaryStartFP);
output.writeVLong(firstBlockStartFP);
output.writeVLong(lastBlockStartFP);
if (lastTerm.length > 0) {
output.writeVInt(lastTerm.length);
output.writeBytes(lastTerm.bytes, lastTerm.offset, lastTerm.length);
} else {
output.writeVInt(0);
return fieldMetadata;
}
}
}

UniformSplitTermsReader.java

@@ -69,7 +69,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
* It can be used for decompression or decryption.
*/
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
this(postingsReader, state, blockDecoder, NAME, VERSION_START, VERSION_CURRENT,
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
@@ -77,8 +77,10 @@ public class UniformSplitTermsReader extends FieldsProducer {
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption.
*/
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
IndexInput dictionaryInput = null;
IndexInput blockInput = null;
boolean success = false;
@@ -100,7 +102,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
CodecUtil.retrieveChecksum(blockInput);
seekFieldsMetadata(blockInput);
Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, state.segmentInfo.maxDoc());
Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
fieldToTermsMap = new HashMap<>();
this.blockInput = blockInput;
@@ -133,19 +135,19 @@ public class UniformSplitTermsReader extends FieldsProducer {
* @param indexInput {@link IndexInput} must be positioned to the fields metadata
* details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.
*/
protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos,
FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
int numFields = indexInput.readVInt();
if (numFields < 0) {
throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
}
Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
for (int i = 0; i < numFields; i++) {
fieldMetadataCollection.add(FieldMetadata.read(indexInput, fieldInfos, maxNumDocs));
fieldMetadataCollection.add(fieldMetadataReader.read(indexInput, fieldInfos, maxNumDocs));
}
return fieldMetadataCollection;
}
@Override
public void close() throws IOException {
try {

UniformSplitTermsWriter.java

@@ -128,6 +128,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
protected final int deltaNumLines;
protected final BlockEncoder blockEncoder;
protected final FieldMetadata.Serializer fieldMetadataWriter;
protected final IndexOutput blockOutput;
protected final IndexOutput dictionaryOutput;
@@ -146,7 +147,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
*/
public UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
@@ -164,7 +165,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
* It can be used for compression or encryption.
*/
protected UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
validateSettings(targetNumBlockLines, deltaNumLines);
IndexOutput blockOutput = null;
@@ -177,6 +178,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
this.targetNumBlockLines = targetNumBlockLines;
this.deltaNumLines = deltaNumLines;
this.blockEncoder = blockEncoder;
this.fieldMetadataWriter = fieldMetadataWriter;
String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, termsBlocksExtension);
blockOutput = state.directory.createOutput(termsName, state.context);
@@ -278,7 +280,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
if (fieldMetadata.getNumTerms() > 0) {
fieldMetadata.setLastTerm(lastTerm);
fieldMetadata.write(fieldsOutput);
fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
writeDictionary(dictionaryBuilder);
return 1;
}
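
Because the protected constructor now takes a FieldMetadata.Serializer, a subclass can supply its own. A minimal hedged sketch (MyTermsWriter and MyFieldMetadataSerializer are invented names; NAME, VERSION_CURRENT and the extension constants are the same ones referenced above):

public class MyTermsWriter extends UniformSplitTermsWriter {
  public MyTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                       int targetNumBlockLines, int deltaNumLines) throws IOException {
    super(postingsWriter, state, targetNumBlockLines, deltaNumLines,
        null /* no block encoder */, new MyFieldMetadataSerializer(),
        NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
  }
}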

STBlockLine.java

@@ -75,7 +75,7 @@ public class STBlockLine extends BlockLine {
/**
* Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given output.
*/
public static void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
public void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
DeltaBaseTermStateSerializer encoder) throws IOException {
FieldMetadataTermState fieldMetadataTermState;
@@ -111,7 +111,7 @@ public class STBlockLine extends BlockLine {
* @return The {@link BlockTermState} corresponding to the provided field id; or null if the field
* does not occur in the line.
*/
public static BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
public BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
DeltaBaseTermStateSerializer termStateSerializer,
BlockHeader blockHeader, FieldInfos fieldInfos,
BlockTermState reuse) throws IOException {
@@ -161,7 +161,7 @@ public class STBlockLine extends BlockLine {
* @param fieldTermStatesMap Map filled with the term states for each field. It is cleared first.
* @see #readTermStateForField
*/
public static void readFieldTermStatesMap(DataInput termStatesInput,
public void readFieldTermStatesMap(DataInput termStatesInput,
DeltaBaseTermStateSerializer termStateSerializer,
BlockHeader blockHeader,
FieldInfos fieldInfos,
@@ -183,7 +183,7 @@ public class STBlockLine extends BlockLine {
/**
* Reads all the field ids in the current block line of the provided input.
*/
public static int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
int[] fieldIds = new int[numFields];
for (int i = 0; i < numFields; i++) {
fieldIds[i] = termStatesInput.readVInt();

STBlockReader.java

@@ -117,6 +117,11 @@ public class STBlockReader extends BlockReader {
return blockStartFP > fieldMetadata.getLastBlockStartFP() || super.isBeyondLastTerm(searchedTerm, blockStartFP);
}
@Override
protected STBlockLine.Serializer createBlockLineSerializer() {
return new STBlockLine.Serializer();
}
/**
* Reads the {@link BlockTermState} on the current line for this reader's field.
*
@@ -125,7 +130,7 @@ public class STBlockReader extends BlockReader {
@Override
protected BlockTermState readTermState() throws IOException {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
return termState = STBlockLine.Serializer.readTermStateForField(
return termState = ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
fieldMetadata.getFieldInfo().number,
termStatesReadBuffer,
termStateSerializer,

STBlockWriter.java

@@ -84,10 +84,15 @@ public class STBlockWriter extends BlockWriter {
super.finishLastBlock(dictionaryBuilder);
}
@Override
protected BlockLine.Serializer createBlockLineSerializer() {
return new STBlockLine.Serializer();
}
@Override
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
STBlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
STBlockLine.Serializer.writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
((STBlockLine.Serializer) blockLineWriter).writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
((STBlockLine) line).collectFields(fieldsInBlock);
}

STIntersectBlockReader.java

@@ -91,6 +91,11 @@ public class STIntersectBlockReader extends IntersectBlockReader {
return super.nextBlockMatchingPrefix() && blockHeader != null;
}
@Override
protected STBlockLine.Serializer createBlockLineSerializer() {
return new STBlockLine.Serializer();
}
/**
* Reads the {@link BlockTermState} on the current line for the specific field
corresponding to this reader.
@@ -100,7 +105,7 @@ public class STIntersectBlockReader extends IntersectBlockReader {
@Override
protected BlockTermState readTermState() throws IOException {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
return STBlockLine.Serializer.readTermStateForField(
return ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
fieldMetadata.getFieldInfo().number,
termStatesReadBuffer,
termStateSerializer,

STMergingBlockReader.java

@@ -98,7 +98,7 @@ public class STMergingBlockReader extends STBlockReader {
public void readFieldTermStatesMap(Map<String, BlockTermState> fieldTermStatesMap) throws IOException {
if (term() != null) {
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
STBlockLine.Serializer.readFieldTermStatesMap(
((STBlockLine.Serializer) blockLineReader).readFieldTermStatesMap(
termStatesReadBuffer,
termStateSerializer,
blockHeader,

STUniformSplitTermsReader.java

@@ -47,13 +47,15 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
public class STUniformSplitTermsReader extends UniformSplitTermsReader {
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
super(postingsReader, state, blockDecoder, NAME, VERSION_START,
VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
super(postingsReader, state, blockDecoder, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
}
@Override

STUniformSplitTermsWriter.java

@@ -88,13 +88,14 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, fieldMetadataWriter, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
}
@Override
@@ -200,7 +201,7 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
int fieldsNumber = 0;
for (FieldMetadata fieldMetadata : fieldMetadataList) {
if (fieldMetadata.getNumTerms() > 0) {
fieldMetadata.write(fieldsOutput);
fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
fieldsNumber++;
}
}

FieldHighlighter.java

@@ -141,6 +141,7 @@ public class FieldHighlighter {
}
});
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
int lastPassageEnd = 0;
do {
int start = off.startOffset();
@@ -158,9 +159,12 @@ public class FieldHighlighter {
if (start >= contentLength) {
break;
}
// find fragment from the middle of the match, so the result's length may be closer to fragsize
final int center = start + (end - start) / 2;
// advance breakIterator
passage.setStartOffset(Math.max(this.breakIterator.preceding(start + 1), 0));
passage.setEndOffset(Math.min(this.breakIterator.following(start), contentLength));
passage.setStartOffset(Math.min(start, Math.max(this.breakIterator.preceding(Math.max(start + 1, center)), lastPassageEnd)));
lastPassageEnd = Math.max(end, Math.min(this.breakIterator.following(Math.min(end - 1, center)), contentLength));
passage.setEndOffset(lastPassageEnd);
}
// Add this term to the passage.
BytesRef term = off.getTerm();// a reference; safe to refer to

LengthGoalBreakIterator.java

@@ -33,22 +33,49 @@ public class LengthGoalBreakIterator extends BreakIterator {
private final BreakIterator baseIter;
private final int lengthGoal;
private final float fragmentAlignment; // how much text to align before match-fragment, valid in range [0, 1]
private final boolean isMinimumLength; // if false then is "closest to" length
private int currentCache;
/** Breaks will be at least {@code minLength} apart (to the extent possible). */
/**
* Breaks will be at least {@code minLength} apart (to the extent possible),
* while trying to position the match inside the fragment according to {@code fragmentAlignment}.
*/
public static LengthGoalBreakIterator createMinLength(BreakIterator baseIter, int minLength,
float fragmentAlignment) {
return new LengthGoalBreakIterator(baseIter, minLength, fragmentAlignment, true, baseIter.current());
}
/** For backwards compatibility you can initialise the break iterator without fragmentAlignment. */
@Deprecated
public static LengthGoalBreakIterator createMinLength(BreakIterator baseIter, int minLength) {
return new LengthGoalBreakIterator(baseIter, minLength, true);
return createMinLength(baseIter, minLength, 0.f);
}
/** Breaks will be on average {@code targetLength} apart; the closest break to this target (before or after)
* is chosen. */
/**
* Breaks will be on average {@code targetLength} apart; the closest break to this target (before or after)
* is chosen. The match will be positioned according to {@code fragmentAlignment} as much as possible.
*/
public static LengthGoalBreakIterator createClosestToLength(BreakIterator baseIter, int targetLength,
float fragmentAlignment) {
return new LengthGoalBreakIterator(baseIter, targetLength, fragmentAlignment, false, baseIter.current());
}
/** For backwards compatibility you can initialise the break iterator without fragmentAlignment. */
@Deprecated
public static LengthGoalBreakIterator createClosestToLength(BreakIterator baseIter, int targetLength) {
return new LengthGoalBreakIterator(baseIter, targetLength, false);
return createClosestToLength(baseIter, targetLength, 0.f);
}
private LengthGoalBreakIterator(BreakIterator baseIter, int lengthGoal, boolean isMinimumLength) {
private LengthGoalBreakIterator(BreakIterator baseIter, int lengthGoal, float fragmentAlignment,
boolean isMinimumLength, int currentCache) {
this.baseIter = baseIter;
this.currentCache = currentCache;
this.lengthGoal = lengthGoal;
if (fragmentAlignment < 0.f || fragmentAlignment > 1.f || !Float.isFinite(fragmentAlignment)) {
throw new IllegalArgumentException("fragmentAlignment must be >= zero and <= one");
}
this.fragmentAlignment = fragmentAlignment;
this.isMinimumLength = isMinimumLength;
}
@@ -60,12 +87,15 @@ public class LengthGoalBreakIterator extends BreakIterator {
@Override
public String toString() {
String goalDesc = isMinimumLength ? "minLen" : "targetLen";
return getClass().getSimpleName() + "{" + goalDesc + "=" + lengthGoal + ", baseIter=" + baseIter + "}";
return getClass().getSimpleName() + "{" + goalDesc + "=" + lengthGoal + ", fragAlign=" + fragmentAlignment +
", baseIter=" + baseIter + "}";
}
@Override
public Object clone() {
return new LengthGoalBreakIterator((BreakIterator) baseIter.clone(), lengthGoal, isMinimumLength);
return new LengthGoalBreakIterator(
(BreakIterator) baseIter.clone(), lengthGoal, fragmentAlignment, isMinimumLength, currentCache
);
}
@Override
@@ -76,26 +106,28 @@ public class LengthGoalBreakIterator extends BreakIterator {
@Override
public void setText(String newText) {
baseIter.setText(newText);
currentCache = baseIter.current();
}
@Override
public void setText(CharacterIterator newText) {
baseIter.setText(newText);
currentCache = baseIter.current();
}
@Override
public int current() {
return baseIter.current();
return currentCache;
}
@Override
public int first() {
return baseIter.first();
return currentCache = baseIter.first();
}
@Override
public int last() {
return baseIter.last();
return currentCache = baseIter.last();
}
@Override
@@ -104,10 +136,10 @@ public class LengthGoalBreakIterator extends BreakIterator {
return baseIter.next(n); // probably wrong
}
// called by getSummaryPassagesNoHighlight to generate default summary.
// Called by getSummaryPassagesNoHighlight to generate default summary.
@Override
public int next() {
return following(current());
return following(currentCache, currentCache + lengthGoal);
}
@Override
@@ -116,65 +148,70 @@ public class LengthGoalBreakIterator extends BreakIterator {
return baseIter.previous();
}
// called while the current position is the start of a new passage; find end of passage
@Override
public int following(int followingIdx) {
final int startIdx = current();
if (followingIdx < startIdx) {
assert false : "Not supported";
return baseIter.following(followingIdx);
}
final int targetIdx = startIdx + lengthGoal;
// When followingIdx >= targetIdx, we can simply delegate since it will be >= the target
if (followingIdx >= targetIdx - 1) {
return baseIter.following(followingIdx);
}
// If target exceeds the text length, return the last index.
if (targetIdx >= getText().getEndIndex()) {
return baseIter.last();
}
// Find closest break >= the target
final int afterIdx = baseIter.following(targetIdx - 1);
if (afterIdx == DONE) { // we're at the end; can this happen?
return current();
}
if (afterIdx == targetIdx) { // right on the money
return afterIdx;
}
if (isMinimumLength) { // thus never undershoot
return afterIdx;
}
// note: it is a shame that we invoke preceding() *in addition to* following(); BI's are sometimes expensive.
// Find closest break < target
final int beforeIdx = baseIter.preceding(targetIdx); // or could do baseIter.previous() but we hope the BI implements preceding()
if (beforeIdx <= followingIdx) { // too far back
return moveToBreak(afterIdx);
}
if (targetIdx - beforeIdx <= afterIdx - targetIdx) {
return beforeIdx;
}
return moveToBreak(afterIdx);
public int following(int matchEndIndex) {
return following(matchEndIndex, (matchEndIndex + 1) + (int)(lengthGoal * (1.f - fragmentAlignment)));
}
private int moveToBreak(int idx) { // precondition: idx is a known break
// bi.isBoundary(idx) has side-effect of moving the position. Not obvious!
//boolean moved = baseIter.isBoundary(idx); // probably not particularly expensive
//assert moved && current() == idx;
private int following(int matchEndIndex, int targetIdx) {
if (targetIdx >= getText().getEndIndex()) {
if (currentCache == baseIter.last()) {
return DONE;
}
return currentCache = baseIter.last();
}
final int afterIdx = baseIter.following(targetIdx - 1);
if (afterIdx == DONE) {
currentCache = baseIter.last();
return DONE;
}
if (afterIdx == targetIdx) { // right on the money
return currentCache = afterIdx;
}
if (isMinimumLength) { // thus never undershoot
return currentCache = afterIdx;
}
// TODO fix: Would prefer to do "- 1" instead of "- 2" but CustomSeparatorBreakIterator has a bug.
int current = baseIter.following(idx - 2);
assert current == idx : "following() didn't move us to the expected index.";
return idx;
// note: it is a shame that we invoke preceding() *one more time*; BI's are sometimes expensive.
// Find closest break to target
final int beforeIdx = baseIter.preceding(targetIdx);
if (targetIdx - beforeIdx < afterIdx - targetIdx && beforeIdx > matchEndIndex) {
return currentCache = beforeIdx;
}
return currentCache = afterIdx;
}
// called at start of new Passage given first word start offset
@Override
public int preceding(int offset) {
return baseIter.preceding(offset); // no change needed
public int preceding(int matchStartIndex) {
final int targetIdx = (matchStartIndex - 1) - (int)(lengthGoal * fragmentAlignment);
if (targetIdx <= 0) {
if (currentCache == baseIter.first()) {
return DONE;
}
return currentCache = baseIter.first();
}
final int beforeIdx = baseIter.preceding(targetIdx + 1);
if (beforeIdx == DONE) {
currentCache = baseIter.first();
return DONE;
}
if (beforeIdx == targetIdx) { // right on the money
return currentCache = beforeIdx;
}
if (isMinimumLength) { // thus never undershoot
return currentCache = beforeIdx;
}
// note: it is a shame that we invoke following() *one more time*; BI's are sometimes expensive.
// Find closest break to target
final int afterIdx = baseIter.following(targetIdx - 1);
if (afterIdx - targetIdx < targetIdx - beforeIdx && afterIdx < matchStartIndex) {
return currentCache = afterIdx;
}
return currentCache = beforeIdx;
}
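
A worked example of the preceding/following arithmetic above, with invented numbers: lengthGoal = 20, fragmentAlignment = 0.5f, and a match at offsets [25, 30). (FieldHighlighter actually probes from the match center, as shown earlier in this diff, but the arithmetic is the same.)

// preceding(25): targetIdx = (25 - 1) - (int)(20 * 0.5f) = 14 -> passage starts at the break nearest 14
// following(30): targetIdx = (30 + 1) + (int)(20 * 0.5f) = 41 -> passage ends at the break nearest 41
// With fragmentAlignment = 0.f the targets become 24 and 51 instead: all slack falls after the match.
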
@Override

LengthGoalBreakIteratorTest.java

@@ -18,6 +18,7 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.text.BreakIterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -28,6 +29,7 @@ import org.apache.lucene.util.QueryBuilder;
public class LengthGoalBreakIteratorTest extends LuceneTestCase {
private static final String FIELD = "body";
private static final float[] ALIGNS = {0.f, 0.5f, 1.f};
// We test LengthGoalBreakIterator as it is used by the UnifiedHighlighter instead of directly, because it is
// not a general purpose BreakIterator. A unit test of it directly wouldn't give as much confidence.
@@ -39,65 +41,159 @@ public class LengthGoalBreakIteratorTest extends LuceneTestCase {
// 0 1
// 01234567890123456789
static final String CONTENT = "Aa bb. Cc dd. Ee ff";
static final String CONTENT2 = "Aa bb Cc dd X Ee ff Gg hh.";
static final String CONTENT3 = "Aa bbcc ddxyzee ffgg hh.";
public void testFragmentAlignmentConstructor() throws IOException {
BreakIterator baseBI = new CustomSeparatorBreakIterator('.');
// test fragmentAlignment validation
float[] valid_aligns = {0.f, 0.3333f, 0.5f, 0.99f, 1.f};
for (float alignment : valid_aligns) {
LengthGoalBreakIterator.createClosestToLength(baseBI, 50, alignment);
}
float[] invalid_aligns = {-0.01f, -1.f, 1.5f, Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY};
for (float alignment : invalid_aligns) {
expectThrows(IllegalArgumentException.class, () -> {
LengthGoalBreakIterator.createClosestToLength(baseBI, 50, alignment);
});
}
// test backwards compatibility constructors
String backwardCompString = LengthGoalBreakIterator.createClosestToLength(baseBI, 50).toString();
assertTrue(backwardCompString, backwardCompString.contains("fragAlign=0.0"));
backwardCompString = LengthGoalBreakIterator.createMinLength(baseBI, 50).toString();
assertTrue(backwardCompString, backwardCompString.contains("fragAlign=0.0"));
}
public void testTargetLen() throws IOException {
// "goal" means target length goal to find closest break
// at first word:
Query query = query("aa");
assertEquals("almost two sent",
"<b>Aa</b> bb.", highlightClosestToLen(CONTENT, query, 9));
assertEquals( "barely two sent",
"<b>Aa</b> bb. Cc dd.", highlightClosestToLen(CONTENT, query, 10));
assertEquals("long goal",
"<b>Aa</b> bb. Cc dd. Ee ff", highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20)));
assertEquals("almost two sent A",
"<b>Aa</b> bb.", highlightClosestToLen(CONTENT, query, 7, 0.f));
assertEquals("almost two sent B",
"<b>Aa</b> bb.", highlightClosestToLen(CONTENT, query, 15, 0.5f));
assertEquals("almost two sent C",
"<b>Aa</b> bb.", highlightClosestToLen(CONTENT, query, 64, 1.f));
assertEquals("barely two sent A",
"<b>Aa</b> bb. Cc dd.", highlightClosestToLen(CONTENT, query, 8, 0.f));
assertEquals("barely two sent B",
"<b>Aa</b> bb. Cc dd.", highlightClosestToLen(CONTENT, query, 16, 0.5f));
assertEquals("long goal A",
"<b>Aa</b> bb. Cc dd. Ee ff", highlightClosestToLen(CONTENT, query, 14 + random().nextInt(20), 0.f));
assertEquals("long goal B",
"<b>Aa</b> bb. Cc dd. Ee ff", highlightClosestToLen(CONTENT, query, 28 + random().nextInt(20), 0.5f));
// at some word not at start of passage
query = query("dd");
assertEquals("short goal",
" Cc <b>dd</b>.", highlightClosestToLen(CONTENT, query, random().nextInt(5)));
assertEquals("almost two sent",
" Cc <b>dd</b>.", highlightClosestToLen(CONTENT, query, 10));
assertEquals("barely two sent",
" Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 11));
assertEquals("long goal",
" Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 12 + random().nextInt(20)));
for (float align : ALIGNS) {
// alignment is not meaningful if fragsize is shorter than or closer to match-fragment boundaries
assertEquals("short goal " + align,
" Cc <b>dd</b>.", highlightClosestToLen(CONTENT, query, random().nextInt(4), align));
}
// preceding/following inclusion by alignment parameter
assertEquals("barely two sent A",
" Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 11, 0.f));
assertEquals("barely two sent B",
" Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 11, 0.5f));
assertEquals("barely two sent C",
"Aa bb. Cc <b>dd</b>.", highlightClosestToLen(CONTENT, query, 11, 1.f));
assertEquals("long goal A",
" Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20), 0.f));
assertEquals("long goal B",
"Aa bb. Cc <b>dd</b>. Ee ff", highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20), 0.5f));
assertEquals("long goal C",
"Aa bb. Cc <b>dd</b>.", highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20), 1.f));
query = query("ddxyzee");
assertEquals("test fragment search from the middle of the match; almost including",
"<b>ddxyzee</b> ", highlightClosestToLen(CONTENT3, query, 7, 0.5f, 1, ' '));
assertEquals("test fragment search from the middle of the match; barely including",
"bbcc <b>ddxyzee</b> ffgg ", highlightClosestToLen(CONTENT3, query, 14, 0.5f, 1, ' '));
}
public void testMinLen() throws IOException {
// minLen mode is simpler than targetLen... just test a few cases
Query query = query("dd");
assertEquals("almost two sent",
" Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 6));
assertEquals("barely two sent",
" Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 7));
assertEquals("almost two sent A",
" Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 0, 0.f));
assertEquals("almost two sent B",
" Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 1, 0.5f));
assertEquals("almost two sent C",
" Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 5, 1.f));
assertEquals("barely two sent A",
" Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 1, 0.f));
assertEquals("barely two sent B",
" Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 2, 0.5f));
assertEquals("barely two sent C",
"Aa bb. Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 7, 1.f));
assertEquals("barely two sent D/a",
" Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 2, 0.55f));
assertEquals("barely two sent D/b",
" Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 3, 0.55f));
assertEquals("barely two sent E/a",
" Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 10, 0.5f));
assertEquals("barely two sent E/b",
"Aa bb. Cc <b>dd</b>. Ee ff", highlightMinLen(CONTENT, query, 10, 0.7f));
assertEquals("barely two sent E/c",
"Aa bb. Cc <b>dd</b>.", highlightMinLen(CONTENT, query, 9, 0.9f));
query = query("ddxyzee");
assertEquals("test fragment search from the middle of the match; almost including",
"<b>ddxyzee</b> ", highlightMinLen(CONTENT3, query, 7, 0.5f, ' '));
assertEquals("test fragment search from the middle of the match; barely including",
"bbcc <b>ddxyzee</b> ffgg ", highlightMinLen(CONTENT3, query, 8, 0.5f, ' '));
}
public void testMinLenPrecision() throws IOException {
Query query = query("x");
assertEquals("test absolute minimal length",
"<b>X</b> ", highlightMinLen(CONTENT2, query, 1, 0.5f, ' '));
assertEquals("test slightly above minimal length",
"dd <b>X</b> Ee ", highlightMinLen(CONTENT2, query, 4, 0.5f, ' '));
}
public void testDefaultSummaryTargetLen() throws IOException {
Query query = query("zz");
assertEquals("Aa bb.",
highlightClosestToLen(CONTENT, query, random().nextInt(10))); // < 10
for (float align : ALIGNS) { // alignment is not used for creating default-summary
assertEquals("Aa bb.",
highlightClosestToLen(CONTENT, query, 6 + random().nextInt(4), align));
assertEquals("Aa bb. Cc dd.",
highlightClosestToLen(CONTENT, query, 12 + random().nextInt(4), align));
assertEquals("Aa bb. Cc dd. Ee ff",
highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20), align));
}
assertEquals("Aa bb. Cc dd.",
highlightClosestToLen(CONTENT, query, 10 + 6)); // cusp of adding 3rd sentence
assertEquals("Aa bb. Cc dd. Ee ff",
highlightClosestToLen(CONTENT, query, 17 + random().nextInt(20))); // >= 14
highlightClosestToLen(CONTENT, query, 6 + random().nextInt(4), 0.f, 2));
}
private Query query(String qStr) {
return new QueryBuilder(analyzer).createBooleanQuery(FIELD, qStr);
}
private String highlightClosestToLen(String content, Query query, int lengthGoal) throws IOException {
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
highlighter.setBreakIterator(() -> LengthGoalBreakIterator.createClosestToLength(new CustomSeparatorBreakIterator('.'), lengthGoal));
return highlighter.highlightWithoutSearcher(FIELD, query, content, 1).toString();
private String highlightClosestToLen(String content, Query query, int lengthGoal, float fragAlign) throws IOException {
return highlightClosestToLen(content, query, lengthGoal, fragAlign, 1);
}
private String highlightMinLen(String content, Query query, int lengthGoal) throws IOException {
private String highlightClosestToLen(String content, Query query, int lengthGoal, float fragAlign, int maxPassages) throws IOException {
return highlightClosestToLen(content, query, lengthGoal, fragAlign, maxPassages, '.');
}
private String highlightClosestToLen(String content, Query query, int lengthGoal, float fragAlign, int maxPassages, char separator) throws IOException {
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
highlighter.setBreakIterator(() -> LengthGoalBreakIterator.createClosestToLength(new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
return highlighter.highlightWithoutSearcher(FIELD, query, content, maxPassages).toString();
}
private String highlightMinLen(String content, Query query, int lengthGoal, float fragAlign) throws IOException {
return highlightMinLen(content, query, lengthGoal, fragAlign, '.');
}
private String highlightMinLen(String content, Query query, int lengthGoal, float fragAlign, char separator) throws IOException {
// differs from above only by "createMinLength"
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
highlighter.setBreakIterator(() -> LengthGoalBreakIterator.createMinLength(new CustomSeparatorBreakIterator('.'), lengthGoal));
highlighter.setBreakIterator(() -> LengthGoalBreakIterator.createMinLength(new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
return highlighter.highlightWithoutSearcher(FIELD, query, content, 1).toString();
}
}

solr/CHANGES.txt

@@ -184,6 +184,10 @@ Improvements
* SOLR-14129: Reuse Jackson ObjectMapper in AuditLoggerPlugin (janhoy)
* LUCENE-9093: The Unified highlighter has two new passage sizing parameters, hl.fragAlignRatio and
hl.fragsizeIsMinimum, with defaults that aim to better center matches in fragments than previously. See the ref guide.
Regardless of the settings, the passages may be sized differently than before. (Nándor Mátravölgyi, David Smiley)
Optimizations
---------------------
(No changes)

UnifiedSolrHighlighter.java

@@ -326,8 +326,11 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
if (fragsize <= 1) { // no real minimum size
return baseBI;
}
return LengthGoalBreakIterator.createMinLength(baseBI, fragsize);
// TODO option for using createClosestToLength()
float fragalign = params.getFieldFloat(field, HighlightParams.FRAGALIGNRATIO, 0.5f);
if (params.getFieldBool(field, HighlightParams.FRAGSIZEISMINIMUM, false)) {
return LengthGoalBreakIterator.createMinLength(baseBI, fragsize, fragalign);
}
return LengthGoalBreakIterator.createClosestToLength(baseBI, fragsize, fragalign);
}
/**

TestUnifiedSolrHighlighter.java

@@ -251,8 +251,17 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "SEPARATOR","hl.bs.separator","#","hl.fragsize", "-1"),
"//lst[@name='highlighting']/lst[@name='104']/arr[@name='text']/str='While the other <em>document</em> contains the same #'");
assertQ("CUSTOM breakiterator with fragsize 70 minimum",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "SEPARATOR","hl.bs.separator","#","hl.fragsize", "70", "hl.fragsizeIsMinimum", "true"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='This <em>document</em> contains # special characters, while the other <em>document</em> contains the same #'");
assertQ("CUSTOM breakiterator with fragsize 70",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "SEPARATOR","hl.bs.separator","#","hl.fragsize", "70"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='This <em>document</em> contains #'");
assertQ("CUSTOM breakiterator with fragsize 90",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "SEPARATOR","hl.bs.separator","#","hl.fragsize", "90"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='This <em>document</em> contains #'");
assertQ("CUSTOM breakiterator with fragsize 100",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "SEPARATOR","hl.bs.separator","#","hl.fragsize", "100"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='This <em>document</em> contains # special characters, while the other <em>document</em> contains the same #'");
}
@@ -262,11 +271,17 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
assertU(adoc("id", "10", "text", "This is a sentence just under seventy chars in length blah blah. Next sentence is here."));
assertU(commit());
assertQ("default fragsize",
req("q", "text:seventy", "hl", "true"),
req("q", "text:seventy", "hl", "true", "hl.fragsizeIsMinimum", "true"),
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. Next sentence is here.'");
assertQ("smaller fragsize",
req("q", "text:seventy", "hl", "true", "hl.fragsize", "60"), // a bit smaller
assertQ("default fragsize",
req("q", "text:seventy", "hl", "true", "hl.fragsizeIsMinimum", "true", "hl.fragsize", "60"),
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. '");
assertQ("smaller fragsize",
req("q", "text:seventy", "hl", "true"),
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. '");
assertQ("default fragsize",
req("q", "text:seventy", "hl", "true", "hl.fragsize", "90"),
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. Next sentence is here.'");
}
public void testEncoder() {

highlighting.adoc

@@ -225,6 +225,14 @@ By default, the Unified Highlighter will usually pick the right offset source (s
+
The offset source can be explicitly configured to one of: `ANALYSIS`, `POSTINGS`, `POSTINGS_WITH_TERM_VECTORS`, or `TERM_VECTORS`.
`hl.fragAlignRatio`::
Fragment alignment influences where the match is positioned within a passage. This floating point value determines how the `hl.fragsize` budget is split around the match. The default value of `0.5` centers the match in the passage. A value of `0.0` aligns the match to the left, while `1.0` aligns it to the right. _Note: there are situations where the requested alignment is not achievable. This depends on the length of the match, the break iterator used, and the text content around the match._
`hl.fragsizeIsMinimum`::
By default this parameter is `false`, in which case the highlighter may accept a fragment shorter than `hl.fragsize` if that is closer to the target length than the next longer break would be.
+
If treating `hl.fragsize` as a strict minimum length is acceptable, turning this on has some performance benefits to consider.
`hl.tag.ellipsis`::
By default, each snippet is returned as a separate value (as is done with the other highlighters). Set this parameter to instead return one string with this text as the delimiter. _Note: this is likely to be removed in the future._
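
Putting the two new parameters together, a hedged example request (the field name and values are invented for illustration):

q=text:document&hl=true&hl.method=unified&hl.fl=text&hl.fragsize=70&hl.fragAlignRatio=0.5&hl.fragsizeIsMinimum=false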

HighlightParams.java

@@ -47,6 +47,8 @@ public interface HighlightParams {
// sizing
public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH, UH
public static final String FRAGSIZEISMINIMUM = HIGHLIGHT+".fragsizeIsMinimum"; // UH
public static final String FRAGALIGNRATIO = HIGHLIGHT+".fragAlignRatio"; // UH
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; // OH
public static final String INCREMENT = HIGHLIGHT+".increment"; // OH
public static final String REGEX = "regex"; // OH