NIFI-12238 Fix SplitText endline trimming with max fragment size (#7892)

This commit is contained in:
Gabor Gyimesi 2023-10-18 18:40:52 +02:00 committed by GitHub
parent 574c2b2168
commit 91e4b453b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 13 deletions

View File

@@ -459,13 +459,6 @@ public class SplitText extends AbstractProcessor {
while ((offsetInfo = demarcator.nextOffsetInfo()) != null) {
lastCrlfLength = offsetInfo.getCrlfLength();
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
trailingCrlfLength += offsetInfo.getCrlfLength();
trailingLineCount++;
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
}
if (length + offsetInfo.getLength() + startingLength > this.maxSplitSize) {
if (length == 0) { // single line per split
length += offsetInfo.getLength();
@@ -474,12 +467,19 @@ public class SplitText extends AbstractProcessor {
remaningOffsetInfo = offsetInfo;
}
break;
} else {
length += offsetInfo.getLength();
actualLineCount++;
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
break;
}
}
if (offsetInfo.getLength() == offsetInfo.getCrlfLength()) {
trailingCrlfLength += offsetInfo.getCrlfLength();
trailingLineCount++;
} else if (offsetInfo.getLength() > offsetInfo.getCrlfLength()) {
trailingCrlfLength = 0; // non-empty line came in, thus resetting counter
}
length += offsetInfo.getLength();
actualLineCount++;
if (splitMaxLineCount > 0 && actualLineCount >= splitMaxLineCount) {
break;
}
}

View File

@@ -890,4 +890,25 @@ public class TestSplitText {
splits.get(1).assertContentEquals("\n");
}
@Test
public void testMaxFragmentSizeWithTrimmedEndlines() {
final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText());
splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "0");
splitRunner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "30 B");
splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");
splitRunner.enqueue("header1\nheader2\nline1 longer than limit\nline2\nline3\n\n\n\n\n");
splitRunner.run();
splitRunner.assertTransferCount(SplitText.REL_SPLITS, 3);
splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);
final List<MockFlowFile> splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
splits.get(0).assertContentEquals("header1\nheader2\nline1 longer than limit");
splits.get(1).assertContentEquals("header1\nheader2\nline2\nline3");
splits.get(2).assertContentEquals("header1\nheader2");
}
}