NIFI-11430 - PaginatedJsonQueryElasticsearch processors should not output empty FlowFile if hits have been found; PaginatedJsonQueryElasticsearch processors should be able to use _source and _meta only result formats when grouping by query

This closes #7163

Signed-off-by: Chris Sampson <chris.sampson82@gmail.com>
This commit is contained in:
Ryan Van Den Bos 2023-04-12 10:21:57 +01:00 committed by Chris Sampson
parent f7e36a07ac
commit 658f2547d8
No known key found for this signature in database
GPG Key ID: 546AEB0826587237
7 changed files with 199 additions and 105 deletions

View File

@ -367,7 +367,7 @@ public abstract class AbstractJsonQueryElasticsearch<Q extends JsonQueryParamete
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private List<Map<String, Object>> formatHits(final List<Map<String, Object>> hits) { List<Map<String, Object>> formatHits(final List<Map<String, Object>> hits) {
final List<Map<String, Object>> formattedHits; final List<Map<String, Object>> formattedHits;
if (hitFormat == SearchResultsFormat.METADATA_ONLY) { if (hitFormat == SearchResultsFormat.METADATA_ONLY) {

View File

@ -75,6 +75,7 @@ public abstract class AbstractPaginatedJsonQueryElasticsearch extends AbstractJs
.build(); .build();
static final List<PropertyDescriptor> paginatedPropertyDescriptors; static final List<PropertyDescriptor> paginatedPropertyDescriptors;
static { static {
final List<PropertyDescriptor> descriptors = new ArrayList<>(); final List<PropertyDescriptor> descriptors = new ArrayList<>();
descriptors.add(QUERY_ATTRIBUTE); descriptors.add(QUERY_ATTRIBUTE);
@ -239,7 +240,7 @@ public abstract class AbstractPaginatedJsonQueryElasticsearch extends AbstractJs
private void combineHits(final List<Map<String, Object>> hits, final PaginatedJsonQueryParameters paginatedJsonQueryParameters, private void combineHits(final List<Map<String, Object>> hits, final PaginatedJsonQueryParameters paginatedJsonQueryParameters,
final ProcessSession session, final FlowFile parent, final ProcessSession session, final FlowFile parent,
final Map<String, String> attributes, final List<FlowFile> hitsFlowFiles) { final Map<String, String> attributes, final List<FlowFile> hitsFlowFiles, final boolean newQuery) {
if (hits != null && !hits.isEmpty()) { if (hits != null && !hits.isEmpty()) {
final FlowFile hitFlowFile; final FlowFile hitFlowFile;
final boolean append = !hitsFlowFiles.isEmpty(); final boolean append = !hitsFlowFiles.isEmpty();
@ -251,7 +252,7 @@ public abstract class AbstractPaginatedJsonQueryElasticsearch extends AbstractJs
hitsFlowFiles.add(writeCombinedHitFlowFile(paginatedJsonQueryParameters.getHitCount() + hits.size(), hitsFlowFiles.add(writeCombinedHitFlowFile(paginatedJsonQueryParameters.getHitCount() + hits.size(),
hits, session, hitFlowFile, attributes, append)); hits, session, hitFlowFile, attributes, append));
} else if (isOutputNoHits()) { } else if (isOutputNoHits() && newQuery) {
final FlowFile hitFlowFile = createChildFlowFile(session, parent); final FlowFile hitFlowFile = createChildFlowFile(session, parent);
hitsFlowFiles.add(writeHitFlowFile(0, "", session, hitFlowFile, attributes)); hitsFlowFiles.add(writeHitFlowFile(0, "", session, hitFlowFile, attributes));
} }
@ -271,7 +272,9 @@ public abstract class AbstractPaginatedJsonQueryElasticsearch extends AbstractJs
attributes.put("page.number", Integer.toString(paginatedJsonQueryParameters.getPageCount())); attributes.put("page.number", Integer.toString(paginatedJsonQueryParameters.getPageCount()));
if (hitStrategy == ResultOutputStrategy.PER_QUERY) { if (hitStrategy == ResultOutputStrategy.PER_QUERY) {
combineHits(hits, paginatedJsonQueryParameters, session, parent, attributes, hitsFlowFiles);
final List<Map<String, Object>> formattedHits = formatHits(hits);
combineHits(formattedHits, paginatedJsonQueryParameters, session, parent, attributes, hitsFlowFiles, newQuery);
// output results if it seems we've combined all available results (i.e. no hits in this page and therefore no more expected) // output results if it seems we've combined all available results (i.e. no hits in this page and therefore no more expected)
if (!hitsFlowFiles.isEmpty() && (hits == null || hits.isEmpty())) { if (!hitsFlowFiles.isEmpty() && (hits == null || hits.isEmpty())) {

View File

@ -42,9 +42,8 @@ import static org.junit.jupiter.api.Assertions.assertInstanceOf
import static org.junit.jupiter.api.Assertions.assertNotNull import static org.junit.jupiter.api.Assertions.assertNotNull
import static org.junit.jupiter.api.Assertions.assertThrows import static org.junit.jupiter.api.Assertions.assertThrows
import static org.junit.jupiter.api.Assertions.assertTrue import static org.junit.jupiter.api.Assertions.assertTrue
abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryElasticsearch> { abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryElasticsearch> {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
static final String INDEX_NAME = "messages" static final String INDEX_NAME = "messages"
@ -101,8 +100,9 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
.stream().map(r -> r.getValue()) .stream().map(r -> r.getValue())
.collect(Collectors.joining(", ")) .collect(Collectors.joining(", "))
final String expectedAllowedSplitHits = processor instanceof AbstractPaginatedJsonQueryElasticsearch final String expectedAllowedSplitHits = processor instanceof AbstractPaginatedJsonQueryElasticsearch
? ResultOutputStrategy.values().collect {r -> r.getValue()}.join(", ") ? ResultOutputStrategy.values().collect { r -> r.getValue() }.join(", ")
: nonPaginatedResultOutputStrategies : ResultOutputStrategy.getNonPaginatedResponseOutputStrategies().stream()
.map(r -> r.getValue()).collect(Collectors.joining(", "))
final AssertionError assertionError = assertThrows(AssertionError.class, runner.&run) final AssertionError assertionError = assertThrows(AssertionError.class, runner.&run)
assertThat(assertionError.getMessage(), equalTo(String.format("Processor has 8 validation failures:\n" + assertThat(assertionError.getMessage(), equalTo(String.format("Processor has 8 validation failures:\n" +
@ -130,7 +130,7 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
void testBasicQuery() throws Exception { void testBasicQuery() throws Exception {
// test hits (no splitting) - full hit format // test hits (no splitting) - full hit format
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_FORMAT, SearchResultsFormat.FULL.getValue()) runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_FORMAT, SearchResultsFormat.FULL.getValue())
runOnce(runner) runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 1, 0, 0) testCounts(runner, isInput() ? 1 : 0, 1, 0, 0)
@ -139,7 +139,7 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
assertOutputContent(hits.getContent(), 10, false) assertOutputContent(hits.getContent(), 10, false)
final List<Map<String, Object>> result = OBJECT_MAPPER.readValue(hits.getContent(), List.class) final List<Map<String, Object>> result = OBJECT_MAPPER.readValue(hits.getContent(), List.class)
result.forEach({ hit -> result.forEach({ hit ->
final Map<String, Object> h = ((Map<String, Object>)hit) final Map<String, Object> h = ((Map<String, Object>) hit)
assertFalse(h.isEmpty()) assertFalse(h.isEmpty())
assertTrue(h.containsKey("_source")) assertTrue(h.containsKey("_source"))
assertTrue(h.containsKey("_index")) assertTrue(h.containsKey("_index"))
@ -211,14 +211,13 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
final TestElasticsearchClientService service = getService(runner) final TestElasticsearchClientService service = getService(runner)
service.setMaxPages(0) service.setMaxPages(0)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
runner.setProperty(AbstractJsonQueryElasticsearch.OUTPUT_NO_HITS, "false") runner.setProperty(AbstractJsonQueryElasticsearch.OUTPUT_NO_HITS, "false")
runOnce(runner) runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 0, 0, 0) testCounts(runner, isInput() ? 1 : 0, 0, 0, 0)
assertThat( assertThat(
runner.getProvenanceEvents().stream().filter({ pe -> runner.getProvenanceEvents().stream().filter({ pe ->
pe.getEventType() == ProvenanceEventType.RECEIVE && pe.getEventType() == ProvenanceEventType.RECEIVE
pe.getAttribute("uuid") == hits.getAttribute("uuid")
}).count(), }).count(),
is(0L) is(0L)
) )
@ -247,8 +246,8 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
@Test @Test
void testAggregations() throws Exception { void testAggregations() throws Exception {
String query = prettyPrint(toJson([ String query = prettyPrint(toJson([
query: [ match_all: [:] ], query: [match_all: [:]],
aggs: [ term_agg: [ terms: [ field: "msg" ] ], term_agg2: [ terms: [ field: "msg" ] ] ] aggs : [term_agg: [terms: [field: "msg"]], term_agg2: [terms: [field: "msg"]]]
])) ]))
// test aggregations (no splitting) - full aggregation format // test aggregations (no splitting) - full aggregation format
@ -289,7 +288,7 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
agg.keySet().forEach({ aggName -> agg.keySet().forEach({ aggName ->
final List<Map<String, Object>> termAgg = agg.get(aggName) as List<Map<String, Object>> final List<Map<String, Object>> termAgg = agg.get(aggName) as List<Map<String, Object>>
assertThat(termAgg.size(), is(5)) assertThat(termAgg.size(), is(5))
termAgg.forEach({a -> termAgg.forEach({ a ->
assertTrue(a.containsKey("key")) assertTrue(a.containsKey("key"))
assertTrue(a.containsKey("doc_count")) assertTrue(a.containsKey("doc_count"))
}) })
@ -321,8 +320,8 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
// test using Expression Language (index, type, query) // test using Expression Language (index, type, query)
query = prettyPrint(toJson([ query = prettyPrint(toJson([
query: [ match_all: [:] ], query: [match_all: [:]],
aggs: [ term_agg: [ terms: [ field: "\${fieldValue}" ] ], term_agg2: [ terms: [ field: "\${fieldValue}" ] ] ] aggs : [term_agg: [terms: [field: "\${fieldValue}"]], term_agg2: [terms: [field: "\${fieldValue}"]]]
])) ]))
runner.setVariable("fieldValue", "msg") runner.setVariable("fieldValue", "msg")
runner.setVariable("es.index", INDEX_NAME) runner.setVariable("es.index", INDEX_NAME)
@ -347,8 +346,8 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
@Test @Test
void testErrorDuringSearch() throws Exception { void testErrorDuringSearch() throws Exception {
String query = prettyPrint(toJson([ String query = prettyPrint(toJson([
query: [ match_all: [:] ], query: [match_all: [:]],
aggs: [ term_agg: [ terms: [ field: "msg" ] ], term_agg2: [ terms: [ field: "msg" ] ] ] aggs : [term_agg: [terms: [field: "msg"]], term_agg2: [terms: [field: "msg"]]]
])) ]))
final TestRunner runner = createRunner(true) final TestRunner runner = createRunner(true)
@ -361,8 +360,8 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
@Test @Test
void testQueryAttribute() throws Exception { void testQueryAttribute() throws Exception {
String query = prettyPrint(toJson([ String query = prettyPrint(toJson([
query: [ match_all: [:] ], query: [match_all: [:]],
aggs: [ term_agg: [ terms: [ field: "msg" ] ], term_agg2: [ terms: [ field: "msg" ] ] ] aggs : [term_agg: [terms: [field: "msg"]], term_agg2: [terms: [field: "msg"]]]
])) ]))
final String queryAttr = "es.query" final String queryAttr = "es.query"
@ -384,7 +383,7 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
@Test @Test
void testInputHandling() { void testInputHandling() {
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
runner.setIncomingConnection(true) runner.setIncomingConnection(true)
runner.run() runner.run()
@ -399,7 +398,7 @@ abstract class AbstractJsonQueryElasticsearchTest<P extends AbstractJsonQueryEla
@Test @Test
void testRequestParameters() { void testRequestParameters() {
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
runner.setProperty("refresh", "true") runner.setProperty("refresh", "true")
runner.setProperty("slices", '${slices}') runner.setProperty("slices", '${slices}')
runner.setVariable("slices", "auto") runner.setVariable("slices", "auto")

View File

@ -32,7 +32,9 @@ import static groovy.json.JsonOutput.toJson
import static org.hamcrest.CoreMatchers.equalTo import static org.hamcrest.CoreMatchers.equalTo
import static org.hamcrest.CoreMatchers.is import static org.hamcrest.CoreMatchers.is
import static org.hamcrest.MatcherAssert.assertThat import static org.hamcrest.MatcherAssert.assertThat
import static org.junit.jupiter.api.Assertions.assertFalse
import static org.junit.jupiter.api.Assertions.assertThrows import static org.junit.jupiter.api.Assertions.assertThrows
import static org.junit.jupiter.api.Assertions.assertTrue
abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQueryElasticsearchTest<AbstractPaginatedJsonQueryElasticsearch> { abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQueryElasticsearchTest<AbstractPaginatedJsonQueryElasticsearch> {
abstract boolean isInput() abstract boolean isInput()
@ -40,7 +42,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
@Test @Test
void testInvalidPaginationProperties() { void testInvalidPaginationProperties() {
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE, "not-a-period") runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE, "not-a-period")
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, "not-enum") runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, "not-enum")
@ -49,7 +51,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
"'%s' validated against 'not-enum' is invalid because Given value not found in allowed set '%s'\n" + "'%s' validated against 'not-enum' is invalid because Given value not found in allowed set '%s'\n" +
"'%s' validated against 'not-a-period' is invalid because Must be of format <duration> <TimeUnit> where <duration> " + "'%s' validated against 'not-a-period' is invalid because Must be of format <duration> <TimeUnit> where <duration> " +
"is a non-negative integer and TimeUnit is a supported Time Unit, such as: nanos, millis, secs, mins, hrs, days\n", "is a non-negative integer and TimeUnit is a supported Time Unit, such as: nanos, millis, secs, mins, hrs, days\n",
AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE.getName(), PaginationType.values().collect {p -> p.getValue()}.join(", "), AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE.getName(), PaginationType.values().collect { p -> p.getValue() }.join(", "),
AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE.getName(), AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE.getName(),
AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE.getName() AbstractPaginatedJsonQueryElasticsearch.PAGINATION_KEEP_ALIVE.getName()
))) )))
@ -59,7 +61,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
void testSinglePage() { void testSinglePage() {
// paged query hits (no splitting) // paged query hits (no splitting)
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
MockFlowFile input = runOnce(runner) MockFlowFile input = runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 1, 0, 0) testCounts(runner, isInput() ? 1 : 0, 1, 0, 0)
FlowFile hits = runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0) FlowFile hits = runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0)
@ -117,13 +119,104 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
assertSendEvent(runner, input) assertSendEvent(runner, input)
} }
/**
 * Asserts that a single parsed hit has the top-level keys expected for the given results format.
 *
 * The hit must never be empty. Beyond that:
 *   SOURCE_ONLY   - neither "_source" nor "_index" is present
 *   METADATA_ONLY - "_index" is present, "_source" is not
 *   FULL          - both "_source" and "_index" are present
 *
 * @param searchResultsFormat the format the processor was configured with
 * @param hit                 one hit, already deserialised into a Map
 * @throws IllegalArgumentException if the format is not one of the three handled values
 */
static void assertFormattedResult(final SearchResultsFormat searchResultsFormat, final Map<String, Object> hit) {
    assertFalse(hit.isEmpty())

    if (searchResultsFormat == SearchResultsFormat.SOURCE_ONLY) {
        assertFalse(hit.containsKey("_source"))
        assertFalse(hit.containsKey("_index"))
        return
    }

    if (searchResultsFormat == SearchResultsFormat.METADATA_ONLY) {
        assertFalse(hit.containsKey("_source"))
        assertTrue(hit.containsKey("_index"))
        return
    }

    if (searchResultsFormat == SearchResultsFormat.FULL) {
        assertTrue(hit.containsKey("_source"))
        assertTrue(hit.containsKey("_index"))
        return
    }

    // anything else is a format this helper has not been taught about
    throw new IllegalArgumentException("Unknown SearchResultsFormat value: " + searchResultsFormat.toString())
}
/**
 * Verifies the FlowFiles routed to the hits relationship after a single run: how many there are,
 * their "hit.count" attribute, the shape of their content, the format of every hit they contain,
 * and that each FlowFile has exactly one RECEIVE provenance event.
 *
 * Expected output per strategy (as encoded in the switch below):
 *   PER_QUERY    - 1 FlowFile, hit.count "10", content checked as newline-delimited JSON
 *   PER_HIT      - 10 FlowFiles, hit.count "1"
 *   PER_RESPONSE - 1 FlowFile, hit.count "10", content is a JSON array
 *
 * @param runner               the runner that has just executed the processor
 * @param resultOutputStrategy the split strategy the processor was configured with
 * @param searchResultsFormat  the hit format the processor was configured with
 * @throws IllegalArgumentException if the strategy is not one of the three handled values
 */
private void assertResultsFormat(final TestRunner runner, final ResultOutputStrategy resultOutputStrategy, final SearchResultsFormat searchResultsFormat) {
    int flowFileCount
    String hitsCount
    boolean ndjson = false

    switch (resultOutputStrategy) {
        case ResultOutputStrategy.PER_QUERY:
            flowFileCount = 1
            hitsCount = "10"
            ndjson = true
            break
        case ResultOutputStrategy.PER_HIT:
            flowFileCount = 10
            hitsCount = "1"
            break
        case ResultOutputStrategy.PER_RESPONSE:
            flowFileCount = 1
            hitsCount = "10"
            break
        default:
            throw new IllegalArgumentException("Unknown ResultOutputStrategy value: " + resultOutputStrategy.toString())
    }

    // Test Relationship counts
    testCounts(runner, isInput() ? 1 : 0, flowFileCount, 0, 0)

    runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).forEach({ hit ->
        hit.assertAttributeEquals("hit.count", hitsCount)
        assertOutputContent(hit.getContent(), hitsCount as int, ndjson)

        if (ResultOutputStrategy.PER_RESPONSE == resultOutputStrategy) {
            // Per response outputs an array of values
            OBJECT_MAPPER.readValue(hit.getContent(), ArrayList.class).forEach(h -> {
                assertFormattedResult(searchResultsFormat, h as Map<String, Object>)
            })
        } else if (ndjson) {
            // Combined output holds one JSON document per line. A single readValue() over the whole
            // content stops after the first document, so parse line by line to check every hit.
            for (final String line : hit.getContent().split("\n")) {
                final Map<String, Object> h = OBJECT_MAPPER.readValue(line, Map.class)
                assertFormattedResult(searchResultsFormat, h)
            }
        } else {
            // Per hit outputs exactly one JSON object
            final Map<String, Object> h = OBJECT_MAPPER.readValue(hit.getContent(), Map.class)
            assertFormattedResult(searchResultsFormat, h)
        }

        // every output FlowFile must have been reported exactly once as RECEIVEd
        assertThat(
                runner.getProvenanceEvents().stream().filter({ pe ->
                    pe.getEventType() == ProvenanceEventType.RECEIVE &&
                            pe.getAttribute("uuid") == hit.getAttribute("uuid")
                }).count(),
                is(1L)
        )
    })
}
/**
 * Runs the processor once for every combination of result output strategy, search results
 * format and pagination type, checking the produced hits after each run.
 * A fresh runner is created per output strategy and reset after every run.
 */
@Test
void testResultsFormat() throws Exception {
    // the same sorted match_all query is used for every combination
    final String sortedMatchAllQuery = prettyPrint(toJson([query: [match_all: [:]], "sort": [[message: [order: "asc"]]]]))

    // plain loops rather than closures: the private assertion helper is invoked from here
    for (strategy in ResultOutputStrategy.values()) {
        final TestRunner runner = createRunner(false)
        runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, sortedMatchAllQuery)
        runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, strategy.getValue())

        for (format in SearchResultsFormat.values()) {
            runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_FORMAT, format.getValue())

            for (pagination in PaginationType.values()) {
                runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, pagination.getValue())

                runOnce(runner)
                assertResultsFormat(runner, strategy, format)
                reset(runner)
            }
        }
    }
}
@Test @Test
void testScrollError() { void testScrollError() {
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
final TestElasticsearchClientService service = getService(runner) final TestElasticsearchClientService service = getService(runner)
service.setThrowErrorInDelete(true) service.setThrowErrorInDelete(true)
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.SCROLL.getValue()) runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.SCROLL.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [ msg: "desc" ], query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [msg: "desc"], query: [match_all: [:]]])))
// still expect "success" output for exception during final clean-up // still expect "success" output for exception during final clean-up
runMultiple(runner, 2) runMultiple(runner, 2)
@ -147,7 +240,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_FORMAT, SearchResultsFormat.FULL.getValue()) runner.setProperty(AbstractJsonQueryElasticsearch.SEARCH_RESULTS_FORMAT, SearchResultsFormat.FULL.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.AGGREGATION_RESULTS_FORMAT, AggregationResultsFormat.FULL.getValue()) runner.setProperty(AbstractJsonQueryElasticsearch.AGGREGATION_RESULTS_FORMAT, AggregationResultsFormat.FULL.getValue())
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue()) runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [ msg: "desc" ], query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [msg: "desc"], query: [match_all: [:]]])))
// still expect "success" output for exception during final clean-up // still expect "success" output for exception during final clean-up
runMultiple(runner, 2) runMultiple(runner, 2)
@ -169,7 +262,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
final TestElasticsearchClientService service = getService(runner) final TestElasticsearchClientService service = getService(runner)
service.setThrowErrorInPit(true) service.setThrowErrorInPit(true)
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue()) runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [ msg: "desc" ], query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([sort: [msg: "desc"], query: [match_all: [:]]])))
// expect "failure" output for exception during query setup // expect "failure" output for exception during query setup
runOnce(runner) runOnce(runner)
@ -190,7 +283,7 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
// test PiT without sort // test PiT without sort
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue()) runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, PaginationType.POINT_IN_TIME.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [ match_all: [:] ]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]])))
// expect "failure" output for exception during query setup // expect "failure" output for exception during query setup
runOnce(runner) runOnce(runner)
@ -265,40 +358,25 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
is(1L) is(1L)
) )
} else { } else {
assertThat(runner.getProvenanceEvents().stream().filter({ pe -> pe.getEventType() == ProvenanceEventType.SEND}).count(), is(0L)) assertThat(runner.getProvenanceEvents().stream().filter({ pe -> pe.getEventType() == ProvenanceEventType.SEND }).count(), is(0L))
} }
} }
@Test @Test
void testNoHitsFlowFileIsProducedForEachResultSplitSetup() { void testEmptyHitsFlowFileIsProducedForEachResultSplitSetup() {
final TestRunner runner = createRunner(false) final TestRunner runner = createRunner(false)
final TestElasticsearchClientService service = getService(runner) final TestElasticsearchClientService service = getService(runner)
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]]]))) runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([query: [match_all: [:]], "sort": [[message: [order: "asc"]]]])))
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.OUTPUT_NO_HITS, "true") runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.OUTPUT_NO_HITS, "true")
service.setMaxPages(0) service.setMaxPages(0)
for (final PaginationType paginationType : PaginationType.values()) {
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, paginationType.getValue())
for (final ResultOutputStrategy resultOutputStrategy : ResultOutputStrategy.values()) {
// test that an empty flow file is produced for a per query setup // test that an empty flow file is produced for a per query setup
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, ResultOutputStrategy.PER_QUERY.getValue()) runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, resultOutputStrategy.getValue())
runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 1, 0, 0)
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("hit.count", "0")
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("page.number", "1")
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getSize() == 0
reset(runner)
// test that an empty flow file is produced for a per hit setup
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, ResultOutputStrategy.PER_HIT.getValue())
runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 1, 0, 0)
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("hit.count", "0")
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("page.number", "1")
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getSize() == 0
reset(runner)
// test that an empty flow file is produced for a per response setup
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, ResultOutputStrategy.PER_RESPONSE.getValue())
runOnce(runner) runOnce(runner)
testCounts(runner, isInput() ? 1 : 0, 1, 0, 0) testCounts(runner, isInput() ? 1 : 0, 1, 0, 0)
@ -306,5 +384,9 @@ abstract class AbstractPaginatedJsonQueryElasticsearchTest extends AbstractJsonQ
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("page.number", "1") runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("page.number", "1")
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getSize() == 0 runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getSize() == 0
reset(runner) reset(runner)
}
}
} }
} }

View File

@ -17,7 +17,6 @@
package org.apache.nifi.processors.elasticsearch package org.apache.nifi.processors.elasticsearch
import org.apache.nifi.processors.elasticsearch.api.PaginationType import org.apache.nifi.processors.elasticsearch.api.PaginationType
import org.apache.nifi.processors.elasticsearch.api.ResultOutputStrategy import org.apache.nifi.processors.elasticsearch.api.ResultOutputStrategy
import org.apache.nifi.util.TestRunner import org.apache.nifi.util.TestRunner
@ -40,15 +39,9 @@ class PaginatedJsonQueryElasticsearchTest extends AbstractPaginatedJsonQueryElas
return true return true
} }
void testPagination(final PaginationType paginationType) { static void validatePagination(final TestRunner runner, final ResultOutputStrategy resultOutputStrategy) {
// test flowfile per page switch (resultOutputStrategy) {
final TestRunner runner = createRunner(false) case ResultOutputStrategy.PER_RESPONSE:
final TestElasticsearchClientService service = getService(runner)
service.setMaxPages(2)
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, paginationType.getValue())
runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([size: 10, sort: [ msg: "desc"], query: [ match_all: [:] ]])))
runOnce(runner)
testCounts(runner, 1, 2, 0, 0) testCounts(runner, 1, 2, 0, 0)
int page = 1 int page = 1
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).forEach( runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).forEach(
@ -57,29 +50,8 @@ class PaginatedJsonQueryElasticsearchTest extends AbstractPaginatedJsonQueryElas
hit.assertAttributeEquals("page.number", Integer.toString(page++)) hit.assertAttributeEquals("page.number", Integer.toString(page++))
} }
) )
runner.getStateManager().assertStateNotSet() break
reset(runner) case ResultOutputStrategy.PER_QUERY:
// test hits splitting
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, ResultOutputStrategy.PER_HIT.getValue())
runOnce(runner)
testCounts(runner, 1, 20, 0, 0)
int count = 0
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).forEach(
{ hit ->
hit.assertAttributeEquals("hit.count", "1")
// 10 hits per page, so first 10 flowfiles should be page.number 1, the rest page.number 2
hit.assertAttributeEquals("page.number", Integer.toString(Math.ceil(++count / 10) as int))
}
)
runner.getStateManager().assertStateNotSet()
reset(runner)
// test hits combined
runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, ResultOutputStrategy.PER_QUERY.getValue())
runOnce(runner)
testCounts(runner, 1, 1, 0, 0) testCounts(runner, 1, 1, 0, 0)
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("hit.count", "20") runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).assertAttributeEquals("hit.count", "20")
// the "last" page.number is used, so 2 here because there were 2 pages of hits // the "last" page.number is used, so 2 here because there were 2 pages of hits
@ -88,6 +60,44 @@ class PaginatedJsonQueryElasticsearchTest extends AbstractPaginatedJsonQueryElas
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getContent().split("\n").length, runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).get(0).getContent().split("\n").length,
is(20) is(20)
) )
break
case ResultOutputStrategy.PER_HIT:
testCounts(runner, 1, 20, 0, 0)
int count = 0
runner.getFlowFilesForRelationship(AbstractJsonQueryElasticsearch.REL_HITS).forEach(
{ hit ->
hit.assertAttributeEquals("hit.count", "1")
// 10 hits per page, so first 10 flow files should be page.number 1, the rest page.number 2
hit.assertAttributeEquals("page.number", Integer.toString(Math.ceil(++count / 10) as int))
}
)
break
default:
throw new IllegalArgumentException("Unknown ResultOutputStrategy value: " + resultOutputStrategy)
}
}
/**
 * Runs the paginated query once for every {@link ResultOutputStrategy} using the supplied
 * pagination type and checks the resulting FlowFiles via {@code validatePagination}.
 *
 * For each strategy the query is executed twice: once with the runner as configured, and once
 * with OUTPUT_NO_HITS set to "true" to confirm that setting does not change the paginated output.
 *
 * @param paginationType the pagination mechanism to configure on the processor under test
 */
void testPagination(final PaginationType paginationType) {
    final TestRunner runner = createRunner(false)
    final TestElasticsearchClientService service = getService(runner)
    // presumably limits the mock service to two pages of hits; validatePagination expects 2 pages / 20 hits
    service.setMaxPages(2)
    // Apply the pagination type under test; without this the parameter is ignored and every
    // caller exercises the same processor configuration regardless of the type it passes in
    runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.PAGINATION_TYPE, paginationType.getValue())
    runner.setProperty(AbstractJsonQueryElasticsearch.QUERY, prettyPrint(toJson([size: 10, sort: [msg: "desc"], query: [match_all: [:]]])))

    for (final ResultOutputStrategy resultOutputStrategy : ResultOutputStrategy.values()) {
        runner.setProperty(AbstractPaginatedJsonQueryElasticsearch.SEARCH_RESULTS_SPLIT, resultOutputStrategy.getValue())

        runOnce(runner)
        validatePagination(runner, resultOutputStrategy)
        runner.getStateManager().assertStateNotSet()
        reset(runner)

        // Check that OUTPUT_NO_HITS true doesn't have any adverse effects on pagination
        runner.setProperty(AbstractJsonQueryElasticsearch.OUTPUT_NO_HITS, "true")
        runOnce(runner)
        validatePagination(runner, resultOutputStrategy)

        // Unset OUTPUT_NO_HITS so the next strategy starts from the original configuration
        runner.setProperty(AbstractJsonQueryElasticsearch.OUTPUT_NO_HITS, "false")
        reset(runner)
    }
}
} }

View File

@ -51,7 +51,7 @@ import static org.apache.http.auth.AuthScope.ANY;
public abstract class AbstractElasticsearchITBase { public abstract class AbstractElasticsearchITBase {
// default Elasticsearch version should (ideally) match that in the nifi-elasticsearch-bundle#pom.xml for the integration-tests profile // default Elasticsearch version should (ideally) match that in the nifi-elasticsearch-bundle#pom.xml for the integration-tests profile
protected static final DockerImageName IMAGE = DockerImageName protected static final DockerImageName IMAGE = DockerImageName
.parse(System.getProperty("elasticsearch.docker.image", "docker.elastic.co/elasticsearch/elasticsearch:8.7.0")); .parse(System.getProperty("elasticsearch.docker.image", "docker.elastic.co/elasticsearch/elasticsearch:8.7.1"));
protected static final String ELASTIC_USER_PASSWORD = System.getProperty("elasticsearch.elastic_user.password", RandomStringUtils.randomAlphanumeric(10, 20)); protected static final String ELASTIC_USER_PASSWORD = System.getProperty("elasticsearch.elastic_user.password", RandomStringUtils.randomAlphanumeric(10, 20));
private static final int PORT = 9200; private static final int PORT = 9200;
protected static final ElasticsearchContainer ELASTICSEARCH_CONTAINER = new ElasticsearchContainer(IMAGE) protected static final ElasticsearchContainer ELASTICSEARCH_CONTAINER = new ElasticsearchContainer(IMAGE)

View File

@ -101,7 +101,7 @@ language governing permissions and limitations under the License. -->
</activation> </activation>
<properties> <properties>
<!-- also update the default Elasticsearch version in nifi-elasticsearch-test-utils#src/main/java/org/apache/nifi/elasticsearch/integration/AbstractElasticsearchITBase.java--> <!-- also update the default Elasticsearch version in nifi-elasticsearch-test-utils#src/main/java/org/apache/nifi/elasticsearch/integration/AbstractElasticsearchITBase.java-->
<elasticsearch_docker_image>8.7.0</elasticsearch_docker_image> <elasticsearch_docker_image>8.7.1</elasticsearch_docker_image>
<elasticsearch.elastic.password>s3cret</elasticsearch.elastic.password> <elasticsearch.elastic.password>s3cret</elasticsearch.elastic.password>
</properties> </properties>
<build> <build>
@ -132,7 +132,7 @@ language governing permissions and limitations under the License. -->
<profile> <profile>
<id>elasticsearch7</id> <id>elasticsearch7</id>
<properties> <properties>
<elasticsearch_docker_image>7.17.9</elasticsearch_docker_image> <elasticsearch_docker_image>7.17.10</elasticsearch_docker_image>
</properties> </properties>
</profile> </profile>
</profiles> </profiles>