NIFI-12731: Ensure state is updated in GetHBase whenever the session is committed

Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>

This closes #8346.
This commit is contained in:
Matt Burgess 2024-02-01 19:04:26 -05:00 committed by Pierre Villard
parent c6f1d771e8
commit 2f42b44efa
No known key found for this signature in database
GPG Key ID: F92A93B30C07C6D5
2 changed files with 44 additions and 46 deletions

View File

@ -140,7 +140,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
.build();
private final AtomicReference<ScanResult> lastResult = new AtomicReference<>();
private volatile List<Column> columns = new ArrayList<>();
private final List<Column> columns = new ArrayList<>();
private volatile String previousTable = null;
@Override
@ -201,11 +201,11 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
for (final String column : columns) {
if (column.contains(":")) {
final String[] parts = column.split(":");
final byte[] cf = parts[0].getBytes(Charset.forName("UTF-8"));
final byte[] cq = parts[1].getBytes(Charset.forName("UTF-8"));
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
this.columns.add(new Column(cf, cq));
} else {
final byte[] cf = column.getBytes(Charset.forName("UTF-8"));
final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
this.columns.add(new Column(cf, null));
}
}
@ -307,11 +307,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
Set<String> cellHashes = cellsMatchingTimestamp.get(rowHash);
if (cellHashes == null) {
cellHashes = new HashSet<>();
cellsMatchingTimestamp.put(rowHash, cellHashes);
}
Set<String> cellHashes = cellsMatchingTimestamp.computeIfAbsent(rowHash, k -> new HashSet<>());
cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
}
}
@ -336,40 +332,11 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
rowsPulledHolder.set(++rowsPulled);
if (++rowsPulled % getBatchSize() == 0) {
session.commitAsync();
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
}
});
final ScanResult scanResults = new ScanResult(latestTimestampHolder.get(), cellsMatchingTimestamp);
final ScanResult latestResult = lastResult.get();
if (latestResult == null || scanResults.getTimestamp() > latestResult.getTimestamp()) {
session.setState(scanResults.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResults));
} else if (scanResults.getTimestamp() == latestResult.getTimestamp()) {
final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
// copy the results of result.getMatchingCells() to combinedResults.
// do a deep copy because the Set may be modified below.
for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
// combined the results from 'lastResult'
for (final Map.Entry<String, Set<String>> entry : latestResult.getMatchingCells().entrySet()) {
final Set<String> existing = combinedResults.get(entry.getKey());
if (existing == null) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
} else {
existing.addAll(entry.getValue());
}
}
final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
session.setState(scanResult.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResult));
}
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
} catch (final IOException e) {
getLogger().error("Failed to receive data from HBase due to {}", e);
session.rollback();
@ -380,6 +347,39 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
}
}
private void updateStateAndCommit(final ProcessSession session, final long latestTimestamp, final Map<String, Set<String>> cellsMatchingTimestamp) throws IOException {
final ScanResult scanResults = new ScanResult(latestTimestamp, cellsMatchingTimestamp);
final ScanResult latestResult = lastResult.get();
if (latestResult == null || scanResults.getTimestamp() > latestResult.getTimestamp()) {
session.setState(scanResults.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResults));
} else if (scanResults.getTimestamp() == latestResult.getTimestamp()) {
final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
// copy the results of result.getMatchingCells() to combinedResults.
// do a deep copy because the Set may be modified below.
for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
// combined the results from 'lastResult'
for (final Map.Entry<String, Set<String>> entry : latestResult.getMatchingCells().entrySet()) {
final Set<String> existing = combinedResults.get(entry.getKey());
if (existing == null) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
} else {
existing.addAll(entry.getValue());
}
}
final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
session.setState(scanResult.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResult));
}
}
private void updateScanResultsIfNewer(final ScanResult scanResult) {
lastResult.getAndUpdate(current -> (current == null || scanResult.getTimestamp() > current.getTimestamp()) ? scanResult : current);
}
@ -495,11 +495,7 @@ public class GetHBase extends AbstractProcessor implements VisibilityFetchSuppor
final String rowIndex = matcher.group(1);
final String cellIndex = matcher.group(3);
Set<String> cellHashes = rowIndexToMatchingCellHashes.get(rowIndex);
if (cellHashes == null) {
cellHashes = new HashSet<>();
rowIndexToMatchingCellHashes.put(rowIndex, cellHashes);
}
Set<String> cellHashes = rowIndexToMatchingCellHashes.computeIfAbsent(rowIndex, k -> new HashSet<>());
if (cellIndex == null) {
// this provides a Row ID.

View File

@ -16,11 +16,13 @@
*/
package org.apache.nifi.hbase.scan;
import java.io.IOException;
/**
* Handles a single row from an HBase scan.
*/
public interface ResultHandler {
void handle(byte[] row, ResultCell[] resultCells);
void handle(byte[] row, ResultCell[] resultCells) throws IOException;
}