From 361bfdcaa5f79e531f4eba2359cf1f967606c8e6 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Wed, 4 Aug 2021 19:44:54 -0400 Subject: [PATCH] Better logging for lookups (#11539) * Better logging for lookups The default pollPeriod of 0 means that lookups are loaded once only at startup Add a warning message to warn operators about this. I suspect that most operators using jdbc or uri would expect eventual consistency with the source of the lookups if using jdbc or uri. So make this a warning to make it easier to debug if an operator notices a data inconsistency issue. * oops --- .../namespace/JdbcExtractionNamespace.java | 19 +++++++++++++++---- .../namespace/UriExtractionNamespace.java | 13 ++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespace.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespace.java index 57ba41ea190..71f9847a18c 100644 --- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespace.java +++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespace.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.server.initialization.JdbcAccessSecurityConfig; import org.apache.druid.utils.ConnectionUriUtils; @@ -40,6 +41,8 @@ import java.util.Objects; @JsonTypeName("jdbc") public class JdbcExtractionNamespace implements ExtractionNamespace { + private static final Logger LOG = new Logger(JdbcExtractionNamespace.class); + @JsonProperty private final MetadataStorageConnectorConfig connectorConfig; @JsonProperty @@ -62,9 +65,9 @@ public class JdbcExtractionNamespace implements ExtractionNamespace @NotNull @JsonProperty(value = "table", required = true) final String table, @NotNull @JsonProperty(value = "keyColumn", required = true) final String keyColumn, @NotNull @JsonProperty(value = "valueColumn", required = true) final String valueColumn, - @JsonProperty(value = "tsColumn", required = false) @Nullable final String tsColumn, - @JsonProperty(value = "filter", required = false) @Nullable final String filter, - @Min(0) @JsonProperty(value = "pollPeriod", required = false) @Nullable final Period pollPeriod, + @JsonProperty(value = "tsColumn") @Nullable final String tsColumn, + @JsonProperty(value = "filter") @Nullable final String filter, + @Min(0) @JsonProperty(value = "pollPeriod") @Nullable final Period pollPeriod, @JacksonInject JdbcAccessSecurityConfig securityConfig ) { @@ -78,7 +81,15 @@ public class JdbcExtractionNamespace implements ExtractionNamespace this.valueColumn = Preconditions.checkNotNull(valueColumn, "valueColumn"); this.tsColumn = tsColumn; this.filter = filter; - this.pollPeriod = pollPeriod == null ? new Period(0L) : pollPeriod; + if (pollPeriod == null) { + // Warning because if JdbcExtractionNamespace is being used for lookups, any updates to the database will not + // be picked up after the node starts. So for use casses where nodes start at different times (like streaming + // ingestion with peons) there can be data inconsistencies across the cluster. + LOG.warn("No pollPeriod configured for JdbcExtractionNamespace - entries will be loaded only once at startup"); + this.pollPeriod = new Period(0L); + } else { + this.pollPeriod = pollPeriod; + } } /** diff --git a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java index f454d918271..a3a7d5f8643 100644 --- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java +++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java @@ -37,6 +37,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CSVParser; import org.apache.druid.java.util.common.parsers.DelimitedParser; import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec; @@ -64,6 +65,8 @@ import java.util.regex.PatternSyntaxException; @JsonTypeName("uri") public class UriExtractionNamespace implements ExtractionNamespace { + private static final Logger LOG = new Logger(UriExtractionNamespace.class); + @JsonProperty private final URI uri; @JsonProperty @@ -98,7 +101,15 @@ public class UriExtractionNamespace implements ExtractionNamespace throw new IAE("Either uri xor uriPrefix required"); } this.namespaceParseSpec = Preconditions.checkNotNull(namespaceParseSpec, "namespaceParseSpec"); - this.pollPeriod = pollPeriod == null ? Period.ZERO : pollPeriod; + if (pollPeriod == null) { + // Warning because if UriExtractionNamespace is being used for lookups, any updates to the database will not + // be picked up after the node starts. So for use casses where nodes start at different times (like streaming + // ingestion with peons) there can be data inconsistencies across the cluster. + LOG.warn("No pollPeriod configured for UriExtractionNamespace - entries will be loaded only once at startup"); + this.pollPeriod = Period.ZERO; + } else { + this.pollPeriod = pollPeriod; + } this.fileRegex = fileRegex == null ? versionRegex : fileRegex; if (fileRegex != null && versionRegex != null) { throw new IAE("Cannot specify both versionRegex and fileRegex. versionRegex is deprecated");