Better logging for lookups (#11539)

* Better logging for lookups

The default pollPeriod of 0 means that lookups are loaded once only at startup

Add a warning message to warn operators about this. I suspect that most
operators using jdbc or uri would expect eventual consistency with the source
of the lookups if using jdbc or uri. So make this a warning to make it easier
to debug if an operator notices a data inconsistency issue.

* oops
This commit is contained in:
Suneet Saldanha 2021-08-04 19:44:54 -04:00 committed by GitHub
parent 590583699f
commit 361bfdcaa5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 5 deletions

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.MetadataStorageConnectorConfig;
import org.apache.druid.server.initialization.JdbcAccessSecurityConfig;
import org.apache.druid.utils.ConnectionUriUtils;
@ -40,6 +41,8 @@ import java.util.Objects;
@JsonTypeName("jdbc")
public class JdbcExtractionNamespace implements ExtractionNamespace
{
private static final Logger LOG = new Logger(JdbcExtractionNamespace.class);
@JsonProperty
private final MetadataStorageConnectorConfig connectorConfig;
@JsonProperty
@ -62,9 +65,9 @@ public class JdbcExtractionNamespace implements ExtractionNamespace
@NotNull @JsonProperty(value = "table", required = true) final String table,
@NotNull @JsonProperty(value = "keyColumn", required = true) final String keyColumn,
@NotNull @JsonProperty(value = "valueColumn", required = true) final String valueColumn,
@JsonProperty(value = "tsColumn", required = false) @Nullable final String tsColumn,
@JsonProperty(value = "filter", required = false) @Nullable final String filter,
@Min(0) @JsonProperty(value = "pollPeriod", required = false) @Nullable final Period pollPeriod,
@JsonProperty(value = "tsColumn") @Nullable final String tsColumn,
@JsonProperty(value = "filter") @Nullable final String filter,
@Min(0) @JsonProperty(value = "pollPeriod") @Nullable final Period pollPeriod,
@JacksonInject JdbcAccessSecurityConfig securityConfig
)
{
@ -78,7 +81,15 @@ public class JdbcExtractionNamespace implements ExtractionNamespace
this.valueColumn = Preconditions.checkNotNull(valueColumn, "valueColumn");
this.tsColumn = tsColumn;
this.filter = filter;
this.pollPeriod = pollPeriod == null ? new Period(0L) : pollPeriod;
if (pollPeriod == null) {
// Warning because if JdbcExtractionNamespace is being used for lookups, any updates to the database will not
// be picked up after the node starts. So for use casses where nodes start at different times (like streaming
// ingestion with peons) there can be data inconsistencies across the cluster.
LOG.warn("No pollPeriod configured for JdbcExtractionNamespace - entries will be loaded only once at startup");
this.pollPeriod = new Period(0L);
} else {
this.pollPeriod = pollPeriod;
}
}
/**

View File

@ -37,6 +37,7 @@ import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.CSVParser;
import org.apache.druid.java.util.common.parsers.DelimitedParser;
import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
@ -64,6 +65,8 @@ import java.util.regex.PatternSyntaxException;
@JsonTypeName("uri")
public class UriExtractionNamespace implements ExtractionNamespace
{
private static final Logger LOG = new Logger(UriExtractionNamespace.class);
@JsonProperty
private final URI uri;
@JsonProperty
@ -98,7 +101,15 @@ public class UriExtractionNamespace implements ExtractionNamespace
throw new IAE("Either uri xor uriPrefix required");
}
this.namespaceParseSpec = Preconditions.checkNotNull(namespaceParseSpec, "namespaceParseSpec");
this.pollPeriod = pollPeriod == null ? Period.ZERO : pollPeriod;
if (pollPeriod == null) {
// Warning because if UriExtractionNamespace is being used for lookups, any updates to the database will not
// be picked up after the node starts. So for use casses where nodes start at different times (like streaming
// ingestion with peons) there can be data inconsistencies across the cluster.
LOG.warn("No pollPeriod configured for UriExtractionNamespace - entries will be loaded only once at startup");
this.pollPeriod = Period.ZERO;
} else {
this.pollPeriod = pollPeriod;
}
this.fileRegex = fileRegex == null ? versionRegex : fileRegex;
if (fileRegex != null && versionRegex != null) {
throw new IAE("Cannot specify both versionRegex and fileRegex. versionRegex is deprecated");