add avro + kafka + schema registry integration test (#10929)

* add avro + schema registry integration test

* style

* retry init

* maybe this

* oops heh

* this will fix it

* review stuffs

* fix comment
Clint Wylie 2021-03-08 08:12:12 -08:00 committed by GitHub
parent 9946306d4b
commit 96889cdebc
20 changed files with 436 additions and 72 deletions

View File

@@ -26,14 +26,17 @@ import io.confluent.kafka.schemaregistry.ParsedSchema;
import io.confluent.kafka.schemaregistry.avro.AvroSchema;
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.parsers.ParseException;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
@@ -70,18 +73,32 @@ public class SchemaRegistryBasedAvroBytesDecoder implements AvroBytesDecoder
@Override
public GenericRecord parse(ByteBuffer bytes)
{
try {
int length = bytes.limit() - 1 - 4;
if (length < 0) {
throw new ParseException("Failed to decode avro message, not enough bytes to decode (%s)", bytes.limit());
}
bytes.get(); // ignore first \0 byte
int id = bytes.getInt(); // extract schema registry id
int length = bytes.limit() - 1 - 4;
int offset = bytes.position() + bytes.arrayOffset();
Schema schema;
try {
ParsedSchema parsedSchema = registry.getSchemaById(id);
Schema schema = parsedSchema instanceof AvroSchema ? ((AvroSchema) parsedSchema).rawSchema() : null;
schema = parsedSchema instanceof AvroSchema ? ((AvroSchema) parsedSchema).rawSchema() : null;
}
catch (IOException | RestClientException ex) {
throw new RE(ex, "Failed to get Avro schema: %s", id);
}
if (schema == null) {
throw new RE("Failed to find Avro schema: %s", id);
}
DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
try {
return reader.read(null, DecoderFactory.get().binaryDecoder(bytes.array(), offset, length, null));
}
catch (Exception e) {
throw new ParseException(e, "Fail to decode avro message!");
throw new ParseException(e, "Fail to decode Avro message for schema: %s!", id);
}
}
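For context, parse() consumes the standard Confluent wire format: a single 0x0 magic byte, a 4-byte big-endian schema id, then the Avro-encoded body; the new length check just rejects buffers shorter than that 5-byte header. A minimal sketch (not part of this commit; class and method names are illustrative) of building such a buffer:

import java.nio.ByteBuffer;

class ConfluentFramingSketch
{
  // Frames an Avro-encoded payload the way SchemaRegistryBasedAvroBytesDecoder.parse() expects it.
  static ByteBuffer frame(int schemaId, byte[] avroBody)
  {
    ByteBuffer bb = ByteBuffer.allocate(1 + 4 + avroBody.length);
    bb.put((byte) 0x0);   // magic byte, skipped by bytes.get()
    bb.putInt(schemaId);  // read back by bytes.getInt()
    bb.put(avroBody);     // its length is limit() - 1 - 4
    bb.rewind();
    return bb;
  }
}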

View File

@@ -20,6 +20,7 @@
package org.apache.druid.data.input.avro;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.confluent.kafka.schemaregistry.ParsedSchema;
import io.confluent.kafka.schemaregistry.avro.AvroSchema;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import org.apache.avro.Schema;
@@ -29,6 +30,7 @@ import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.druid.data.input.AvroStreamInputRowParserTest;
import org.apache.druid.data.input.SomeAvroDatum;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.parsers.ParseException;
import org.junit.Assert;
import org.junit.Before;
@@ -96,40 +98,60 @@ public class SchemaRegistryBasedAvroBytesDecoderTest
public void testParse() throws Exception
{
// Given
Mockito.when(registry.getSchemaById(ArgumentMatchers.eq(1234))).thenReturn(new AvroSchema(SomeAvroDatum.getClassSchema()));
Mockito.when(registry.getSchemaById(ArgumentMatchers.eq(1234)))
.thenReturn(new AvroSchema(SomeAvroDatum.getClassSchema()));
GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
Schema schema = SomeAvroDatum.getClassSchema();
byte[] bytes = getAvroDatum(schema, someAvroDatum);
ByteBuffer bb = ByteBuffer.allocate(bytes.length + 5).put((byte) 0).putInt(1234).put(bytes);
bb.rewind();
// When
GenericRecord actual = new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
// Then
Assert.assertEquals(someAvroDatum.get("id"), actual.get("id"));
new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
}
@Test(expected = ParseException.class)
public void testParseCorrupted() throws Exception
public void testParseCorruptedNotEnoughBytesToEvenGetSchemaInfo()
{
// Given
Mockito.when(registry.getSchemaById(ArgumentMatchers.eq(1234))).thenReturn(new AvroSchema(SomeAvroDatum.getClassSchema()));
GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
Schema schema = SomeAvroDatum.getClassSchema();
byte[] bytes = getAvroDatum(schema, someAvroDatum);
ByteBuffer bb = ByteBuffer.allocate(bytes.length + 5).put((byte) 0).putInt(1234).put((bytes), 5, 10);
ByteBuffer bb = ByteBuffer.allocate(2).put((byte) 0).put(1, (byte) 1);
bb.rewind();
// When
new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
}
@Test(expected = ParseException.class)
public void testParseCorruptedPartial() throws Exception
{
// Given
Mockito.when(registry.getSchemaById(ArgumentMatchers.eq(1234)))
.thenReturn(new AvroSchema(SomeAvroDatum.getClassSchema()));
GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
Schema schema = SomeAvroDatum.getClassSchema();
byte[] bytes = getAvroDatum(schema, someAvroDatum);
ByteBuffer bb = ByteBuffer.allocate(4 + 5).put((byte) 0).putInt(1234).put(bytes, 5, 4);
bb.rewind();
// When
new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
}
@Test(expected = RE.class)
public void testParseWrongSchemaType() throws Exception
{
// Given
Mockito.when(registry.getSchemaById(ArgumentMatchers.eq(1234))).thenReturn(Mockito.mock(ParsedSchema.class));
ByteBuffer bb = ByteBuffer.allocate(5).put((byte) 0).putInt(1234);
bb.rewind();
// When
new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
}
@Test(expected = RE.class)
public void testParseWrongId() throws Exception
{
// Given
Mockito.when(registry.getSchemaById(ArgumentMatchers.anyInt())).thenThrow(new IOException("no pasaran"));
GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
Schema schema = SomeAvroDatum.getClassSchema();
byte[] bytes = getAvroDatum(schema, someAvroDatum);
ByteBuffer bb = ByteBuffer.allocate(bytes.length + 5).put((byte) 0).putInt(1234).put(bytes);
ByteBuffer bb = ByteBuffer.allocate(5).put((byte) 0).putInt(1234);
bb.rewind();
// When
new SchemaRegistryBasedAvroBytesDecoder(registry).parse(bb);
}
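The corrupted-input tests above hand-roll the same framing each time; a hypothetical helper (not in this commit) that could sit in the test class and express those buffers more compactly:

// Hypothetical test helper: a magic byte, a schema id, and a (possibly truncated)
// slice of the Avro payload.
private static ByteBuffer framed(int schemaId, byte[] payload, int offset, int length)
{
  ByteBuffer bb = ByteBuffer.allocate(1 + 4 + length)
      .put((byte) 0)
      .putInt(schemaId)
      .put(payload, offset, length);
  bb.rewind();
  return bb;
}

testParseCorruptedPartial, for example, is equivalent to parsing framed(1234, bytes, 5, 4).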

View File

@@ -34,7 +34,7 @@ networks:
- subnet: 172.172.172.0/24
services:
### supporting infra:
### always there supporting infra:
druid-zookeeper-kafka:
image: druid/cluster
container_name: druid-zookeeper-kafka
@@ -71,45 +71,6 @@ services:
env_file:
- ./environment-configs/common
druid-it-hadoop:
image: druid-it/hadoop:2.8.5
container_name: druid-it-hadoop
ports:
- 2049:2049
- 2122:2122
- 8020:8020
- 8021:8021
- 8030:8030
- 8031:8031
- 8032:8032
- 8033:8033
- 8040:8040
- 8042:8042
- 8088:8088
- 8443:8443
- 9000:9000
- 10020:10020
- 19888:19888
- 34455:34455
- 50010:50010
- 50020:50020
- 50030:50030
- 50060:50060
- 50070:50070
- 50075:50075
- 50090:50090
- 51111:51111
networks:
druid-it-net:
ipv4_address: 172.172.172.101
privileged: true
volumes:
- ${HOME}/shared:/shared
- ./../src/test/resources:/resources
hostname: "druid-it-hadoop"
command: "bash -c 'echo Start druid-it-hadoop container... && \
/etc/bootstrap.sh && \
tail -f /dev/null'"
### overlords
druid-overlord:
@@ -357,12 +318,54 @@ services:
- ./environment-configs/common
- ./environment-configs/router-custom-check-tls
### optional supporting infra
druid-it-hadoop:
image: druid-it/hadoop:2.8.5
container_name: druid-it-hadoop
ports:
- 2049:2049
- 2122:2122
- 8020:8020
- 8021:8021
- 8030:8030
- 8031:8031
- 8032:8032
- 8033:8033
- 8040:8040
- 8042:8042
- 8088:8088
- 8443:8443
- 9000:9000
- 10020:10020
- 19888:19888
- 34455:34455
- 50010:50010
- 50020:50020
- 50030:50030
- 50060:50060
- 50070:50070
- 50075:50075
- 50090:50090
- 51111:51111
networks:
druid-it-net:
ipv4_address: 172.172.172.101
privileged: true
volumes:
- ${HOME}/shared:/shared
- ./../src/test/resources:/resources
hostname: "druid-it-hadoop"
command: "bash -c 'echo Start druid-it-hadoop container... && \
/etc/bootstrap.sh && \
tail -f /dev/null'"
druid-openldap:
image: osixia/openldap:1.4.0
container_name: druid-openldap
networks:
druid-it-net:
ipv4_address: 172.172.172.74
ipv4_address: 172.172.172.102
ports:
- 8389:389
- 8636:636
@@ -373,3 +376,26 @@ services:
env_file:
- ./environment-configs/common
command: --copy-service
schema-registry:
image: confluentinc/cp-schema-registry:5.5.1
container_name: schema-registry
ports:
- 8085:8085
networks:
druid-it-net:
ipv4_address: 172.172.172.103
volumes:
- ${HOME}/shared:/shared
- ./schema-registry/jaas_config.file:/usr/lib/druid/conf/jaas_config.file
- ./schema-registry/password-file:/usr/lib/druid/conf/password-file
privileged: true
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8085"
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: druid-zookeeper-kafka:9092
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users
SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file
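The registry above is protected by HTTP Basic auth (realm druid), backed by the jaas_config.file and password-file added below. A minimal sketch of a client against it, mirroring the 4-argument CachedSchemaRegistryClient call the new AvroSchemaRegistryEventSerializer uses; the host:port value passed in is an assumption based on the published 8085 mapping:

import com.google.common.collect.ImmutableMap;
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;

class RegistryClientSketch
{
  static SchemaRegistryClient connect(String hostAndPort) // e.g. "<docker-host>:8085"
  {
    return new CachedSchemaRegistryClient(
        "http://" + hostAndPort,
        Integer.MAX_VALUE,
        ImmutableMap.of(
            "basic.auth.credentials.source", "USER_INFO",
            "basic.auth.user.info", "druid:diurd"   // matches docker/schema-registry/password-file
        ),
        ImmutableMap.of()
    );
  }
}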

View File

@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: "2.2"
services:
schema-registry:
extends:
file: docker-compose.base.yml
service: schema-registry
depends_on:
- druid-zookeeper-kafka
links:
- druid-zookeeper-kafka:druid-zookeeper-kafka
- druid-coordinator:druid-coordinator
- druid-broker:druid-broker
- druid-historical:druid-historical
- druid-indexer:druid-indexer

View File

@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: "2.2"
services:
schema-registry:
extends:
file: docker-compose.base.yml
service: schema-registry
depends_on:
- druid-zookeeper-kafka
links:
- druid-zookeeper-kafka:druid-zookeeper-kafka
- druid-coordinator:druid-coordinator
- druid-broker:druid-broker
- druid-middlemanager:druid-middlemanager
- druid-historical:druid-historical

View File

@@ -0,0 +1,5 @@
druid {
org.eclipse.jetty.jaas.spi.PropertyFileLoginModule required
file="/usr/lib/druid/conf/password-file"
debug="true";
};

View File

@@ -0,0 +1 @@
druid: diurd,users

View File

@@ -31,6 +31,13 @@
<version>0.22.0-SNAPSHOT</version>
</parent>
<repositories>
<repository>
<id>confluent</id>
<url>https://packages.confluent.io/maven/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>com.amazonaws</groupId>
@@ -320,6 +327,41 @@
<artifactId>guice-servlet</artifactId>
<version>${guice.version}</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-schema-registry-client</artifactId>
<version>5.5.1</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>javax.ws.rs-api</artifactId>
</exclusion>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>javax.ws.rs-api</artifactId>
</exclusion>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>jsr311-api</artifactId>
</exclusion>
<exclusion>
<groupId>jakarta.ws.rs</groupId>
<artifactId>jakarta.ws.rs-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Tests -->
<dependency>

View File

@@ -28,7 +28,6 @@ getComposeArgs()
echo "DRUID_INTEGRATION_TEST_INDEXER must be 'indexer' or 'middleManager' (is '$DRUID_INTEGRATION_TEST_INDEXER')"
exit 1
fi
if [ "$DRUID_INTEGRATION_TEST_INDEXER" = "indexer" ]
then
# Sanity check: cannot combine CliIndexer tests with security, query-retry tests
@@ -36,10 +35,14 @@ getComposeArgs()
then
echo "Cannot run test group '$DRUID_INTEGRATION_TEST_GROUP' with CliIndexer"
exit 1
fi
elif [ "$DRUID_INTEGRATION_TEST_GROUP" = "kafka-data-format" ]
then
# Replace MiddleManager with Indexer + schema registry container
echo "-f ${DOCKERDIR}/docker-compose.cli-indexer.yml -f ${DOCKERDIR}/docker-compose.schema-registry-indexer.yml"
else
# Replace MiddleManager with Indexer
echo "-f ${DOCKERDIR}/docker-compose.cli-indexer.yml"
fi
elif [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ]
then
# default + additional druid router (custom-check-tls, permissive-tls, no-client-auth-tls)
@@ -57,6 +60,10 @@ getComposeArgs()
then
# the 'high availability' test cluster with multiple coordinators and overlords
echo "-f ${DOCKERDIR}/docker-compose.high-availability.yml"
elif [ "$DRUID_INTEGRATION_TEST_GROUP" = "kafka-data-format" ]
then
# default + schema registry container
echo "-f ${DOCKERDIR}/docker-compose.yml -f ${DOCKERDIR}/docker-compose.schema-registry.yml"
else
# default
echo "-f ${DOCKERDIR}/docker-compose.yml"

View File

@@ -63,6 +63,7 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
private String middleManagerHost;
private String zookeeperHosts; // comma-separated list of host:port
private String kafkaHost;
private String schemaRegistryHost;
private Map<String, String> props = null;
private String username;
private String password;
@@ -222,6 +223,7 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
zookeeperHosts = props.get("zookeeper_hosts");
kafkaHost = props.get("kafka_host") + ":" + props.get("kafka_port");
schemaRegistryHost = props.get("schema_registry_host") + ":" + props.get("schema_registry_port");
username = props.get("username");
@@ -499,6 +501,12 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
return streamEndpoint;
}
@Override
public String getSchemaRegistryHost()
{
return schemaRegistryHost;
}
@Override
public Map<String, String> getProperties()
{

View File

@@ -32,6 +32,13 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
* The values here should be kept in sync with the values used in the docker-compose files used to bring up the
* integration-test clusters.
*
* integration-tests/docker/docker-compose.base.yml defines most of the hostnames, ports, and addresses, but some
* might live in the overrides as well.
*/
public class DockerConfigProvider implements IntegrationTestingConfigProvider
{
@JsonProperty
@@ -317,6 +324,18 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
return "druid-historical";
}
@Override
public String getSchemaRegistryHost()
{
return dockerIp + ":8085";
}
@Override
public String getSchemaRegistryInternalHost()
{
return "schema-registry:8085";
}
@Override
public String getProperty(String prop)
{

View File

@@ -164,6 +164,13 @@ public interface IntegrationTestingConfig
String getStreamEndpoint();
String getSchemaRegistryHost();
default String getSchemaRegistryInternalHost()
{
return getSchemaRegistryHost();
}
boolean isDocker();
@Nullable
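getSchemaRegistryInternalHost() gets a default that falls back to the external host, so only DockerConfigProvider overrides it (returning the in-network schema-registry:8085) while ConfigFileConfigProvider keeps the default. A small sketch with a hypothetical helper name, mirroring how AbstractKafkaIndexingServiceTest further down builds the registry URL for ingestion specs:

import org.apache.druid.testing.IntegrationTestingConfig;

class SchemaRegistryEndpointSketch
{
  // Specs submitted to the cluster need the address that resolves inside the Docker
  // network; clients running in the test harness use the published host:port instead.
  static String urlForIngestionSpec(IntegrationTestingConfig config)
  {
    return "http://" + config.getSchemaRegistryInternalHost();
  }

  static String urlForTestClient(IntegrationTestingConfig config)
  {
    return "http://" + config.getSchemaRegistryHost();
  }
}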

View File

@@ -40,7 +40,7 @@ public class AvroEventSerializer implements EventSerializer
{
public static final String TYPE = "avro";
private static final Schema SCHEMA = SchemaBuilder
static final Schema SCHEMA = SchemaBuilder
.record("wikipedia")
.namespace("org.apache.druid")
.fields()
@@ -62,12 +62,12 @@ public class AvroEventSerializer implements EventSerializer
.requiredInt("delta")
.endRecord();
private final DatumWriter<Object> writer = new GenericDatumWriter<>(SCHEMA);
protected final DatumWriter<Object> writer = new GenericDatumWriter<>(SCHEMA);
@Override
public byte[] serialize(List<Pair<String, Object>> event) throws IOException
{
final WikipediaRecord record = new WikipediaRecord();
final WikipediaRecord record = new WikipediaRecord(SCHEMA);
event.forEach(pair -> record.put(pair.lhs, pair.rhs));
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
@@ -82,12 +82,18 @@ public class AvroEventSerializer implements EventSerializer
{
}
private static class WikipediaRecord implements GenericRecord
static class WikipediaRecord implements GenericRecord
{
private final Map<String, Object> event = new HashMap<>();
private final BiMap<Integer, String> indexes = HashBiMap.create(SCHEMA.getFields().size());
private int nextIndex = 0;
private final Schema schema;
public WikipediaRecord(Schema schema)
{
this.schema = schema;
}
@Override
public void put(String key, Object v)
@@ -125,7 +131,7 @@ public class AvroEventSerializer implements EventSerializer
@Override
public Schema getSchema()
{
return SCHEMA;
return schema;
}
}
}

View File

@@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.testing.utils;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.google.common.collect.ImmutableMap;
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.testing.IntegrationTestingConfig;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
public class AvroSchemaRegistryEventSerializer extends AvroEventSerializer
{
private static final int MAX_INITIALIZE_RETRIES = 10;
public static final String TYPE = "avro-schema-registry";
private final IntegrationTestingConfig config;
private final CachedSchemaRegistryClient client;
private int schemaId = -1;
private Schema fromRegistry;
@JsonCreator
public AvroSchemaRegistryEventSerializer(
@JacksonInject IntegrationTestingConfig config
)
{
this.config = config;
this.client = new CachedSchemaRegistryClient(
StringUtils.format("http://%s", config.getSchemaRegistryHost()),
Integer.MAX_VALUE,
ImmutableMap.of(
"basic.auth.credentials.source", "USER_INFO",
"basic.auth.user.info", "druid:diurd"
),
ImmutableMap.of()
);
}
@Override
public void initialize(String topic)
{
try {
RetryUtils.retry(
() -> {
schemaId = client.register(topic, AvroEventSerializer.SCHEMA);
fromRegistry = client.getById(schemaId);
return 0;
},
(e) -> true,
MAX_INITIALIZE_RETRIES
);
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public byte[] serialize(List<Pair<String, Object>> event) throws IOException
{
final WikipediaRecord record = new WikipediaRecord(fromRegistry);
event.forEach(pair -> record.put(pair.lhs, pair.rhs));
ByteArrayOutputStream out = new ByteArrayOutputStream();
out.write(0x0);
out.write(ByteBuffer.allocate(4).putInt(schemaId).array());
BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
DatumWriter<Object> writer = new GenericDatumWriter<>(fromRegistry);
writer.write(record, encoder);
encoder.flush();
byte[] bytes = out.toByteArray();
out.close();
return bytes;
}
}
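End to end, this serializer and the decoder from the first file are mirror images: initialize() registers the wikipedia schema and caches its id, serialize() prepends the magic byte and that id, and parse() reads them back before decoding. A round-trip sketch, not part of this commit, with hypothetical names and assuming a reachable registry and an event that populates every field of the wikipedia schema:

import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import org.apache.avro.generic.GenericRecord;
import org.apache.druid.data.input.avro.SchemaRegistryBasedAvroBytesDecoder;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.testing.IntegrationTestingConfig;
import org.apache.druid.testing.utils.AvroSchemaRegistryEventSerializer;
import java.nio.ByteBuffer;
import java.util.List;

class RoundTripSketch
{
  static GenericRecord roundTrip(
      IntegrationTestingConfig config,
      SchemaRegistryClient registry,        // client pointed at the same registry URL
      String topic,
      List<Pair<String, Object>> event      // must cover every wikipedia field
  ) throws Exception
  {
    AvroSchemaRegistryEventSerializer serializer = new AvroSchemaRegistryEventSerializer(config);
    serializer.initialize(topic);           // register the schema, cache its id

    byte[] framed = serializer.serialize(event); // 0x0 + schema id + Avro body

    // test-visible constructor, as used in SchemaRegistryBasedAvroBytesDecoderTest;
    // the decoder looks the id back up in the registry and decodes the body
    return new SchemaRegistryBasedAvroBytesDecoder(registry).parse(ByteBuffer.wrap(framed));
  }
}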

View File

@@ -42,9 +42,15 @@ import java.util.List;
@Type(name = JsonEventSerializer.TYPE, value = JsonEventSerializer.class),
@Type(name = CsvEventSerializer.TYPE, value = CsvEventSerializer.class),
@Type(name = DelimitedEventSerializer.TYPE, value = DelimitedEventSerializer.class),
@Type(name = AvroEventSerializer.TYPE, value = AvroEventSerializer.class)
@Type(name = AvroEventSerializer.TYPE, value = AvroEventSerializer.class),
@Type(name = AvroSchemaRegistryEventSerializer.TYPE, value = AvroSchemaRegistryEventSerializer.class)
})
public interface EventSerializer extends Closeable
{
default void initialize(String topic)
{
}
byte[] serialize(List<Pair<String, Object>> event) throws IOException;
}
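initialize(topic) gets a no-op default so the existing json/csv/delimited/avro serializers compile unchanged; only the schema-registry serializer needs the hook, to register its schema before the stream generator starts emitting (see SyntheticStreamGenerator below). A minimal hypothetical serializer relying on that default:

import org.apache.druid.java.util.common.Pair;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;

class KeyValueEventSerializer implements EventSerializer
{
  // No initialize() override: the new default no-op applies.
  @Override
  public byte[] serialize(List<Pair<String, Object>> event)
  {
    return event.stream()
                .map(pair -> pair.lhs + "=" + pair.rhs)
                .collect(Collectors.joining(","))
                .getBytes(StandardCharsets.UTF_8);
  }

  @Override
  public void close()
  {
  }
}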

View File

@@ -61,6 +61,7 @@ public abstract class SyntheticStreamGenerator implements StreamGenerator
DateTime overrrideFirstEventTime
)
{
serializer.initialize(streamTopic);
// The idea here is that we will send [eventsPerSecond] events that will either use [nowFlooredToSecond]
// or the [overrrideFirstEventTime] as the primary timestamp.
// Having a fixed number of events that use the same timestamp will help in allowing us to determine if any events

View File

@@ -83,6 +83,7 @@ public abstract class AbstractKafkaIndexingServiceTest extends AbstractStreamInd
"%%TOPIC_VALUE%%",
streamName
);
if (AbstractStreamIndexingTest.INPUT_FORMAT.equals(parserType)) {
spec = StringUtils.replace(
spec,
@@ -116,6 +117,13 @@ public abstract class AbstractKafkaIndexingServiceTest extends AbstractStreamInd
"%%STREAM_PROPERTIES_KEY%%",
"consumerProperties"
);
spec = StringUtils.replace(
spec,
"%%SCHEMA_REGISTRY_HOST%%",
StringUtils.format("http://%s", config.getSchemaRegistryInternalHost())
);
return StringUtils.replace(
spec,
"%%STREAM_PROPERTIES_VALUE%%",

View File

@@ -0,0 +1,21 @@
{
"type": "avro_stream",
"avroBytesDecoder" : {
"type": "schema_registry",
"url": "%%SCHEMA_REGISTRY_HOST%%",
"config": {
"basic.auth.credentials.source": "USER_INFO",
"basic.auth.user.info": "druid:diurd"
}
},
"parseSpec": {
"format": "avro",
"timestampSpec": {
"column": "timestamp",
"format": "auto"
},
"dimensionsSpec": {
"dimensions": ["page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous", "namespace", "continent", "country", "region", "city"]
}
}
}

View File

@@ -0,0 +1,3 @@
{
"type": "avro-schema-registry"
}

View File

@@ -1949,6 +1949,7 @@
<exclude>**/*.json</exclude>
<exclude>**/*.parq</exclude>
<exclude>**/*.parquet</exclude>
<exclude>**/docker/schema-registry/*</exclude>
<exclude>LICENSE</exclude>
<exclude>LICENSE.BINARY</exclude>
<exclude>NOTICE</exclude>