From aa6c51364a1c758b6e6626f19d1497dceef6f2a5 Mon Sep 17 00:00:00 2001 From: Bryan Beaudreault Date: Thu, 16 Feb 2023 13:13:25 -0500 Subject: [PATCH] HADOOP-18215. Enhance WritableName to be able to return aliases for classes that use serializers (#4215) --- .../org/apache/hadoop/io/WritableName.java | 2 +- .../apache/hadoop/io/TestSequenceFile.java | 119 ++++++++++++++++++ .../apache/hadoop/io/TestWritableName.java | 50 +++++++- 3 files changed, 169 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java index e5e74875225..683d6c099b5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/WritableName.java @@ -92,7 +92,7 @@ public class WritableName { ) throws IOException { Class writableClass = NAME_TO_CLASS.get(name); if (writableClass != null) - return writableClass.asSubclass(Writable.class); + return writableClass; try { return conf.getClassByName(name); } catch (ClassNotFoundException e) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java index 5e4d578caee..d0dc73bacdd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java @@ -26,6 +26,9 @@ import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.SequenceFile.Metadata; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.Serializer; import org.apache.hadoop.io.serializer.avro.AvroReflectSerialization; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ReflectionUtils; @@ -750,6 +753,122 @@ public class TestSequenceFile { } } + @Test + public void testSerializationUsingWritableNameAlias() throws IOException { + Configuration config = new Configuration(); + config.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, SimpleSerializer.class.getName()); + Path path = new Path(System.getProperty("test.build.data", "."), + "SerializationUsingWritableNameAlias"); + + // write with the original serializable class + SequenceFile.Writer writer = SequenceFile.createWriter( + config, + SequenceFile.Writer.file(path), + SequenceFile.Writer.keyClass(SimpleSerializable.class), + SequenceFile.Writer.valueClass(SimpleSerializable.class)); + + int max = 10; + try { + SimpleSerializable val = new SimpleSerializable(); + val.setId(-1); + for (int i = 0; i < max; i++) { + SimpleSerializable key = new SimpleSerializable(); + key.setId(i); + writer.append(key, val); + } + } finally { + writer.close(); + } + + // override name so it gets forced to the new serializable + WritableName.setName(AnotherSimpleSerializable.class, SimpleSerializable.class.getName()); + + // read and expect our new serializable, and all the correct values read + SequenceFile.Reader reader = new SequenceFile.Reader( + config, + SequenceFile.Reader.file(path)); + + AnotherSimpleSerializable key = new AnotherSimpleSerializable(); + int count = 0; + while (true) { + key = (AnotherSimpleSerializable) reader.next(key); + if (key == null) { + // make sure we exhausted all the ints we wrote + assertEquals(count, max); + break; + } + assertEquals(count++, key.getId()); + } + } + + public static class SimpleSerializable implements Serializable { + + private int id; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + } + + public static class AnotherSimpleSerializable extends SimpleSerializable { + } + + public static class SimpleSerializer implements Serialization { + + @Override + public boolean accept(Class c) { + return SimpleSerializable.class.isAssignableFrom(c); + } + + @Override + public Serializer getSerializer(Class c) { + return new Serializer() { + private DataOutputStream out; + @Override + public void open(OutputStream out) throws IOException { + this.out = new DataOutputStream(out); + } + + @Override + public void serialize(SimpleSerializable simpleSerializable) throws IOException { + out.writeInt(simpleSerializable.getId()); + } + + @Override + public void close() throws IOException { + out.close(); + } + }; + } + + @Override + public Deserializer getDeserializer(Class c) { + return new Deserializer() { + private DataInputStream dis; + @Override + public void open(InputStream in) throws IOException { + dis = new DataInputStream(in); + } + + @Override + public SimpleSerializable deserialize(SimpleSerializable simpleSerializable) + throws IOException { + simpleSerializable.setId(dis.readInt()); + return simpleSerializable; + } + + @Override + public void close() throws IOException { + dis.close(); + } + }; + } + } + /** For debugging and testing. */ public static void main(String[] args) throws Exception { int count = 1024 * 1024; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWritableName.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWritableName.java index 5950142220e..22f2aee62ad 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWritableName.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestWritableName.java @@ -24,8 +24,14 @@ import java.io.IOException; import java.util.Random; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; import org.junit.Test; - +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** Unit tests for WritableName. */ @@ -63,6 +69,28 @@ public class TestWritableName { } } + private static class SimpleSerializable { + + } + + private static class SimpleSerializer implements Serialization { + + @Override + public boolean accept(Class c) { + return c.equals(SimpleSerializable.class); + } + + @Override + public Serializer getSerializer(Class c) { + return null; + } + + @Override + public Deserializer getDeserializer(Class c) { + return null; + } + } + private static final String testName = "mystring"; @Test @@ -95,7 +123,27 @@ public class TestWritableName { // check original name still works test = WritableName.getClass(testName, conf); assertTrue(test.equals(SimpleWritable.class)); + } + @Test + public void testAddNameSerializable() throws Exception { + Configuration conf = new Configuration(); + conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, SimpleSerializer.class.getName()); + SerializationFactory serializationFactory = + new SerializationFactory(conf); + + String altName = testName + ".alt"; + + WritableName.addName(SimpleSerializable.class, altName); + + Class test = WritableName.getClass(altName, conf); + assertEquals(test, SimpleSerializable.class); + assertNotNull(serializationFactory.getSerialization(test)); + + // check original name still works + test = WritableName.getClass(SimpleSerializable.class.getName(), conf); + assertEquals(test, SimpleSerializable.class); + assertNotNull(serializationFactory.getSerialization(test)); } @Test