BAEL-3204 (#8085)

* BAEL-3204 * BAEL-3204
2019-11-16 00:15:43 +01:00 · 2019-11-16 00:15:43 +01:00 · 0bf12a690c
commit 0bf12a690c
parent 86c72ffd7b
7 changed files with 384 additions and 0 deletions
--- a/machine-learning/pom.xml
+++ b/machine-learning/pom.xml
@ -0,0 +1,150 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>machine-learning</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <name>Supervised Learning</name>
+    <packaging>jar</packaging>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>parent-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <!-- Java 8, consistent with the kotlin-stdlib-jdk8 dependency and the kotlin-maven-plugin jvmTarget (1.8) -->
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+        <!-- Shared by the Kotlin stdlib, kotlin-test and the kotlin-maven-plugin -->
+        <kotlin.version>1.3.50</kotlin.version>
+        <!-- Shared by nd4j-native-platform and deeplearning4j-core -->
+        <dl4j.version>0.9.1</dl4j.version>
+    </properties>
+
+    <dependencies>
+        <!-- Kotlin standard library; declared once (duplicate declarations trigger a Maven model warning) -->
+        <dependency>
+            <groupId>org.jetbrains.kotlin</groupId>
+            <artifactId>kotlin-stdlib-jdk8</artifactId>
+            <version>${kotlin.version}</version>
+        </dependency>
+        <!-- ND4J backend and DeepLearning4J core, used by the com.baeldung.cnn package -->
+        <dependency>
+            <groupId>org.nd4j</groupId>
+            <artifactId>nd4j-native-platform</artifactId>
+            <version>${dl4j.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.deeplearning4j</groupId>
+            <artifactId>deeplearning4j-core</artifactId>
+            <version>${dl4j.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jetbrains.kotlin</groupId>
+            <artifactId>kotlin-test</artifactId>
+            <version>${kotlin.version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+    <build>
+        <!-- Kotlin sources live in src/main/kotlin; tests are read directly from src/test (not the Maven default src/test/java) -->
+        <sourceDirectory>src/main/kotlin</sourceDirectory>
+        <testSourceDirectory>src/test</testSourceDirectory>
+        <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
+            <plugins>
+                <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
+                <plugin>
+                    <artifactId>maven-clean-plugin</artifactId>
+                    <version>3.1.0</version>
+                </plugin>
+                <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
+                <plugin>
+                    <artifactId>maven-resources-plugin</artifactId>
+                    <version>3.0.2</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <version>3.8.0</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-surefire-plugin</artifactId>
+                    <version>2.22.1</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-jar-plugin</artifactId>
+                    <version>3.0.2</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-install-plugin</artifactId>
+                    <version>2.5.2</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-deploy-plugin</artifactId>
+                    <version>2.8.2</version>
+                </plugin>
+                <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
+                <plugin>
+                    <artifactId>maven-site-plugin</artifactId>
+                    <version>3.7.1</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-project-info-reports-plugin</artifactId>
+                    <version>3.0.0</version>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+        <plugins>
+            <!-- Kotlin compiler: builds main sources in the compile phase and test sources in the test-compile phase, targeting JVM 1.8 -->
+            <plugin>
+                <groupId>org.jetbrains.kotlin</groupId>
+                <artifactId>kotlin-maven-plugin</artifactId>
+                <version>${kotlin.version}</version>
+                <executions>
+                    <execution>
+                        <id>compile</id>
+                        <phase>compile</phase>
+                        <goals>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>test-compile</id>
+                        <phase>test-compile</phase>
+                        <goals>
+                            <goal>test-compile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <jvmTarget>1.8</jvmTarget>
+                </configuration>
+            </plugin>
+            <!-- Java compiler: no version here, it is taken from the pluginManagement entry above (3.8.0).
+                 Explicit compile/testCompile executions are bound to the compile and test-compile phases. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>compile</id>
+                        <phase>compile</phase>
+                        <goals>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>testCompile</id>
+                        <phase>test-compile</phase>
+                        <goals>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
--- a/machine-learning/src/main/kotlin/com/baeldung/cnn/ConvolutionalNeuralNetwork.kt
+++ b/machine-learning/src/main/kotlin/com/baeldung/cnn/ConvolutionalNeuralNetwork.kt
@ -0,0 +1,117 @@
+package com.baeldung.cnn
+
+import org.datavec.api.records.reader.impl.collection.ListStringRecordReader
+import org.datavec.api.split.ListStringSplit
+import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
+import org.deeplearning4j.eval.Evaluation
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration
+import org.deeplearning4j.nn.conf.inputs.InputType
+import org.deeplearning4j.nn.conf.layers.*
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
+import org.deeplearning4j.nn.weights.WeightInit
+import org.nd4j.linalg.activations.Activation
+import org.nd4j.linalg.learning.config.Adam
+import org.nd4j.linalg.lossfunctions.LossFunctions
+
+/**
+ * Trains and evaluates a convolutional neural network on the records produced by
+ * [ZalandoMNISTDataSet].
+ *
+ * The records are shuffled, split into a training and a test portion, used to fit the network
+ * built by [buildCNN], and the resulting evaluation statistics are printed to standard output.
+ */
+object ConvolutionalNeuralNetwork {
+
+    // Image geometry; each record carries IMAGE_HEIGHT * IMAGE_WIDTH pixel values followed by the label.
+    private const val IMAGE_HEIGHT = 28
+    private const val IMAGE_WIDTH = 28
+    private const val IMAGE_CHANNELS = 1
+
+    private const val NUM_CLASSES = 10
+    private const val BATCH_SIZE = 128
+
+    // Records [0, TRAIN_SET_SIZE) are used for training, [TRAIN_SET_SIZE, TOTAL_SET_SIZE) for testing.
+    private const val TRAIN_SET_SIZE = 50_000
+    private const val TOTAL_SET_SIZE = 60_000
+
+    // Number of times the whole training iterator is passed to fit().
+    private const val EPOCHS = 10
+
+    /**
+     * Entry point: loads the data set, trains the network and prints the evaluation.
+     *
+     * @param args command-line arguments (unused)
+     */
+    @JvmStatic
+    fun main(args: Array<String>) {
+        val dataset = ZalandoMNISTDataSet().load()
+        dataset.shuffle()
+        val trainDatasetIterator = createDatasetIterator(dataset.subList(0, TRAIN_SET_SIZE))
+        val testDatasetIterator = createDatasetIterator(dataset.subList(TRAIN_SET_SIZE, TOTAL_SET_SIZE))
+
+        val cnn = buildCNN()
+        learning(cnn, trainDatasetIterator)
+        testing(cnn, testDatasetIterator)
+    }
+
+    /**
+     * Wraps the given string records in a mini-batch iterator.
+     *
+     * The label index passed to the iterator is the position right after the pixel values,
+     * and the number of possible labels is [NUM_CLASSES].
+     */
+    private fun createDatasetIterator(dataset: MutableList<List<String>>): RecordReaderDataSetIterator {
+        val recordReader = ListStringRecordReader()
+        recordReader.initialize(ListStringSplit(dataset))
+        return RecordReaderDataSetIterator(recordReader, BATCH_SIZE, IMAGE_HEIGHT * IMAGE_WIDTH, NUM_CLASSES)
+    }
+
+    /**
+     * Builds and initialises the network: two convolution / batch-normalization / pooling stages,
+     * followed by two dense layers (with a batch normalization in between) and a softmax output layer.
+     */
+    private fun buildCNN(): MultiLayerNetwork {
+        val configuration = NeuralNetConfiguration.Builder()
+                .seed(123)
+                .l2(0.0005)
+                .updater(Adam())
+                .weightInit(WeightInit.XAVIER)
+                .list()
+                .layer(0, buildInitialConvolutionLayer())
+                .layer(1, buildBatchNormalizationLayer())
+                .layer(2, buildPoolingLayer())
+                .layer(3, buildConvolutionLayer())
+                .layer(4, buildBatchNormalizationLayer())
+                .layer(5, buildPoolingLayer())
+                .layer(6, buildDenseLayer())
+                .layer(7, buildBatchNormalizationLayer())
+                .layer(8, buildDenseLayer())
+                .layer(9, buildOutputLayer())
+                .setInputType(InputType.convolutionalFlat(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
+                .backprop(true)
+                .build()
+        return MultiLayerNetwork(configuration).apply { init() }
+    }
+
+    /** Softmax output layer with one output per class, trained with negative log-likelihood loss. */
+    private fun buildOutputLayer(): OutputLayer =
+            OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
+                    .nOut(NUM_CLASSES)
+                    .activation(Activation.SOFTMAX)
+                    .build()
+
+    /** Fully connected ReLU layer with 500 outputs and a dropOut setting of 0.5. */
+    private fun buildDenseLayer(): DenseLayer =
+            DenseLayer.Builder().activation(Activation.RELU)
+                    .nOut(500)
+                    .dropOut(0.5)
+                    .build()
+
+    /** Max-pooling layer with a 2x2 kernel and a 2x2 stride. */
+    private fun buildPoolingLayer(): SubsamplingLayer =
+            SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
+                    .kernelSize(2, 2)
+                    .stride(2, 2)
+                    .build()
+
+    /** Batch normalization layer with the builder's default settings. */
+    private fun buildBatchNormalizationLayer() = BatchNormalization.Builder().build()
+
+    /** Second 5x5 convolution (50 outputs, identity activation). */
+    private fun buildConvolutionLayer(): ConvolutionLayer =
+            ConvolutionLayer.Builder(5, 5)
+                    .stride(1, 1) // nIn does not need to be specified in later layers
+                    .nOut(50)
+                    .activation(Activation.IDENTITY)
+                    .build()
+
+    /** First 5x5 convolution (20 outputs, identity activation); declares the input channel count explicitly. */
+    private fun buildInitialConvolutionLayer(): ConvolutionLayer =
+            ConvolutionLayer.Builder(5, 5)
+                    .nIn(IMAGE_CHANNELS)
+                    .stride(1, 1)
+                    .nOut(20)
+                    .activation(Activation.IDENTITY)
+                    .build()
+
+    /** Fits [cnn] on [trainSet] [EPOCHS] times in a row. */
+    private fun learning(cnn: MultiLayerNetwork, trainSet: RecordReaderDataSetIterator) {
+        repeat(EPOCHS) {
+            cnn.fit(trainSet)
+        }
+    }
+
+    /** Runs [cnn] over every batch of [testSet] and prints the evaluation statistics and confusion matrix. */
+    private fun testing(cnn: MultiLayerNetwork, testSet: RecordReaderDataSetIterator) {
+        val evaluation = Evaluation(NUM_CLASSES)
+        while (testSet.hasNext()) {
+            val batch = testSet.next()
+            val output = cnn.output(batch.features)
+            evaluation.eval(batch.labels, output)
+        }
+
+        println(evaluation.stats())
+        println(evaluation.confusionToString())
+    }
+}
--- a/machine-learning/src/main/kotlin/com/baeldung/cnn/ZalandoMNISTDataSet.kt
+++ b/machine-learning/src/main/kotlin/com/baeldung/cnn/ZalandoMNISTDataSet.kt
@ -0,0 +1,45 @@
+package com.baeldung.cnn
+
+import java.io.File
+import java.nio.ByteBuffer
+import java.util.*
+import java.util.stream.Collectors
+import kotlin.streams.asStream
+
+/**
+ * Reads the `train-labels-idx1-ubyte` and `train-images-idx3-ubyte` files and converts them into
+ * string records.
+ *
+ * NOTE(review): both files are opened through paths relative to the working directory
+ * (`machine-learning/src/main/resources/...`), so this presumably has to be run from the
+ * repository root - confirm.
+ */
+class ZalandoMNISTDataSet {
+
+    /**
+     * Loads every labelled image.
+     *
+     * The number of items is read as a 4-byte integer located after the first 4 bytes of the
+     * labels file. Labels start right after that count; image data starts at byte 16 of the
+     * images file and each image occupies [IMAGE_SIZE] bytes.
+     *
+     * @return one record per image: [IMAGE_SIZE] pixel values (each `"0"`..`"255"`) followed by
+     * the label, all as decimal strings
+     */
+    fun load(): MutableList<List<String>> {
+        val labelsFile = File("machine-learning/src/main/resources/train-labels-idx1-ubyte")
+        val imagesFile = File("machine-learning/src/main/resources/train-images-idx3-ubyte")
+
+        val labelBytes = labelsFile.readBytes()
+        val imageBytes = imagesFile.readBytes()
+
+        val byteLabelCount = Arrays.copyOfRange(labelBytes, NUM_ITEMS_OFFSET, NUM_ITEMS_OFFSET + ITEMS_SIZE)
+        val numberOfLabels = ByteBuffer.wrap(byteLabelCount).int
+
+        val list = mutableListOf<List<String>>()
+
+        for (i in 0 until numberOfLabels) {
+            val label = labelBytes[OFFSET_SIZE + ITEMS_SIZE + i]
+            val startBoundary = i * IMAGE_SIZE + IMAGE_OFFSET
+            val endBoundary = startBoundary + IMAGE_SIZE
+            val imageData = Arrays.copyOfRange(imageBytes, startBoundary, endBoundary)
+
+            // Bytes are signed on the JVM, but the files store unsigned values: mask with 0xFF so
+            // that intensities 128..255 are not turned into negative numbers.
+            val record = imageData.mapTo(mutableListOf<String>()) { pixel -> pixel.toUnsignedString() }
+            record.add(label.toUnsignedString())
+            list.add(record)
+        }
+        return list
+    }
+
+    /** Decimal representation of this byte interpreted as an unsigned value (0..255). */
+    private fun Byte.toUnsignedString(): String = (toInt() and 0xFF).toString()
+
+    private companion object {
+        const val OFFSET_SIZE = 4 //in bytes
+        const val NUM_ITEMS_OFFSET = 4
+        const val ITEMS_SIZE = 4
+        const val ROWS = 28
+        const val COLUMNS = 28
+        const val IMAGE_OFFSET = 16
+        const val IMAGE_SIZE = ROWS * COLUMNS
+    }
+}
--- a/machine-learning/src/main/kotlin/com/baeldung/simplelinearregression/SimpleLinearRegression.kt
+++ b/machine-learning/src/main/kotlin/com/baeldung/simplelinearregression/SimpleLinearRegression.kt
@ -0,0 +1,31 @@
+package com.baeldung.simplelinearregression
+
+import kotlin.math.pow
+
+/**
+ * Least-squares fit of the line `y = slope * x + yIntercept` to paired integer samples.
+ *
+ * The model is fitted once, at construction time. If all [xs] are equal (or the lists are empty)
+ * the variance is zero / undefined and [slope] and [yIntercept] are not finite numbers.
+ *
+ * @param xs values of the independent variable
+ * @param ys values of the dependent variable, paired with [xs] by position
+ * @throws IllegalArgumentException if [xs] and [ys] do not have the same size
+ */
+class SimpleLinearRegression(private val xs: List<Int>, private val ys: List<Int>) {
+    var slope: Double = 0.0
+    var yIntercept: Double = 0.0
+
+    init {
+        // zip() would silently drop the unmatched tail while the averages still use every element,
+        // which would yield an inconsistent fit.
+        require(xs.size == ys.size) {
+            "xs and ys must contain the same number of samples but had ${xs.size} and ${ys.size}"
+        }
+        // Compute each mean once instead of once per sample.
+        val meanX = xs.average()
+        val meanY = ys.average()
+        val covariance = calculateCovariance(xs, ys, meanX, meanY)
+        val variance = calculateVariance(xs, meanX)
+        slope = calculateSlope(covariance, variance)
+        yIntercept = calculateYIntercept(meanY, slope, meanX)
+    }
+
+    /** Returns the value of the fitted line at [independentVariable]. */
+    fun predict(independentVariable: Double) = slope * independentVariable + yIntercept
+
+    /**
+     * Computes `(sst - ssr) / sst`, where `sst` is the sum of squared deviations of the observed
+     * ys from their mean and `ssr` is the sum of squared differences between the observed ys and
+     * the values predicted for the matching xs.
+     */
+    fun calculateRSquared(): Double {
+        val meanY = ys.average()
+        val sst = ys.sumByDouble { y -> (y - meanY).pow(2) }
+        val ssr = xs.zip(ys) { x, y -> (y - predict(x.toDouble())).pow(2) }.sum()
+        return (sst - ssr) / sst
+    }
+
+    /** Intercept that makes the line pass through the point of means. */
+    private fun calculateYIntercept(meanY: Double, slope: Double, meanX: Double) = meanY - slope * meanX
+
+    private fun calculateSlope(covariance: Double, variance: Double) = covariance / variance
+
+    /** Sum over all pairs of `(x - meanX) * (y - meanY)` (not divided by the sample count). */
+    private fun calculateCovariance(xs: List<Int>, ys: List<Int>, meanX: Double, meanY: Double) =
+            xs.zip(ys) { x, y -> (x - meanX) * (y - meanY) }.sum()
+
+    /** Sum over all xs of `(x - meanX)^2` (not divided by the sample count). */
+    private fun calculateVariance(xs: List<Int>, meanX: Double) = xs.sumByDouble { x -> (x - meanX).pow(2) }
+}
--- a/machine-learning/src/main/resources/train-labels-idx1-ubyte
+++ b/machine-learning/src/main/resources/train-labels-idx1-ubyte
--- a/machine-learning/src/test/com/baeldung/simplelinearregression/SimpleLinearRegressionUnitTest.kt
+++ b/machine-learning/src/test/com/baeldung/simplelinearregression/SimpleLinearRegressionUnitTest.kt
@ -0,0 +1,40 @@
+package com.baeldung.simplelinearregression
+
+import org.junit.Assert.assertEquals
+import org.junit.jupiter.api.Test
+
+/**
+ * Checks the predictions and the R-squared value of [SimpleLinearRegression] on a data set that
+ * follows a clear trend and on one that does not.
+ */
+class SimpleLinearRegressionUnitTest {
+    // Tolerance used by every floating-point comparison below.
+    private val delta = 0.01
+
+    // Independent variable shared by all scenarios.
+    private val inputs = listOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+
+    // Dependent values that grow steadily with the inputs.
+    private val trendingOutputs = listOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200)
+
+    // Dependent values without any visible relation to the inputs.
+    private val erraticOutputs = listOf(200, 0, 200, 0, 0, 0, -115, 1000, 0, 1)
+
+    @Test
+    fun givenAProperDataSetWhenFedToASimpleLinearRegressionModelThenItPredictsCorrectly() {
+        val regression = SimpleLinearRegression(inputs, trendingOutputs)
+
+        assertEquals(38.99, regression.predict(2.5), delta)
+        assertEquals(128.84, regression.predict(7.5), delta)
+    }
+
+    @Test
+    fun givenAPredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredReliable() {
+        val rSquared = SimpleLinearRegression(inputs, trendingOutputs).calculateRSquared()
+
+        assertEquals(0.95, rSquared, delta)
+    }
+
+    @Test
+    fun givenAnUnpredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredUnreliable() {
+        val rSquared = SimpleLinearRegression(inputs, erraticOutputs).calculateRSquared()
+
+        assertEquals(0.01, rSquared, delta)
+    }
+}
--- a/pom.xml
+++ b/pom.xml
@ -625,6 +625,7 @@

                <module>spring-boot-nashorn</module>
                <module>java-blockchain</module>
+                <module>machine-learning</module>
                <module>wildfly</module>
            </modules>