diff --git a/machine-learning/pom.xml b/machine-learning/pom.xml new file mode 100644 index 0000000000..bb64470c74 --- /dev/null +++ b/machine-learning/pom.xml @@ -0,0 +1,150 @@ + + 4.0.0 + + machine-learning + 1.0-SNAPSHOT + Supervised Learning + jar + + + com.baeldung + parent-modules + 1.0.0-SNAPSHOT + + + + UTF-8 + 1.7 + 1.7 + 1.3.50 + 0.9.1 + + + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + ${kotlin.version} + + + org.nd4j + nd4j-native-platform + ${dl4j.version} + + + org.deeplearning4j + deeplearning4j-core + ${dl4j.version} + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + ${kotlin.version} + + + org.jetbrains.kotlin + kotlin-test + ${kotlin.version} + test + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + ${kotlin.version} + + + + src/main/kotlin + src/test + + + + + maven-clean-plugin + 3.1.0 + + + + maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 3.0.0 + + + + + + org.jetbrains.kotlin + kotlin-maven-plugin + ${kotlin.version} + + + compile + compile + + compile + + + + test-compile + test-compile + + test-compile + + + + + 1.8 + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + compile + + compile + + + + testCompile + test-compile + + testCompile + + + + + + + diff --git a/machine-learning/src/main/kotlin/com/baeldung/cnn/ConvolutionalNeuralNetwork.kt b/machine-learning/src/main/kotlin/com/baeldung/cnn/ConvolutionalNeuralNetwork.kt new file mode 100644 index 0000000000..b77fe273ae --- /dev/null +++ b/machine-learning/src/main/kotlin/com/baeldung/cnn/ConvolutionalNeuralNetwork.kt @@ -0,0 +1,117 @@ +package com.baeldung.cnn + +import org.datavec.api.records.reader.impl.collection.ListStringRecordReader +import org.datavec.api.split.ListStringSplit +import 
/**
 * Trains and evaluates a small convolutional network on the records produced by
 * [ZalandoMNISTDataSet].
 *
 * The loaded records are shuffled, the first [TRAIN_SET_SIZE] are used for training and
 * the records up to [TOTAL_SET_SIZE] for evaluation. Evaluation statistics and the
 * confusion matrix are printed to standard output.
 */
object ConvolutionalNeuralNetwork {

    private const val IMAGE_HEIGHT = 28
    private const val IMAGE_WIDTH = 28
    private const val IMAGE_CHANNELS = 1
    private const val NUM_CLASSES = 10
    private const val BATCH_SIZE = 128
    private const val TRAIN_SET_SIZE = 50_000
    private const val TOTAL_SET_SIZE = 60_000
    private const val EPOCHS = 10

    /**
     * Entry point: loads the data set, builds the network, trains it and prints the evaluation.
     *
     * @param args command line arguments; not used
     */
    @JvmStatic
    fun main(args: Array<String>) {
        val dataset = ZalandoMNISTDataSet().load()
        dataset.shuffle()
        val trainDatasetIterator = createDatasetIterator(dataset.subList(0, TRAIN_SET_SIZE))
        val testDatasetIterator = createDatasetIterator(dataset.subList(TRAIN_SET_SIZE, TOTAL_SET_SIZE))

        val cnn = buildCNN()
        learning(cnn, trainDatasetIterator)
        testing(cnn, testDatasetIterator)
    }

    /**
     * Wraps string records in a mini-batch iterator.
     *
     * Each record is expected to hold the pixel values followed by the label, which is why
     * the label index passed to the iterator is `IMAGE_HEIGHT * IMAGE_WIDTH`.
     */
    private fun createDatasetIterator(dataset: MutableList<List<String>>): RecordReaderDataSetIterator {
        val listStringRecordReader = ListStringRecordReader()
        listStringRecordReader.initialize(ListStringSplit(dataset))
        return RecordReaderDataSetIterator(
            listStringRecordReader,
            BATCH_SIZE,
            IMAGE_HEIGHT * IMAGE_WIDTH,
            NUM_CLASSES
        )
    }

    /**
     * Assembles and initializes the network: two convolution / batch-normalization / pooling
     * stages, two dense layers separated by batch normalization, and a softmax output layer.
     */
    private fun buildCNN(): MultiLayerNetwork {
        val configuration = NeuralNetConfiguration.Builder()
            .seed(123)
            .l2(0.0005)
            .updater(Adam())
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, buildInitialConvolutionLayer())
            .layer(1, buildBatchNormalizationLayer())
            .layer(2, buildPoolingLayer())
            .layer(3, buildConvolutionLayer())
            .layer(4, buildBatchNormalizationLayer())
            .layer(5, buildPoolingLayer())
            .layer(6, buildDenseLayer())
            .layer(7, buildBatchNormalizationLayer())
            .layer(8, buildDenseLayer())
            .layer(9, buildOutputLayer())
            .setInputType(InputType.convolutionalFlat(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
            .backprop(true)
            .build()

        val multiLayerNetwork = MultiLayerNetwork(configuration)
        multiLayerNetwork.init()
        return multiLayerNetwork
    }

    /** Softmax output layer with one unit per class, trained with negative log likelihood. */
    private fun buildOutputLayer(): OutputLayer =
        OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(NUM_CLASSES)
            .activation(Activation.SOFTMAX)
            .build()

    /** Fully connected ReLU layer with 500 units and a drop-out setting of 0.5. */
    private fun buildDenseLayer(): DenseLayer =
        DenseLayer.Builder()
            .activation(Activation.RELU)
            .nOut(500)
            .dropOut(0.5)
            .build()

    /** 2x2 max-pooling layer with stride 2. */
    private fun buildPoolingLayer(): SubsamplingLayer =
        SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
            .kernelSize(2, 2)
            .stride(2, 2)
            .build()

    /** Batch normalization layer with the builder's default settings. */
    private fun buildBatchNormalizationLayer(): BatchNormalization = BatchNormalization.Builder().build()

    /** Second 5x5 convolution (50 feature maps); nIn is left unset, unlike in the first layer. */
    private fun buildConvolutionLayer(): ConvolutionLayer =
        ConvolutionLayer.Builder(5, 5)
            .stride(1, 1)
            .nOut(50)
            .activation(Activation.IDENTITY)
            .build()

    /** First 5x5 convolution (20 feature maps) reading the single input channel. */
    private fun buildInitialConvolutionLayer(): ConvolutionLayer =
        ConvolutionLayer.Builder(5, 5)
            .nIn(IMAGE_CHANNELS)
            .stride(1, 1)
            .nOut(20)
            .activation(Activation.IDENTITY)
            .build()

    /** Fits the network on the training iterator [EPOCHS] times. */
    private fun learning(cnn: MultiLayerNetwork, trainSet: RecordReaderDataSetIterator) {
        repeat(EPOCHS) {
            cnn.fit(trainSet)
        }
    }

    /** Evaluates the network batch by batch and prints the statistics and confusion matrix. */
    private fun testing(cnn: MultiLayerNetwork, testSet: RecordReaderDataSetIterator) {
        val evaluation = Evaluation(NUM_CLASSES)
        while (testSet.hasNext()) {
            val batch = testSet.next()
            val output = cnn.output(batch.features)
            evaluation.eval(batch.labels, output)
        }

        println(evaluation.stats())
        println(evaluation.confusionToString())
    }
}
/**
 * Loads an IDX-formatted label file and image file into string records.
 *
 * Every record contains the [IMAGE_SIZE] pixel values of one image, each rendered as an
 * unsigned decimal string (`"0"`..`"255"`), followed by the image's label as the last element.
 *
 * @param labelsFile label file; the item count is read as a big-endian int at byte
 *   [NUM_ITEMS_OFFSET] and the labels start at byte [LABEL_OFFSET]
 * @param imagesFile image file; pixel data starts at byte [IMAGE_OFFSET], [IMAGE_SIZE] bytes per image
 */
class ZalandoMNISTDataSet @JvmOverloads constructor(
    private val labelsFile: File = File(DEFAULT_LABELS_PATH),
    private val imagesFile: File = File(DEFAULT_IMAGES_PATH)
) {

    /**
     * Reads both files completely and builds one record per label.
     *
     * @return a mutable list of records (pixels first, label last)
     * @throws IllegalArgumentException if either file is shorter than its header announces
     */
    fun load(): MutableList<List<String>> {
        val labelBytes = labelsFile.readBytes()
        val imageBytes = imagesFile.readBytes()

        require(labelBytes.size >= LABEL_OFFSET) {
            "Label file $labelsFile is too short to contain a header"
        }
        // ByteBuffer reads big-endian by default.
        val numberOfLabels = ByteBuffer.wrap(labelBytes, NUM_ITEMS_OFFSET, ITEMS_SIZE).int
        require(numberOfLabels >= 0 && labelBytes.size >= LABEL_OFFSET + numberOfLabels) {
            "Label file $labelsFile announces $numberOfLabels labels but holds ${labelBytes.size} bytes"
        }
        // Long arithmetic so that a corrupt header cannot overflow the size check.
        require(imageBytes.size >= IMAGE_OFFSET + numberOfLabels.toLong() * IMAGE_SIZE) {
            "Image file $imagesFile holds ${imageBytes.size} bytes, too few for $numberOfLabels images"
        }

        val records = ArrayList<List<String>>(numberOfLabels)
        for (i in 0 until numberOfLabels) {
            val startBoundary = IMAGE_OFFSET + i * IMAGE_SIZE
            val record = ArrayList<String>(IMAGE_SIZE + 1)
            for (position in startBoundary until startBoundary + IMAGE_SIZE) {
                record.add(unsignedString(imageBytes[position]))
            }
            record.add(unsignedString(labelBytes[LABEL_OFFSET + i]))
            records.add(record)
        }
        return records
    }

    /** Renders a byte as its unsigned value; Kotlin's Byte is signed, so 0xFF would otherwise print as -1. */
    private fun unsignedString(value: Byte): String = (value.toInt() and 0xFF).toString()

    private companion object {
        const val DEFAULT_LABELS_PATH = "machine-learning/src/main/resources/train-labels-idx1-ubyte"
        const val DEFAULT_IMAGES_PATH = "machine-learning/src/main/resources/train-images-idx3-ubyte"

        const val OFFSET_SIZE = 4 // in bytes
        const val NUM_ITEMS_OFFSET = 4
        const val ITEMS_SIZE = 4
        const val LABEL_OFFSET = OFFSET_SIZE + ITEMS_SIZE
        const val ROWS = 28
        const val COLUMNS = 28
        const val IMAGE_OFFSET = 16
        const val IMAGE_SIZE = ROWS * COLUMNS
    }
}
/**
 * Ordinary least-squares fit of a straight line `y = slope * x + yIntercept`.
 *
 * The coefficients are computed once, at construction time, from the paired samples.
 * If all [xs] are equal (or the lists are empty) the division by a zero variance makes
 * the coefficients non-finite; no exception is thrown for that case.
 *
 * @param xs values of the independent variable
 * @param ys values of the dependent variable, paired with [xs] by index
 * @throws IllegalArgumentException if [xs] and [ys] differ in size
 */
class SimpleLinearRegression(private val xs: List<Int>, private val ys: List<Int>) {
    // Kept as `var` so that existing callers that assign these properties keep compiling.
    var slope: Double = 0.0
    var yIntercept: Double = 0.0

    init {
        require(xs.size == ys.size) {
            "xs and ys must have the same size but were ${xs.size} and ${ys.size}"
        }
        // The means are computed once instead of once per sample.
        val meanX = xs.average()
        val meanY = ys.average()
        slope = calculateSlope(calculateCovariance(meanX, meanY), calculateVariance(meanX))
        yIntercept = calculateYIntercept(meanY, slope, meanX)
    }

    /** Returns the fitted line's value at [independentVariable]. */
    fun predict(independentVariable: Double) = slope * independentVariable + yIntercept

    /**
     * Returns the coefficient of determination, `(SST - SSR) / SST`, where SST is the sum of
     * squared deviations of [ys] from their mean and SSR the sum of squared prediction errors.
     */
    fun calculateRSquared(): Double {
        val meanY = ys.average()
        val sst = ys.fold(0.0) { acc, y -> acc + (y - meanY).pow(2) }
        val ssr = xs.zip(ys) { x, y -> (y - predict(x.toDouble())).pow(2) }.sum()
        return (sst - ssr) / sst
    }

    private fun calculateYIntercept(meanY: Double, slope: Double, meanX: Double) = meanY - slope * meanX

    private fun calculateSlope(covariance: Double, variance: Double) = covariance / variance

    /** Sum of the products of the deviations from the means (not divided by the sample count). */
    private fun calculateCovariance(meanX: Double, meanY: Double) =
        xs.zip(ys) { x, y -> (x - meanX) * (y - meanY) }.sum()

    /** Sum of the squared deviations of [xs] from their mean (not divided by the sample count). */
    private fun calculateVariance(meanX: Double) = xs.fold(0.0) { acc, x -> acc + (x - meanX).pow(2) }
}
/**
 * Checks the predictions and the R-squared value of [SimpleLinearRegression] against
 * hand-picked data sets.
 */
class SimpleLinearRegressionUnitTest {
    // All tests share the same independent variable values; no test mutates the list.
    private val inputs = arrayListOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    private val tolerance = 0.01

    @Test
    fun givenAProperDataSetWhenFedToASimpleLinearRegressionModelThenItPredictsCorrectly() {
        val regression = SimpleLinearRegression(inputs, arrayListOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200))

        assertEquals(38.99, regression.predict(2.5), tolerance)
        assertEquals(128.84, regression.predict(7.5), tolerance)
    }

    @Test
    fun givenAPredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredReliable() {
        val regression = SimpleLinearRegression(inputs, arrayListOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200))

        val rSquared = regression.calculateRSquared()

        assertEquals(0.95, rSquared, tolerance)
    }

    @Test
    fun givenAnUnpredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredUnreliable() {
        val regression = SimpleLinearRegression(inputs, arrayListOf(200, 0, 200, 0, 0, 0, -115, 1000, 0, 1))

        val rSquared = regression.calculateRSquared()

        assertEquals(0.01, rSquared, tolerance)
    }
}