* BAEL-3204

* BAEL-3204
This commit is contained in:
YassinHajaj 2019-11-16 00:15:43 +01:00 committed by maibin
parent 86c72ffd7b
commit 0bf12a690c
7 changed files with 384 additions and 0 deletions

150
machine-learning/pom.xml Normal file
View File

@ -0,0 +1,150 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>machine-learning</artifactId>
<version>1.0-SNAPSHOT</version>
<name>Supervised Learning</name>
<packaging>jar</packaging>
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Java level kept in line with the kotlin-stdlib-jdk8 dependency and the Kotlin plugin's jvmTarget (1.8). -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <kotlin.version>1.3.50</kotlin.version>
    <dl4j.version>0.9.1</dl4j.version>
</properties>
<dependencies>
    <!-- Kotlin standard library (JDK 8 flavour); declared exactly once. -->
    <dependency>
        <groupId>org.jetbrains.kotlin</groupId>
        <artifactId>kotlin-stdlib-jdk8</artifactId>
        <version>${kotlin.version}</version>
    </dependency>
    <!-- ND4J native backend; version is shared with Deeplearning4j via dl4j.version. -->
    <dependency>
        <groupId>org.nd4j</groupId>
        <artifactId>nd4j-native-platform</artifactId>
        <version>${dl4j.version}</version>
    </dependency>
    <!-- Deeplearning4j core: network configuration, layers, evaluation and DataVec iterators. -->
    <dependency>
        <groupId>org.deeplearning4j</groupId>
        <artifactId>deeplearning4j-core</artifactId>
        <version>${dl4j.version}</version>
    </dependency>
    <!-- Kotlin test helpers, only on the test classpath. -->
    <dependency>
        <groupId>org.jetbrains.kotlin</groupId>
        <artifactId>kotlin-test</artifactId>
        <version>${kotlin.version}</version>
        <scope>test</scope>
    </dependency>
</dependencies>
<build>
<sourceDirectory>src/main/kotlin</sourceDirectory>
<testSourceDirectory>src/test</testSourceDirectory>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<!-- Kotlin compiler: builds the main sources in the compile phase and the test sources in the test-compile phase. -->
<plugin>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-maven-plugin</artifactId>
<version>${kotlin.version}</version>
<executions>
<execution>
<id>compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test-compile</id>
<phase>test-compile</phase>
<goals>
<goal>test-compile</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- Bytecode level emitted by the Kotlin compiler. -->
<jvmTarget>1.8</jvmTarget>
</configuration>
</plugin>
<!-- Java compiler: no version is given here, so the one locked in pluginManagement applies. -->
<!-- It is declared after the Kotlin plugin and bound to the same compile / test-compile phases. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<execution>
<id>compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>testCompile</id>
<phase>test-compile</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,117 @@
package com.baeldung.cnn
import org.datavec.api.records.reader.impl.collection.ListStringRecordReader
import org.datavec.api.split.ListStringSplit
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
import org.deeplearning4j.nn.conf.inputs.InputType
import org.deeplearning4j.nn.conf.layers.*
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.learning.config.Adam
import org.nd4j.linalg.lossfunctions.LossFunctions
/**
 * Trains and evaluates a small convolutional neural network with Deeplearning4j.
 *
 * Records come from [ZalandoMNISTDataSet]: every record holds 28 * 28 pixel values followed by
 * the class label. After shuffling, the first 50,000 records are used for training and records
 * 50,000 until 60,000 for evaluation.
 */
object ConvolutionalNeuralNetwork {
    private const val IMAGE_HEIGHT = 28
    private const val IMAGE_WIDTH = 28
    private const val IMAGE_CHANNELS = 1

    /** Column index of the label inside a record; it directly follows the pixel columns. */
    private const val LABEL_INDEX = IMAGE_HEIGHT * IMAGE_WIDTH
    private const val NUM_CLASSES = 10
    private const val BATCH_SIZE = 128
    private const val EPOCHS = 10
    private const val TRAIN_SET_SIZE = 50_000
    private const val DATA_SET_SIZE = 60_000

    /** Entry point: loads the data, builds the network, trains it and prints the evaluation. */
    @JvmStatic
    fun main(args: Array<String>) {
        val dataset = ZalandoMNISTDataSet().load()
        dataset.shuffle()

        val trainDatasetIterator = createDatasetIterator(dataset.subList(0, TRAIN_SET_SIZE))
        val testDatasetIterator = createDatasetIterator(dataset.subList(TRAIN_SET_SIZE, DATA_SET_SIZE))

        val cnn = buildCNN()
        learning(cnn, trainDatasetIterator)
        testing(cnn, testDatasetIterator)
    }

    /**
     * Wraps the string records in a DataVec record reader and exposes them as mini-batches.
     *
     * @param dataset records laid out as pixel columns followed by one label column
     * @return an iterator producing batches of [BATCH_SIZE] examples with [NUM_CLASSES] label classes
     */
    private fun createDatasetIterator(dataset: MutableList<List<String>>): RecordReaderDataSetIterator {
        val listStringRecordReader = ListStringRecordReader()
        listStringRecordReader.initialize(ListStringSplit(dataset))
        return RecordReaderDataSetIterator(listStringRecordReader, BATCH_SIZE, LABEL_INDEX, NUM_CLASSES)
    }

    /**
     * Assembles and initialises the network:
     * convolution -> batch norm -> max pooling, twice, followed by
     * dense -> batch norm -> dense -> softmax output.
     */
    private fun buildCNN(): MultiLayerNetwork {
        val configuration = NeuralNetConfiguration.Builder()
                .seed(123)
                .l2(0.0005)
                .updater(Adam())
                .weightInit(WeightInit.XAVIER)
                .list()
                .layer(0, buildInitialConvolutionLayer())
                .layer(1, buildBatchNormalizationLayer())
                .layer(2, buildPoolingLayer())
                .layer(3, buildConvolutionLayer())
                .layer(4, buildBatchNormalizationLayer())
                .layer(5, buildPoolingLayer())
                .layer(6, buildDenseLayer())
                .layer(7, buildBatchNormalizationLayer())
                .layer(8, buildDenseLayer())
                .layer(9, buildOutputLayer())
                // Records are flat pixel rows; this input type lets the builder add the reshape to 28x28x1.
                .setInputType(InputType.convolutionalFlat(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
                .backprop(true)
                .build()

        val multiLayerNetwork = MultiLayerNetwork(configuration)
        multiLayerNetwork.init()
        return multiLayerNetwork
    }

    /** Softmax classification layer with one output per class, trained with negative log-likelihood. */
    private fun buildOutputLayer(): OutputLayer =
            OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(NUM_CLASSES)
                    .activation(Activation.SOFTMAX)
                    .build()

    /** Fully connected ReLU layer with 500 units and a dropout setting of 0.5. */
    private fun buildDenseLayer(): DenseLayer =
            DenseLayer.Builder().activation(Activation.RELU)
                    .nOut(500)
                    .dropOut(0.5)
                    .build()

    /** 2x2 max pooling with stride 2. */
    private fun buildPoolingLayer(): SubsamplingLayer =
            SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2)
                    .stride(2, 2)
                    .build()

    /** Batch normalization layer with the library's default settings. */
    private fun buildBatchNormalizationLayer() = BatchNormalization.Builder().build()

    /** Second 5x5 convolution with 50 output channels; nIn is not set because it is inferred for later layers. */
    private fun buildConvolutionLayer(): ConvolutionLayer =
            ConvolutionLayer.Builder(5, 5)
                    .stride(1, 1)
                    .nOut(50)
                    .activation(Activation.IDENTITY)
                    .build()

    /** First 5x5 convolution: one input channel, 20 output channels. */
    private fun buildInitialConvolutionLayer(): ConvolutionLayer =
            ConvolutionLayer.Builder(5, 5)
                    .nIn(IMAGE_CHANNELS)
                    .stride(1, 1)
                    .nOut(20)
                    .activation(Activation.IDENTITY)
                    .build()

    /** Fits the network on the complete training iterator [EPOCHS] times. */
    private fun learning(cnn: MultiLayerNetwork, trainSet: RecordReaderDataSetIterator) {
        repeat(EPOCHS) {
            cnn.fit(trainSet)
        }
    }

    /**
     * Runs the network over the test iterator and prints the evaluation statistics
     * and the confusion matrix.
     */
    private fun testing(cnn: MultiLayerNetwork, testSet: RecordReaderDataSetIterator) {
        val evaluation = Evaluation(NUM_CLASSES)
        while (testSet.hasNext()) {
            val batch = testSet.next()
            // Request inference mode explicitly (train = false) so that dropout and batch
            // normalization behave as at prediction time regardless of the library's default.
            val output = cnn.output(batch.features, false)
            evaluation.eval(batch.labels, output)
        }
        println(evaluation.stats())
        println(evaluation.confusionToString())
    }
}

View File

@ -0,0 +1,45 @@
package com.baeldung.cnn
import java.io.File
import java.nio.ByteBuffer
import java.util.*
import java.util.stream.Collectors
import kotlin.streams.asStream
/**
 * Reads an IDX-formatted image/label file pair and converts it into string records.
 *
 * Layout as read by this class: the label file stores a 4-byte big-endian item count at
 * offset 4 and one byte per label from offset 8; the image file stores 28 * 28 bytes per
 * image from offset 16.
 */
class ZalandoMNISTDataSet {

    /**
     * Loads all records announced by the label file header.
     *
     * Bytes are interpreted as unsigned values, so pixel intensities are rendered as
     * "0".."255" rather than as signed bytes.
     *
     * @param labelsFile IDX label file; defaults to the training labels bundled with the module
     * @param imagesFile IDX image file; defaults to the training images bundled with the module
     * @return one list per image containing 784 pixel strings followed by the label string
     * @throws IllegalArgumentException if either file is shorter than the header's item count requires
     */
    @JvmOverloads
    fun load(
        labelsFile: File = File(DEFAULT_LABELS_PATH),
        imagesFile: File = File(DEFAULT_IMAGES_PATH)
    ): MutableList<List<String>> {
        val labelBytes = labelsFile.readBytes()
        val imageBytes = imagesFile.readBytes()

        // ByteBuffer reads big-endian by default, which matches the IDX header encoding.
        val numberOfLabels = ByteBuffer.wrap(labelBytes, NUM_ITEMS_OFFSET, ITEMS_SIZE).int

        // Long arithmetic so that a corrupt header cannot overflow the size computation.
        require(labelBytes.size.toLong() >= LABEL_OFFSET + numberOfLabels.toLong()) {
            "Label file ${labelsFile.path} is too short for $numberOfLabels labels"
        }
        require(imageBytes.size.toLong() >= IMAGE_OFFSET + numberOfLabels.toLong() * IMAGE_SIZE) {
            "Image file ${imagesFile.path} is too short for $numberOfLabels images"
        }

        val records = mutableListOf<List<String>>()
        for (i in 0 until numberOfLabels) {
            val start = IMAGE_OFFSET + i * IMAGE_SIZE
            val record = ArrayList<String>(IMAGE_SIZE + 1)
            for (position in start until start + IMAGE_SIZE) {
                record.add(unsignedString(imageBytes[position]))
            }
            // The label is appended as the last column of the record.
            record.add(unsignedString(labelBytes[LABEL_OFFSET + i]))
            records.add(record)
        }
        return records
    }

    /** Renders a byte as its unsigned decimal value (0..255). */
    private fun unsignedString(value: Byte): String = (value.toInt() and 0xFF).toString()

    private companion object {
        const val DEFAULT_LABELS_PATH = "machine-learning/src/main/resources/train-labels-idx1-ubyte"
        const val DEFAULT_IMAGES_PATH = "machine-learning/src/main/resources/train-images-idx3-ubyte"

        const val OFFSET_SIZE = 4 // in bytes
        const val NUM_ITEMS_OFFSET = 4
        const val ITEMS_SIZE = 4
        const val LABEL_OFFSET = OFFSET_SIZE + ITEMS_SIZE
        const val ROWS = 28
        const val COLUMNS = 28
        const val IMAGE_OFFSET = 16
        const val IMAGE_SIZE = ROWS * COLUMNS
    }
}

View File

@ -0,0 +1,31 @@
package com.baeldung.simplelinearregression
import kotlin.math.pow
/**
 * Least-squares fit of a straight line `y = slope * x + yIntercept` through the points
 * `(xs[i], ys[i])`.
 *
 * The fit is computed once at construction time. Degenerate inputs are not rejected: if all
 * x values are equal (or the lists are empty) the divisions below produce `NaN`/infinite values.
 *
 * @param xs values of the independent variable
 * @param ys values of the dependent variable; must have the same size as [xs]
 * @throws IllegalArgumentException if [xs] and [ys] differ in size
 */
class SimpleLinearRegression(private val xs: List<Int>, private val ys: List<Int>) {
    /** Slope of the fitted line. */
    var slope: Double = 0.0

    /** Value of the fitted line at `x = 0`. */
    var yIntercept: Double = 0.0

    init {
        // zip() would silently drop the surplus elements while the means still used all of them.
        require(xs.size == ys.size) {
            "xs and ys must have the same size but were ${xs.size} and ${ys.size}"
        }

        // Each mean is computed once instead of once per element.
        val meanX = xs.average()
        val meanY = ys.average()

        val covariance = calculateCovariance(meanX, meanY)
        val variance = calculateVariance(meanX)

        slope = covariance / variance
        yIntercept = meanY - slope * meanX
    }

    /** Returns the value of the fitted line at [independentVariable]. */
    fun predict(independentVariable: Double) = slope * independentVariable + yIntercept

    /**
     * Computes the coefficient of determination `(SST - SSR) / SST`, where SST is the total sum of
     * squares of [ys] around their mean and SSR is the sum of squared prediction errors.
     *
     * The result is `NaN` when all y values are equal, because SST is then zero.
     */
    fun calculateRSquared(): Double {
        val meanY = ys.average()
        val sst = ys.map { y -> (y - meanY).pow(2) }.sum()
        val ssr = xs.zip(ys) { x, y -> (y - predict(x.toDouble())).pow(2) }.sum()
        return (sst - ssr) / sst
    }

    /** Sum of the products of the x and y deviations from their means (not divided by n). */
    private fun calculateCovariance(meanX: Double, meanY: Double) =
        xs.zip(ys) { x, y -> (x - meanX) * (y - meanY) }.sum()

    /** Sum of the squared x deviations from their mean (not divided by n). */
    private fun calculateVariance(meanX: Double) =
        xs.map { x -> (x - meanX).pow(2) }.sum()
}

View File

@ -0,0 +1,40 @@
package com.baeldung.simplelinearregression
import org.junit.Assert.assertEquals
import org.junit.jupiter.api.Test
/**
 * Verifies the predictions and the R-squared score of [SimpleLinearRegression]
 * for a nearly linear data set and for an erratic one.
 */
class SimpleLinearRegressionUnitTest {
    // All scenarios share the same independent values 1..10.
    private val inputs = (1..10).toList()

    // Absolute tolerance used by every floating-point comparison below.
    private val tolerance = 0.01

    @Test
    fun givenAProperDataSetWhenFedToASimpleLinearRegressionModelThenItPredictsCorrectly() {
        val outputs = listOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200)
        val regression = SimpleLinearRegression(inputs, outputs)

        assertEquals(38.99, regression.predict(2.5), tolerance)
        assertEquals(128.84, regression.predict(7.5), tolerance)
    }

    @Test
    fun givenAPredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredReliable() {
        val outputs = listOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200)
        val rSquared = SimpleLinearRegression(inputs, outputs).calculateRSquared()

        assertEquals(0.95, rSquared, tolerance)
    }

    @Test
    fun givenAnUnpredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredUnreliable() {
        val outputs = listOf(200, 0, 200, 0, 0, 0, -115, 1000, 0, 1)
        val rSquared = SimpleLinearRegression(inputs, outputs).calculateRSquared()

        assertEquals(0.01, rSquared, tolerance)
    }
}

View File

@ -625,6 +625,7 @@
<module>spring-boot-nashorn</module>
<module>java-blockchain</module>
<module>machine-learning</module>
<module>wildfly</module>
</modules>