parent
86c72ffd7b
commit
0bf12a690c
|
@ -0,0 +1,150 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>machine-learning</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<name>Supervised Learning</name>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>parent-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Java level kept in step with the kotlin-maven-plugin jvmTarget (1.8) configured in this POM
         and with the kotlin-stdlib-jdk8 dependency; the previous 1.7 setting contradicted both. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Used for the Kotlin standard library, kotlin-test and the kotlin-maven-plugin. -->
    <kotlin.version>1.3.50</kotlin.version>
    <!-- Used for both nd4j-native-platform and deeplearning4j-core so they stay on the same release. -->
    <dl4j.version>0.9.1</dl4j.version>
</properties>
|
||||
|
||||
<dependencies>
    <!-- Kotlin standard library (JDK 8 flavour). Declared exactly once: repeating the same
         groupId:artifactId makes Maven report a non-unique dependency declaration. -->
    <dependency>
        <groupId>org.jetbrains.kotlin</groupId>
        <artifactId>kotlin-stdlib-jdk8</artifactId>
        <version>${kotlin.version}</version>
    </dependency>
    <!-- ND4J backend and Deeplearning4j core, both pinned through dl4j.version. -->
    <dependency>
        <groupId>org.nd4j</groupId>
        <artifactId>nd4j-native-platform</artifactId>
        <version>${dl4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.deeplearning4j</groupId>
        <artifactId>deeplearning4j-core</artifactId>
        <version>${dl4j.version}</version>
    </dependency>
    <!-- Test-only Kotlin assertions.
         NOTE(review): the unit tests import JUnit classes that are not declared here; presumably
         they are inherited from the parent POM. Confirm. -->
    <dependency>
        <groupId>org.jetbrains.kotlin</groupId>
        <artifactId>kotlin-test</artifactId>
        <version>${kotlin.version}</version>
        <scope>test</scope>
    </dependency>
</dependencies>
|
||||
<build>
    <sourceDirectory>src/main/kotlin</sourceDirectory>
    <testSourceDirectory>src/test</testSourceDirectory>
    <pluginManagement><!-- lock down plugin versions to avoid using Maven defaults (may be moved to parent pom) -->
        <plugins>
            <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
            <plugin>
                <artifactId>maven-clean-plugin</artifactId>
                <version>3.1.0</version>
            </plugin>
            <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
            <plugin>
                <artifactId>maven-resources-plugin</artifactId>
                <version>3.0.2</version>
            </plugin>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.0</version>
            </plugin>
            <plugin>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.22.1</version>
            </plugin>
            <plugin>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.0.2</version>
            </plugin>
            <plugin>
                <artifactId>maven-install-plugin</artifactId>
                <version>2.5.2</version>
            </plugin>
            <plugin>
                <artifactId>maven-deploy-plugin</artifactId>
                <version>2.8.2</version>
            </plugin>
            <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
            <plugin>
                <artifactId>maven-site-plugin</artifactId>
                <version>3.7.1</version>
            </plugin>
            <plugin>
                <artifactId>maven-project-info-reports-plugin</artifactId>
                <version>3.0.0</version>
            </plugin>
        </plugins>
    </pluginManagement>
    <plugins>
        <!-- Kotlin compiler: main sources in the compile phase, test sources in test-compile. -->
        <plugin>
            <groupId>org.jetbrains.kotlin</groupId>
            <artifactId>kotlin-maven-plugin</artifactId>
            <version>${kotlin.version}</version>
            <executions>
                <execution>
                    <id>compile</id>
                    <phase>compile</phase>
                    <goals>
                        <goal>compile</goal>
                    </goals>
                </execution>
                <execution>
                    <id>test-compile</id>
                    <phase>test-compile</phase>
                    <goals>
                        <goal>test-compile</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <jvmTarget>1.8</jvmTarget>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <executions>
                <!-- The jar lifecycle already binds default-compile and default-testCompile, and
                     those run ahead of the Kotlin executions declared above. Unbind them so javac
                     runs only once per phase, after the Kotlin compiler. -->
                <execution>
                    <id>default-compile</id>
                    <phase>none</phase>
                </execution>
                <execution>
                    <id>default-testCompile</id>
                    <phase>none</phase>
                </execution>
                <execution>
                    <id>java-compile</id>
                    <phase>compile</phase>
                    <goals>
                        <goal>compile</goal>
                    </goals>
                </execution>
                <execution>
                    <id>java-test-compile</id>
                    <phase>test-compile</phase>
                    <goals>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
|
||||
</project>
|
|
@ -0,0 +1,117 @@
|
|||
package com.baeldung.cnn
|
||||
|
||||
import org.datavec.api.records.reader.impl.collection.ListStringRecordReader
|
||||
import org.datavec.api.split.ListStringSplit
|
||||
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
|
||||
import org.deeplearning4j.eval.Evaluation
|
||||
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
|
||||
import org.deeplearning4j.nn.conf.inputs.InputType
|
||||
import org.deeplearning4j.nn.conf.layers.*
|
||||
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
|
||||
import org.deeplearning4j.nn.weights.WeightInit
|
||||
import org.nd4j.linalg.activations.Activation
|
||||
import org.nd4j.linalg.learning.config.Adam
|
||||
import org.nd4j.linalg.lossfunctions.LossFunctions
|
||||
|
||||
/**
 * Command-line entry point that builds a convolutional network, trains it on the rows returned by
 * [ZalandoMNISTDataSet.load] and prints evaluation statistics for a held-out slice of those rows.
 */
object ConvolutionalNeuralNetwork {

    private const val IMAGE_HEIGHT = 28
    private const val IMAGE_WIDTH = 28
    private const val IMAGE_CHANNELS = 1
    private const val CLASS_COUNT = 10
    private const val BATCH_SIZE = 128

    // Rows [0, TRAIN_END) are used for training, rows [TRAIN_END, TEST_END) for evaluation.
    private const val TRAIN_END = 50_000
    private const val TEST_END = 60_000

    // Number of full passes over the training iterator.
    private const val EPOCHS = 10

    /**
     * Loads and shuffles the data set, splits it into a training and a test slice, then trains
     * and evaluates the network.
     *
     * @param args unused
     */
    @JvmStatic
    fun main(args: Array<String>) {
        val dataset = ZalandoMNISTDataSet().load()
        dataset.shuffle()

        val trainDatasetIterator = createDatasetIterator(dataset.subList(0, TRAIN_END))
        val testDatasetIterator = createDatasetIterator(dataset.subList(TRAIN_END, TEST_END))

        val cnn = buildCNN()
        learning(cnn, trainDatasetIterator)
        testing(cnn, testDatasetIterator)
    }

    /**
     * Wraps in-memory string rows in a batching iterator.
     *
     * The label index is IMAGE_HEIGHT * IMAGE_WIDTH because each row produced by
     * [ZalandoMNISTDataSet.load] holds the pixel values first and the label as the last element.
     */
    private fun createDatasetIterator(dataset: MutableList<List<String>>): RecordReaderDataSetIterator {
        val recordReader = ListStringRecordReader()
        recordReader.initialize(ListStringSplit(dataset))
        return RecordReaderDataSetIterator(recordReader, BATCH_SIZE, IMAGE_HEIGHT * IMAGE_WIDTH, CLASS_COUNT)
    }

    /**
     * Assembles and initialises the network: two convolution / batch-normalisation / max-pooling
     * stages, then dense, batch-normalisation, dense and a softmax output layer.
     */
    private fun buildCNN(): MultiLayerNetwork {
        val configuration = NeuralNetConfiguration.Builder()
            .seed(123)
            .l2(0.0005)
            .updater(Adam())
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, buildInitialConvolutionLayer())
            .layer(1, buildBatchNormalizationLayer())
            .layer(2, buildPoolingLayer())
            .layer(3, buildConvolutionLayer())
            .layer(4, buildBatchNormalizationLayer())
            .layer(5, buildPoolingLayer())
            .layer(6, buildDenseLayer())
            .layer(7, buildBatchNormalizationLayer())
            .layer(8, buildDenseLayer())
            .layer(9, buildOutputLayer())
            .setInputType(InputType.convolutionalFlat(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS))
            .backprop(true)
            .build()

        val network = MultiLayerNetwork(configuration)
        network.init()
        return network
    }

    /** Softmax output layer with one unit per class, trained with negative log-likelihood loss. */
    private fun buildOutputLayer(): OutputLayer =
        OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(CLASS_COUNT)
            .activation(Activation.SOFTMAX)
            .build()

    /** ReLU dense layer with 500 units and a dropout setting of 0.5. */
    private fun buildDenseLayer(): DenseLayer =
        DenseLayer.Builder()
            .activation(Activation.RELU)
            .nOut(500)
            .dropOut(0.5)
            .build()

    /** Max-pooling layer with a 2x2 kernel and a 2x2 stride. */
    private fun buildPoolingLayer(): SubsamplingLayer =
        SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
            .kernelSize(2, 2)
            .stride(2, 2)
            .build()

    /** Batch-normalisation layer with the builder's default settings. */
    private fun buildBatchNormalizationLayer() = BatchNormalization.Builder().build()

    /** Second convolution layer: 5x5 kernel, stride 1, 50 output channels, identity activation. */
    private fun buildConvolutionLayer(): ConvolutionLayer =
        ConvolutionLayer.Builder(5, 5)
            .stride(1, 1) // nIn does not need to be specified in later layers
            .nOut(50)
            .activation(Activation.IDENTITY)
            .build()

    /** First convolution layer: one input channel, 5x5 kernel, stride 1, 20 output channels. */
    private fun buildInitialConvolutionLayer(): ConvolutionLayer =
        ConvolutionLayer.Builder(5, 5)
            .nIn(IMAGE_CHANNELS)
            .stride(1, 1)
            .nOut(20)
            .activation(Activation.IDENTITY)
            .build()

    /** Fits [cnn] on [trainSet] once per epoch, EPOCHS times in total. */
    private fun learning(cnn: MultiLayerNetwork, trainSet: RecordReaderDataSetIterator) {
        repeat(EPOCHS) {
            cnn.fit(trainSet)
        }
    }

    /** Runs [cnn] over every batch of [testSet] and prints the evaluation statistics and confusion matrix. */
    private fun testing(cnn: MultiLayerNetwork, testSet: RecordReaderDataSetIterator) {
        val evaluation = Evaluation(CLASS_COUNT)
        while (testSet.hasNext()) {
            val batch = testSet.next()
            evaluation.eval(batch.labels, cnn.output(batch.features))
        }

        println(evaluation.stats())
        println(evaluation.confusionToString())
    }
}
|
|
@ -0,0 +1,45 @@
|
|||
package com.baeldung.cnn
|
||||
|
||||
import java.io.File
|
||||
import java.nio.ByteBuffer
|
||||
import java.util.*
|
||||
import java.util.stream.Collectors
|
||||
import kotlin.streams.asStream
|
||||
|
||||
/**
 * Reads a label file and an image file and turns them into rows of decimal strings, one row per
 * image: ROWS * COLUMNS pixel values followed by the label as the last element.
 *
 * The default file names end in "ubyte", so every data byte is rendered as an unsigned value
 * (0..255) rather than with the signed [Byte.toString].
 */
class ZalandoMNISTDataSet {

    /**
     * Loads all images announced by the item count stored in the label file header.
     *
     * @param labelsPath path of the label file; defaults to the bundled training labels
     * @param imagesPath path of the image file; defaults to the bundled training images
     * @return one list per image holding the pixel values followed by the label
     * @throws IllegalArgumentException if either file is shorter than its header plus the
     * announced number of items, or if the announced count is negative
     */
    @JvmOverloads
    fun load(
        labelsPath: String = DEFAULT_LABELS_PATH,
        imagesPath: String = DEFAULT_IMAGES_PATH
    ): MutableList<List<String>> {
        val labelBytes = File(labelsPath).readBytes()
        val imageBytes = File(imagesPath).readBytes()

        require(labelBytes.size >= LABEL_DATA_OFFSET) { "Label file is shorter than its header: $labelsPath" }
        // The item count is the 4-byte big-endian integer that follows the first 4 header bytes.
        val numberOfLabels = ByteBuffer.wrap(labelBytes).getInt(NUM_ITEMS_OFFSET)
        require(numberOfLabels >= 0) { "Label file announces a negative item count: $numberOfLabels" }
        require(labelBytes.size.toLong() >= LABEL_DATA_OFFSET.toLong() + numberOfLabels) {
            "Label file holds fewer than the announced $numberOfLabels labels: $labelsPath"
        }
        // Long arithmetic so a huge announced count cannot overflow the size check.
        require(imageBytes.size.toLong() >= IMAGE_OFFSET + numberOfLabels.toLong() * IMAGE_SIZE) {
            "Image file holds fewer than the announced $numberOfLabels images: $imagesPath"
        }

        val rows = mutableListOf<List<String>>()
        for (i in 0 until numberOfLabels) {
            val start = IMAGE_OFFSET + i * IMAGE_SIZE
            val row = ArrayList<String>(IMAGE_SIZE + 1)
            for (position in start until start + IMAGE_SIZE) {
                row.add(unsignedText(imageBytes[position]))
            }
            row.add(unsignedText(labelBytes[LABEL_DATA_OFFSET + i]))
            rows.add(row)
        }
        return rows
    }

    /** Renders [value] as an unsigned decimal string in the range "0".."255". */
    private fun unsignedText(value: Byte): String = (value.toInt() and UNSIGNED_BYTE_MASK).toString()

    private companion object {
        const val DEFAULT_LABELS_PATH = "machine-learning/src/main/resources/train-labels-idx1-ubyte"
        const val DEFAULT_IMAGES_PATH = "machine-learning/src/main/resources/train-images-idx3-ubyte"

        const val NUM_ITEMS_OFFSET = 4 // in bytes
        const val LABEL_DATA_OFFSET = 8 // 4 leading header bytes + 4-byte item count
        const val ROWS = 28
        const val COLUMNS = 28
        const val IMAGE_OFFSET = 16
        const val IMAGE_SIZE = ROWS * COLUMNS
        const val UNSIGNED_BYTE_MASK = 0xFF
    }
}
|
|
@ -0,0 +1,31 @@
|
|||
package com.baeldung.simplelinearregression
|
||||
|
||||
import kotlin.math.pow
|
||||
|
||||
/**
 * Least-squares fit of a straight line `y = slope * x + yIntercept` through the points formed by
 * pairing [xs] and [ys] element by element. The fit is computed once, at construction time.
 *
 * If every x value is identical the variance is zero and [slope] is not a finite number.
 *
 * @param xs values of the independent variable
 * @param ys values of the dependent variable, one per element of [xs]
 * @throws IllegalArgumentException if the lists are empty or differ in size
 */
class SimpleLinearRegression(private val xs: List<Int>, private val ys: List<Int>) {
    var slope: Double = 0.0
    var yIntercept: Double = 0.0

    init {
        require(xs.size == ys.size) { "xs and ys must have the same size but were ${xs.size} and ${ys.size}" }
        require(xs.isNotEmpty()) { "at least one data point is required" }

        // Each mean is computed once here instead of once per element inside the summations.
        val meanX = xs.average()
        val meanY = ys.average()
        slope = calculateSlope(calculateCovariance(xs, ys, meanX, meanY), calculateVariance(xs, meanX))
        yIntercept = calculateYIntercept(meanY, slope, meanX)
    }

    /** Returns the fitted line's value at [independentVariable]. */
    fun predict(independentVariable: Double) = slope * independentVariable + yIntercept

    /**
     * Returns the coefficient of determination of the fit on the training points, computed as
     * (total sum of squares - residual sum of squares) / total sum of squares.
     * The result is not a finite number when every y value is identical.
     */
    fun calculateRSquared(): Double {
        val meanY = ys.average()
        val totalSumOfSquares = ys.map { y -> (y - meanY).pow(2) }.sum()
        val residualSumOfSquares = xs.zip(ys) { x, y -> (y - predict(x.toDouble())).pow(2) }.sum()
        return (totalSumOfSquares - residualSumOfSquares) / totalSumOfSquares
    }

    /** Intercept that makes the line pass through the point of means. */
    private fun calculateYIntercept(meanY: Double, slope: Double, meanX: Double) = meanY - slope * meanX

    private fun calculateSlope(covariance: Double, variance: Double) = covariance / variance

    /** Sum of the products of the deviations from the means (not divided by the point count). */
    private fun calculateCovariance(xs: List<Int>, ys: List<Int>, meanX: Double, meanY: Double) =
        xs.zip(ys) { x, y -> (x - meanX) * (y - meanY) }.sum()

    /** Sum of the squared deviations from the mean (not divided by the point count). */
    private fun calculateVariance(xs: List<Int>, meanX: Double) = xs.map { x -> (x - meanX).pow(2) }.sum()
}
|
Binary file not shown.
|
@ -0,0 +1,40 @@
|
|||
package com.baeldung.simplelinearregression
|
||||
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
 * Checks the predictions and the R-squared score of [SimpleLinearRegression] against fixed data sets
 * whose x values are always 1 through 10.
 */
class SimpleLinearRegressionUnitTest {

    @Test
    fun givenAProperDataSetWhenFedToASimpleLinearRegressionModelThenItPredictsCorrectly() {
        val regression = fitAgainstOneToTen(arrayListOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200))

        assertEquals(38.99, regression.predict(2.5), 0.01)
        assertEquals(128.84, regression.predict(7.5), 0.01)
    }

    @Test
    fun givenAPredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredReliable() {
        val regression = fitAgainstOneToTen(arrayListOf(25, 35, 49, 60, 75, 90, 115, 130, 150, 200))

        assertEquals(0.95, regression.calculateRSquared(), 0.01)
    }

    @Test
    fun givenAnUnpredictableDataSetWhenCalculatingTheLossFunctionThenTheModelIsConsideredUnreliable() {
        val regression = fitAgainstOneToTen(arrayListOf(200, 0, 200, 0, 0, 0, -115, 1000, 0, 1))

        assertEquals(0.01, regression.calculateRSquared(), 0.01)
    }

    /** Builds a model that pairs the x values 1..10 with the given y values. */
    private fun fitAgainstOneToTen(observations: ArrayList<Int>): SimpleLinearRegression {
        val inputs = arrayListOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        return SimpleLinearRegression(inputs, observations)
    }
}
|
Loading…
Reference in New Issue