MATH-1548: Move standard quality measures of a SOM into class "NeuronSquareMesh2D".
All these indicators are usually computed in order to evaluate the quality of a SOM: Computing them separately is inefficient when the number of samples becomes large.
This commit is contained in:
parent
9cbf1d1844
commit
28e5b802fe
|
@ -20,6 +20,7 @@ package org.apache.commons.math4.ml.neuralnet.twod;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Collection;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.ObjectInputStream;
|
import java.io.ObjectInputStream;
|
||||||
|
|
||||||
|
@ -30,6 +31,10 @@ import org.apache.commons.math4.ml.neuralnet.FeatureInitializer;
|
||||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
import org.apache.commons.math4.ml.neuralnet.Network;
|
||||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
||||||
import org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood;
|
import org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood;
|
||||||
|
import org.apache.commons.math4.ml.neuralnet.MapRanking;
|
||||||
|
import org.apache.commons.math4.ml.neuralnet.twod.util.LocationFinder;
|
||||||
|
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
||||||
|
import org.apache.commons.math4.ml.distance.EuclideanDistance;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Neural network with the topology of a two-dimensional surface.
|
* Neural network with the topology of a two-dimensional surface.
|
||||||
|
@ -339,6 +344,17 @@ public class NeuronSquareMesh2D
|
||||||
return location == null ? null : getNeuron(location[0], location[1]);
|
return location == null ? null : getNeuron(location[0], location[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes various {@link DataVisualization indicators} of the quality
|
||||||
|
* of the representation of the given {@code data} by this map.
|
||||||
|
*
|
||||||
|
* @param data Features.
|
||||||
|
* @return a new instance holding quality indicators.
|
||||||
|
*/
|
||||||
|
public DataVisualization computeQualityIndicators(Iterable<double[]> data) {
|
||||||
|
return DataVisualization.from(copy(), data);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the location of a neighbouring neuron.
|
* Computes the location of a neighbouring neuron.
|
||||||
* Returns {@code null} if the resulting location is not part
|
* Returns {@code null} if the resulting location is not part
|
||||||
|
@ -625,4 +641,227 @@ public class NeuronSquareMesh2D
|
||||||
featuresList);
|
featuresList);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Miscellaneous indicators of the map quality:
|
||||||
|
* <ul>
|
||||||
|
* <li>Hit histogram</li>
|
||||||
|
* <li>Quantization error</li>
|
||||||
|
* <li>Topographic error</li>
|
||||||
|
* <li>Unified distance matrix</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
public static class DataVisualization {
|
||||||
|
/** Distance function. */
|
||||||
|
private static final DistanceMeasure DISTANCE = new EuclideanDistance();
|
||||||
|
/** Total number of samples. */
|
||||||
|
private final int numberOfSamples;
|
||||||
|
/** Hit histogram. */
|
||||||
|
private final double[][] hitHistogram;
|
||||||
|
/** Quantization error. */
|
||||||
|
private final double[][] quantizationError;
|
||||||
|
/** Mean quantization error. */
|
||||||
|
private final double meanQuantizationError;
|
||||||
|
/** Topographic error. */
|
||||||
|
private final double[][] topographicError;
|
||||||
|
/** Mean topographic error. */
|
||||||
|
private final double meanTopographicError;
|
||||||
|
/** U-matrix. */
|
||||||
|
private final double[][] uMatrix;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param numberOfSamples Number of samples.
|
||||||
|
* @param hitHistogram Hit histogram.
|
||||||
|
* @param quantizationError Quantization error.
|
||||||
|
* @param topographicError Topographic error.
|
||||||
|
* @param uMatrix U-matrix.
|
||||||
|
*/
|
||||||
|
private DataVisualization(int numberOfSamples,
|
||||||
|
double[][] hitHistogram,
|
||||||
|
double[][] quantizationError,
|
||||||
|
double[][] topographicError,
|
||||||
|
double[][] uMatrix) {
|
||||||
|
this.numberOfSamples = numberOfSamples;
|
||||||
|
this.hitHistogram = hitHistogram;
|
||||||
|
this.quantizationError = quantizationError;
|
||||||
|
meanQuantizationError = hitWeightedMean(quantizationError, hitHistogram);
|
||||||
|
this.topographicError = topographicError;
|
||||||
|
meanTopographicError = hitWeightedMean(topographicError, hitHistogram);
|
||||||
|
this.uMatrix = uMatrix;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param map Map
|
||||||
|
* @param data Data.
|
||||||
|
* @return the metrics.
|
||||||
|
*/
|
||||||
|
static DataVisualization from(NeuronSquareMesh2D map,
|
||||||
|
Iterable<double[]> data) {
|
||||||
|
final LocationFinder finder = new LocationFinder(map);
|
||||||
|
final MapRanking rank = new MapRanking(map, DISTANCE);
|
||||||
|
final Network net = map.getNetwork();
|
||||||
|
final int nR = map.getNumberOfRows();
|
||||||
|
final int nC = map.getNumberOfColumns();
|
||||||
|
|
||||||
|
// Hit bins.
|
||||||
|
final int[][] hitCounter = new int[nR][nC];
|
||||||
|
// Hit bins.
|
||||||
|
final double[][] hitHistogram = new double[nR][nC];
|
||||||
|
// Quantization error bins.
|
||||||
|
final double[][] quantizationError = new double[nR][nC];
|
||||||
|
// Topographic error bins.
|
||||||
|
final double[][] topographicError = new double[nR][nC];
|
||||||
|
// U-matrix.
|
||||||
|
final double[][] uMatrix = new double[nR][nC];
|
||||||
|
|
||||||
|
int numSamples = 0;
|
||||||
|
for (double[] sample : data) {
|
||||||
|
++numSamples;
|
||||||
|
|
||||||
|
final List<Neuron> winners = rank.rank(sample, 2);
|
||||||
|
final Neuron best = winners.get(0);
|
||||||
|
final Neuron secondBest = winners.get(1);
|
||||||
|
|
||||||
|
final LocationFinder.Location locBest = finder.getLocation(best);
|
||||||
|
final int rowBest = locBest.getRow();
|
||||||
|
final int colBest = locBest.getColumn();
|
||||||
|
// Increment hit counter.
|
||||||
|
hitCounter[rowBest][colBest] += 1;
|
||||||
|
|
||||||
|
// Aggregate quantization error.
|
||||||
|
quantizationError[rowBest][colBest] += DISTANCE.compute(sample, best.getFeatures());
|
||||||
|
|
||||||
|
// Aggregate topographic error.
|
||||||
|
if (!net.getNeighbours(best).contains(secondBest)) {
|
||||||
|
// Increment count if first and second best matching units
|
||||||
|
// are not neighbours.
|
||||||
|
topographicError[rowBest][colBest] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int r = 0; r < nR; r++) {
|
||||||
|
for (int c = 0; c < nC; c++) {
|
||||||
|
final Neuron neuron = map.getNeuron(r, c);
|
||||||
|
final Collection<Neuron> neighbours = net.getNeighbours(neuron);
|
||||||
|
final double[] features = neuron.getFeatures();
|
||||||
|
double uDistance = 0;
|
||||||
|
int neighbourCount = 0;
|
||||||
|
for (Neuron n : neighbours) {
|
||||||
|
++neighbourCount;
|
||||||
|
uDistance += DISTANCE.compute(features, n.getFeatures());
|
||||||
|
}
|
||||||
|
|
||||||
|
final int hitCount = hitCounter[r][c];
|
||||||
|
if (hitCount != 0) {
|
||||||
|
hitHistogram[r][c] = hitCount / (double) numSamples;
|
||||||
|
quantizationError[r][c] /= hitCount;
|
||||||
|
topographicError[r][c] /= hitCount;
|
||||||
|
uMatrix[r][c] = uDistance / neighbourCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new DataVisualization(numSamples,
|
||||||
|
hitHistogram,
|
||||||
|
quantizationError,
|
||||||
|
topographicError,
|
||||||
|
uMatrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the total number of samples.
|
||||||
|
*/
|
||||||
|
public final int getNumberOfSamples() {
|
||||||
|
return numberOfSamples;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the quantization error.
|
||||||
|
* Each bin will contain the average of the distances between samples
|
||||||
|
* mapped to the corresponding unit and the weight vector of that unit.
|
||||||
|
* @see #getMeanQuantizationError()
|
||||||
|
*/
|
||||||
|
public double[][] getQuantizationError() {
|
||||||
|
return copy(quantizationError);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the topographic error.
|
||||||
|
* Each bin will contain the number of data for which the first and
|
||||||
|
* second best matching units are not adjacent in the map.
|
||||||
|
* @see #getMeanTopographicError()
|
||||||
|
*/
|
||||||
|
public double[][] getTopographicError() {
|
||||||
|
return copy(topographicError);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the hits histogram (normalized).
|
||||||
|
* Each bin will contain the number of data for which the corresponding
|
||||||
|
* neuron is the best matching unit.
|
||||||
|
*/
|
||||||
|
public double[][] getNormalizedHits() {
|
||||||
|
return copy(hitHistogram);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the U-matrix.
|
||||||
|
* Each bin will contain the average distance between a unit and all its
|
||||||
|
* neighbours will be computed (and stored in the pixel corresponding to
|
||||||
|
* that unit of the 2D-map). The number of neighbours taken into account
|
||||||
|
* depends on the network {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
|
||||||
|
* neighbourhood type}.
|
||||||
|
*/
|
||||||
|
public double[][] getUMatrix() {
|
||||||
|
return copy(uMatrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the mean (hit-weighted) quantization error.
|
||||||
|
* @see #getQuantizationError()
|
||||||
|
*/
|
||||||
|
public double getMeanQuantizationError() {
|
||||||
|
return meanQuantizationError;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the mean (hit-weighted) topographic error.
|
||||||
|
* @see #getTopographicError()
|
||||||
|
*/
|
||||||
|
public double getMeanTopographicError() {
|
||||||
|
return meanTopographicError;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param orig Source.
|
||||||
|
* @return a deep copy of the original array.
|
||||||
|
*/
|
||||||
|
private static double[][] copy(double[][] orig) {
|
||||||
|
final double[][] copy = new double[orig.length][];
|
||||||
|
for (int i = 0; i < orig.length; i++) {
|
||||||
|
copy[i] = orig[i].clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param metrics Metrics.
|
||||||
|
* @param normalizedHits Hits histogram (normalized).
|
||||||
|
* @return the hit-weighted mean of the given {@code metrics}.
|
||||||
|
*/
|
||||||
|
private double hitWeightedMean(double[][] metrics,
|
||||||
|
double[][] normalizedHits) {
|
||||||
|
double mean = 0;
|
||||||
|
final int rows = metrics.length;
|
||||||
|
final int cols = metrics[0].length;
|
||||||
|
for (int i = 0; i < rows; i++) {
|
||||||
|
for (int j = 0; j < cols; j++) {
|
||||||
|
mean += normalizedHits[i][j] * metrics[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mean;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,85 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
|
||||||
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.MapRanking;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
|
||||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the hit histogram.
|
|
||||||
* Each bin will contain the number of data for which the corresponding
|
|
||||||
* neuron is the best matching unit.
|
|
||||||
* @since 3.6
|
|
||||||
*/
|
|
||||||
public class HitHistogram implements MapDataVisualization {
|
|
||||||
/** Distance. */
|
|
||||||
private final DistanceMeasure distance;
|
|
||||||
/** Whether to compute relative bin counts. */
|
|
||||||
private final boolean normalizeCount;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param normalizeCount Whether to compute relative bin counts.
|
|
||||||
* If {@code true}, the data count in each bin will be divided by the total
|
|
||||||
* number of samples.
|
|
||||||
* @param distance Distance.
|
|
||||||
*/
|
|
||||||
public HitHistogram(boolean normalizeCount,
|
|
||||||
DistanceMeasure distance) {
|
|
||||||
this.normalizeCount = normalizeCount;
|
|
||||||
this.distance = distance;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
|
||||||
Iterable<double[]> data) {
|
|
||||||
final int nR = map.getNumberOfRows();
|
|
||||||
final int nC = map.getNumberOfColumns();
|
|
||||||
|
|
||||||
final LocationFinder finder = new LocationFinder(map);
|
|
||||||
final MapRanking rank = new MapRanking(map.getNetwork(), distance);
|
|
||||||
|
|
||||||
// Total number of samples.
|
|
||||||
int numSamples = 0;
|
|
||||||
// Hit bins.
|
|
||||||
final double[][] hit = new double[nR][nC];
|
|
||||||
|
|
||||||
for (double[] sample : data) {
|
|
||||||
final Neuron best = rank.rank(sample, 1).get(0);
|
|
||||||
|
|
||||||
final LocationFinder.Location loc = finder.getLocation(best);
|
|
||||||
final int row = loc.getRow();
|
|
||||||
final int col = loc.getColumn();
|
|
||||||
hit[row][col] += 1;
|
|
||||||
|
|
||||||
++numSamples;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (normalizeCount) {
|
|
||||||
for (int r = 0; r < nR; r++) {
|
|
||||||
for (int c = 0; c < nC; c++) {
|
|
||||||
hit[r][c] /= numSamples;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return hit;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,78 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
|
||||||
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.MapRanking;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
|
||||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the quantization error histogram.
|
|
||||||
* Each bin will contain the average of the distances between samples
|
|
||||||
* mapped to the corresponding unit and the weight vector of that unit.
|
|
||||||
* @since 3.6
|
|
||||||
*/
|
|
||||||
public class QuantizationError implements MapDataVisualization {
|
|
||||||
/** Distance. */
|
|
||||||
private final DistanceMeasure distance;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param distance Distance.
|
|
||||||
*/
|
|
||||||
public QuantizationError(DistanceMeasure distance) {
|
|
||||||
this.distance = distance;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
|
||||||
Iterable<double[]> data) {
|
|
||||||
final int nR = map.getNumberOfRows();
|
|
||||||
final int nC = map.getNumberOfColumns();
|
|
||||||
|
|
||||||
final LocationFinder finder = new LocationFinder(map);
|
|
||||||
final MapRanking rank = new MapRanking(map.getNetwork(), distance);
|
|
||||||
|
|
||||||
// Hit bins.
|
|
||||||
final int[][] hit = new int[nR][nC];
|
|
||||||
// Error bins.
|
|
||||||
final double[][] error = new double[nR][nC];
|
|
||||||
|
|
||||||
for (double[] sample : data) {
|
|
||||||
final Neuron best = rank.rank(sample, 1).get(0);
|
|
||||||
|
|
||||||
final LocationFinder.Location loc = finder.getLocation(best);
|
|
||||||
final int row = loc.getRow();
|
|
||||||
final int col = loc.getColumn();
|
|
||||||
hit[row][col] += 1;
|
|
||||||
error[row][col] += distance.compute(sample, best.getFeatures());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int r = 0; r < nR; r++) {
|
|
||||||
for (int c = 0; c < nC; c++) {
|
|
||||||
final int count = hit[r][c];
|
|
||||||
if (count != 0) {
|
|
||||||
error[r][c] /= count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,93 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.MapRanking;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
|
||||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the topographic error histogram.
|
|
||||||
* Each bin will contain the number of data for which the first and
|
|
||||||
* second best matching units are not adjacent in the map.
|
|
||||||
* @since 3.6
|
|
||||||
*/
|
|
||||||
public class TopographicErrorHistogram implements MapDataVisualization {
|
|
||||||
/** Distance. */
|
|
||||||
private final DistanceMeasure distance;
|
|
||||||
/** Whether to compute relative bin counts. */
|
|
||||||
private final boolean relativeCount;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param relativeCount Whether to compute relative bin counts.
|
|
||||||
* If {@code true}, the data count in each bin will be divided by the total
|
|
||||||
* number of samples mapped to the neuron represented by that bin.
|
|
||||||
* @param distance Distance.
|
|
||||||
*/
|
|
||||||
public TopographicErrorHistogram(boolean relativeCount,
|
|
||||||
DistanceMeasure distance) {
|
|
||||||
this.relativeCount = relativeCount;
|
|
||||||
this.distance = distance;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
|
||||||
Iterable<double[]> data) {
|
|
||||||
final int nR = map.getNumberOfRows();
|
|
||||||
final int nC = map.getNumberOfColumns();
|
|
||||||
|
|
||||||
final LocationFinder finder = new LocationFinder(map);
|
|
||||||
final Network net = map.getNetwork();
|
|
||||||
final MapRanking rank = new MapRanking(net, distance);
|
|
||||||
|
|
||||||
// Hit bins.
|
|
||||||
final int[][] hit = new int[nR][nC];
|
|
||||||
// Error bins.
|
|
||||||
final double[][] error = new double[nR][nC];
|
|
||||||
|
|
||||||
for (double[] sample : data) {
|
|
||||||
final List<Neuron> p = rank.rank(sample, 2);
|
|
||||||
final Neuron best = p.get(0);
|
|
||||||
|
|
||||||
final LocationFinder.Location loc = finder.getLocation(best);
|
|
||||||
final int row = loc.getRow();
|
|
||||||
final int col = loc.getColumn();
|
|
||||||
hit[row][col] += 1;
|
|
||||||
|
|
||||||
if (!net.getNeighbours(best).contains(p.get(1))) {
|
|
||||||
// Increment count if first and second best matching units
|
|
||||||
// are not neighbours.
|
|
||||||
error[row][col] += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (relativeCount) {
|
|
||||||
for (int r = 0; r < nR; r++) {
|
|
||||||
for (int c = 0; c < nC; c++) {
|
|
||||||
error[r][c] /= hit[r][c];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,59 +17,36 @@
|
||||||
|
|
||||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
||||||
|
|
||||||
import java.util.Collection;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
||||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
|
||||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
||||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <a href="http://en.wikipedia.org/wiki/U-Matrix">U-Matrix</a>
|
* <a href="http://en.wikipedia.org/wiki/U-Matrix">U-Matrix</a>
|
||||||
* visualization of high-dimensional data projection.
|
* visualization of high-dimensional data projection.
|
||||||
|
* The 8 individual inter-units distances will be
|
||||||
|
* {@link #computeImage(NeuronSquareMesh2D) computed}. They will be
|
||||||
|
* stored in additional pixels around each of the original units of the
|
||||||
|
* 2D-map. The additional pixels that lie along a "diagonal" are shared
|
||||||
|
* by <em>two</em> pairs of units: their value will be set to the average
|
||||||
|
* distance between the units belonging to each of the pairs. The value
|
||||||
|
* zero will be stored in the pixel corresponding to the location of a
|
||||||
|
* unit of the 2D-map.
|
||||||
|
*
|
||||||
* @since 3.6
|
* @since 3.6
|
||||||
|
* @see NeuronSquareMesh2D.DataVisualization#getUMatrix()
|
||||||
*/
|
*/
|
||||||
public class UnifiedDistanceMatrix implements MapVisualization {
|
public class UnifiedDistanceMatrix implements MapVisualization {
|
||||||
/** Whether to show distance between each pair of neighbouring units. */
|
|
||||||
private final boolean individualDistances;
|
|
||||||
/** Distance. */
|
/** Distance. */
|
||||||
private final DistanceMeasure distance;
|
private final DistanceMeasure distance;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple constructor.
|
|
||||||
*
|
|
||||||
* @param individualDistances If {@code true}, the 8 individual
|
|
||||||
* inter-units distances will be {@link #computeImage(NeuronSquareMesh2D)
|
|
||||||
* computed}. They will be stored in additional pixels around each of
|
|
||||||
* the original units of the 2D-map. The additional pixels that lie
|
|
||||||
* along a "diagonal" are shared by <em>two</em> pairs of units: their
|
|
||||||
* value will be set to the average distance between the units belonging
|
|
||||||
* to each of the pairs. The value zero will be stored in the pixel
|
|
||||||
* corresponding to the location of a unit of the 2D-map.
|
|
||||||
* <br>
|
|
||||||
* If {@code false}, only the average distance between a unit and all its
|
|
||||||
* neighbours will be computed (and stored in the pixel corresponding to
|
|
||||||
* that unit of the 2D-map). In that case, the number of neighbours taken
|
|
||||||
* into account depends on the network's
|
|
||||||
* {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
|
|
||||||
* neighbourhood type}.
|
|
||||||
* @param distance Distance.
|
* @param distance Distance.
|
||||||
*/
|
*/
|
||||||
public UnifiedDistanceMatrix(boolean individualDistances,
|
public UnifiedDistanceMatrix(DistanceMeasure distance) {
|
||||||
DistanceMeasure distance) {
|
|
||||||
this.individualDistances = individualDistances;
|
|
||||||
this.distance = distance;
|
this.distance = distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public double[][] computeImage(NeuronSquareMesh2D map) {
|
|
||||||
if (individualDistances) {
|
|
||||||
return individualDistances(map);
|
|
||||||
} else {
|
|
||||||
return averageDistances(map);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the distances between a unit of the map and its
|
* Computes the distances between a unit of the map and its
|
||||||
* neighbours.
|
* neighbours.
|
||||||
|
@ -81,7 +58,8 @@ public class UnifiedDistanceMatrix implements MapVisualization {
|
||||||
* @param map Map.
|
* @param map Map.
|
||||||
* @return an image representing the individual distances.
|
* @return an image representing the individual distances.
|
||||||
*/
|
*/
|
||||||
private double[][] individualDistances(NeuronSquareMesh2D map) {
|
@Override
|
||||||
|
public double[][] computeImage(NeuronSquareMesh2D map) {
|
||||||
final int numRows = map.getNumberOfRows();
|
final int numRows = map.getNumberOfRows();
|
||||||
final int numCols = map.getNumberOfColumns();
|
final int numCols = map.getNumberOfColumns();
|
||||||
|
|
||||||
|
@ -174,37 +152,4 @@ public class UnifiedDistanceMatrix implements MapVisualization {
|
||||||
|
|
||||||
return uMatrix;
|
return uMatrix;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the distances between a unit of the map and its neighbours.
|
|
||||||
*
|
|
||||||
* @param map Map.
|
|
||||||
* @return an image representing the average distances.
|
|
||||||
*/
|
|
||||||
private double[][] averageDistances(NeuronSquareMesh2D map) {
|
|
||||||
final int numRows = map.getNumberOfRows();
|
|
||||||
final int numCols = map.getNumberOfColumns();
|
|
||||||
final double[][] uMatrix = new double[numRows][numCols];
|
|
||||||
|
|
||||||
final Network net = map.getNetwork();
|
|
||||||
|
|
||||||
for (int i = 0; i < numRows; i++) {
|
|
||||||
for (int j = 0; j < numCols; j++) {
|
|
||||||
final Neuron neuron = map.getNeuron(i, j);
|
|
||||||
final Collection<Neuron> neighbours = net.getNeighbours(neuron);
|
|
||||||
final double[] features = neuron.getFeatures();
|
|
||||||
|
|
||||||
double d = 0;
|
|
||||||
int count = 0;
|
|
||||||
for (Neuron n : neighbours) {
|
|
||||||
++count;
|
|
||||||
d += distance.compute(features, n.getFeatures());
|
|
||||||
}
|
|
||||||
|
|
||||||
uMatrix[i][j] = d / count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return uMatrix;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,9 @@ import java.io.ObjectOutputStream;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.StreamSupport;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.math4.exception.NumberIsTooSmallException;
|
import org.apache.commons.math4.exception.NumberIsTooSmallException;
|
||||||
import org.apache.commons.math4.exception.OutOfRangeException;
|
import org.apache.commons.math4.exception.OutOfRangeException;
|
||||||
|
@ -872,4 +875,41 @@ public class NeuronSquareMesh2DTest {
|
||||||
Assert.assertTrue(fromMap.contains(n));
|
Assert.assertTrue(fromMap.contains(n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDataVisualization() {
|
||||||
|
final FeatureInitializer[] initArray = { init };
|
||||||
|
final NeuronSquareMesh2D map = new NeuronSquareMesh2D(3, true,
|
||||||
|
3, true,
|
||||||
|
SquareNeighbourhood.VON_NEUMANN,
|
||||||
|
initArray);
|
||||||
|
|
||||||
|
// Trivial test: Use neurons' features as data.
|
||||||
|
|
||||||
|
final List<double[]> data = StreamSupport.stream(map.spliterator(), false)
|
||||||
|
.map(n -> n.getFeatures())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
final NeuronSquareMesh2D.DataVisualization v = map.computeQualityIndicators(data);
|
||||||
|
|
||||||
|
final int numRows = map.getNumberOfRows();
|
||||||
|
final int numCols = map.getNumberOfColumns();
|
||||||
|
|
||||||
|
// Test hits.
|
||||||
|
final double[][] hits = v.getNormalizedHits();
|
||||||
|
final double expectedHits = 1d / (numRows * numCols);
|
||||||
|
for (int i = 0; i < numRows; i++) {
|
||||||
|
for (int j = 0; j < numCols; j++) {
|
||||||
|
Assert.assertEquals(expectedHits, hits[i][j], 0d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test quantization error.
|
||||||
|
final double[][] qe = v.getQuantizationError();
|
||||||
|
final double expectedQE = 0;
|
||||||
|
for (int i = 0; i < numRows; i++) {
|
||||||
|
for (int j = 0; j < numCols; j++) {
|
||||||
|
Assert.assertEquals(expectedQE, qe[i][j], 0d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue