MATH-1270
Various SOFM visualizations: unified distance matrix, hit histogram, smoothed data histogram, topographic error.
This commit is contained in:
parent
999761ba1b
commit
ce131449a4
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
||||
|
||||
import org.apache.commons.math4.ml.neuralnet.MapUtils;
|
||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
||||
|
||||
/**
|
||||
* Computes the hit histogram.
|
||||
* Each bin will contain the number of data for which the corresponding
|
||||
* neuron is the best matching unit.
|
||||
*/
|
||||
public class HitHistogram implements MapDataVisualization {
|
||||
/** Distance. */
|
||||
private final DistanceMeasure distance;
|
||||
/** Whether to compute relative bin counts. */
|
||||
private final boolean normalizeCount;
|
||||
|
||||
/**
|
||||
* @param normalizeCount Whether to compute relative bin counts.
|
||||
* If {@code true}, the data count in each bin will be divided by the total
|
||||
* number of samples.
|
||||
* @param distance Distance.
|
||||
*/
|
||||
public HitHistogram(boolean normalizeCount,
|
||||
DistanceMeasure distance) {
|
||||
this.normalizeCount = normalizeCount;
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
||||
Iterable<double[]> data) {
|
||||
final int nR = map.getNumberOfRows();
|
||||
final int nC = map.getNumberOfColumns();
|
||||
|
||||
final Network net = map.getNetwork();
|
||||
final LocationFinder finder = new LocationFinder(map);
|
||||
|
||||
// Totla number of samples.
|
||||
int numSamples = 0;
|
||||
// Hit bins.
|
||||
final double[][] hit = new double[nR][nC];
|
||||
|
||||
for (double[] sample : data) {
|
||||
final Neuron best = MapUtils.findBest(sample, map, distance);
|
||||
|
||||
final LocationFinder.Location loc = finder.getLocation(best);
|
||||
final int row = loc.getRow();
|
||||
final int col = loc.getColumn();
|
||||
hit[row][col] += 1;
|
||||
|
||||
++numSamples;
|
||||
}
|
||||
|
||||
if (normalizeCount) {
|
||||
for (int r = 0; r < nR; r++) {
|
||||
for (int c = 0; c < nC; c++) {
|
||||
hit[r][c] /= numSamples;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hit;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
||||
|
||||
import org.apache.commons.math4.ml.neuralnet.MapUtils;
import org.apache.commons.math4.ml.neuralnet.Neuron;
import org.apache.commons.math4.ml.neuralnet.Network;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
import org.apache.commons.math4.exception.NumberIsTooSmallException;
|
||||
/**
|
||||
* Visualization of high-dimensional data projection on a 2D-map.
|
||||
* The method is described in
|
||||
* <quote>
|
||||
* <em>Using Smoothed Data Histograms for Cluster Visualization in Self-Organizing Maps</em>
|
||||
* <br>
|
||||
* by Elias Pampalk, Andreas Rauber and Dieter Merkl.
|
||||
* </quote>
|
||||
*/
|
||||
public class SmoothedDataHistogram implements MapDataVisualization {
|
||||
/** Smoothing parameter. */
|
||||
private final int smoothingBins;
|
||||
/** Distance. */
|
||||
private final DistanceMeasure distance;
|
||||
/** Normalization factor. */
|
||||
private final double membershipNormalization;
|
||||
|
||||
/**
|
||||
* @param smoothingBins Number of bins.
|
||||
* @param distance Distance.
|
||||
*/
|
||||
public SmoothedDataHistogram(int smoothingBins,
|
||||
DistanceMeasure distance) {
|
||||
this.smoothingBins = smoothingBins;
|
||||
this.distance = distance;
|
||||
|
||||
double sum = 0;
|
||||
for (int i = 0; i < smoothingBins; i++) {
|
||||
sum += smoothingBins - i;
|
||||
}
|
||||
|
||||
this.membershipNormalization = 1d / sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @throws NumberIsTooSmallException if the size of the {@code map}
|
||||
* is smaller than the number of {@link #SmoothedDataHistogram(int,DistanceMeasure)
|
||||
* smoothing bins}.
|
||||
*/
|
||||
@Override
|
||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
||||
Iterable<double[]> data) {
|
||||
final int nR = map.getNumberOfRows();
|
||||
final int nC = map.getNumberOfColumns();
|
||||
|
||||
final int mapSize = nR * nC;
|
||||
if (mapSize < smoothingBins) {
|
||||
throw new NumberIsTooSmallException(mapSize, smoothingBins, true);
|
||||
}
|
||||
|
||||
final LocationFinder finder = new LocationFinder(map);
|
||||
|
||||
// Histogram bins.
|
||||
final double[][] histo = new double[nR][nC];
|
||||
|
||||
for (double[] sample : data) {
|
||||
final Neuron[] sorted = MapUtils.sort(sample,
|
||||
map.getNetwork(),
|
||||
distance);
|
||||
for (int i = 0; i < smoothingBins; i++) {
|
||||
final LocationFinder.Location loc = finder.getLocation(sorted[i]);
|
||||
final int row = loc.getRow();
|
||||
final int col = loc.getColumn();
|
||||
histo[row][col] += (smoothingBins - i) * membershipNormalization;
|
||||
}
|
||||
}
|
||||
|
||||
return histo;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
||||
|
||||
import org.apache.commons.math4.ml.neuralnet.MapUtils;
|
||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
||||
import org.apache.commons.math4.util.Pair;
|
||||
|
||||
/**
|
||||
* Computes the topographic error histogram.
|
||||
* Each bin will contain the number of data for which the first and
|
||||
* second best matching units are not adjacent in the map.
|
||||
*/
|
||||
public class TopographicErrorHistogram implements MapDataVisualization {
|
||||
/** Distance. */
|
||||
private final DistanceMeasure distance;
|
||||
/** Whether to compute relative bin counts. */
|
||||
private final boolean relativeCount;
|
||||
|
||||
/**
|
||||
* @param relativeCount Whether to compute relative bin counts.
|
||||
* If {@code true}, the data count in each bin will be divided by the total
|
||||
* number of samples mapped to the neuron represented by that bin.
|
||||
* @param distance Distance.
|
||||
*/
|
||||
public TopographicErrorHistogram(boolean relativeCount,
|
||||
DistanceMeasure distance) {
|
||||
this.relativeCount = relativeCount;
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public double[][] computeImage(NeuronSquareMesh2D map,
|
||||
Iterable<double[]> data) {
|
||||
final int nR = map.getNumberOfRows();
|
||||
final int nC = map.getNumberOfColumns();
|
||||
|
||||
final Network net = map.getNetwork();
|
||||
final LocationFinder finder = new LocationFinder(map);
|
||||
|
||||
// Hit bins.
|
||||
final int[][] hit = new int[nR][nC];
|
||||
// Error bins.
|
||||
final double[][] error = new double[nR][nC];
|
||||
|
||||
for (double[] sample : data) {
|
||||
final Pair<Neuron, Neuron> p = MapUtils.findBestAndSecondBest(sample, map, distance);
|
||||
final Neuron best = p.getFirst();
|
||||
|
||||
final LocationFinder.Location loc = finder.getLocation(best);
|
||||
final int row = loc.getRow();
|
||||
final int col = loc.getColumn();
|
||||
hit[row][col] += 1;
|
||||
|
||||
if (!net.getNeighbours(best).contains(p.getSecond())) {
|
||||
// Increment count if first and second best matching units
|
||||
// are not neighbours.
|
||||
error[row][col] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (relativeCount) {
|
||||
for (int r = 0; r < nR; r++) {
|
||||
for (int c = 0; c < nC; c++) {
|
||||
error[r][c] /= hit[r][c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math4.ml.neuralnet.twod.util;
|
||||
|
||||
import java.util.Collection;
|
||||
import org.apache.commons.math4.ml.neuralnet.Neuron;
|
||||
import org.apache.commons.math4.ml.neuralnet.Network;
|
||||
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
|
||||
import org.apache.commons.math4.ml.distance.DistanceMeasure;
|
||||
|
||||
/**
|
||||
* <a href="http://en.wikipedia.org/wiki/U-Matrix">U-Matrix</a>
|
||||
* visualization of high-dimensional data projection.
|
||||
*/
|
||||
public class UnifiedDistanceMatrix implements MapVisualization {
|
||||
/** Whether to show distance between each pair of neighbouring units. */
|
||||
private final boolean individualDistances;
|
||||
/** Distance. */
|
||||
private final DistanceMeasure distance;
|
||||
|
||||
/** Simple constructor.
|
||||
* @param individualDistances If {@code true}, the 8 individual
|
||||
* inter-units distances will be {@link #computeImage(NeuronSquareMesh2D)
|
||||
* computed}. They will be stored in additional pixels around each of
|
||||
* the original units of the 2D-map. The value zero will be stored in the
|
||||
* pixel corresponding to the location of a unit of the 2D-map.
|
||||
* If {@code false}, only the average distance between a unit and all its
|
||||
* neighbours will be computed (and stored in the pixel corresponding to
|
||||
* that unit of the 2D-map). In that case, the number of neighbours taken
|
||||
* into account depends on the network's
|
||||
* {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
|
||||
* neighbourhood type}.
|
||||
* @param distance Distance.
|
||||
*/
|
||||
public UnifiedDistanceMatrix(boolean individualDistances,
|
||||
DistanceMeasure distance) {
|
||||
this.individualDistances = individualDistances;
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public double[][] computeImage(NeuronSquareMesh2D map) {
|
||||
if (individualDistances) {
|
||||
return individualDistances(map);
|
||||
} else {
|
||||
return averageDistances(map);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the distances between a unit of the map and its
|
||||
* neighbours.
|
||||
* The image will contain more pixels than the number of neurons
|
||||
* in the given {@code map} because each neuron has 8 neighbours.
|
||||
* The value zero will be stored in the pixels corresponding to
|
||||
* the location of a map unit.
|
||||
*
|
||||
* @param map Map.
|
||||
* @return an image representing the individual distances.
|
||||
*/
|
||||
private double[][] individualDistances(NeuronSquareMesh2D map) {
|
||||
final int numRows = map.getNumberOfRows();
|
||||
final int numCols = map.getNumberOfColumns();
|
||||
|
||||
final double[][] uMatrix = new double[numRows * 2 + 1][numCols * 2 + 1];
|
||||
|
||||
for (int i = 0; i < numRows; i++) {
|
||||
// Current unit's row index in result image.
|
||||
final int iR = 2 * i + 1;
|
||||
|
||||
for (int j = 0; j < numCols; j++) {
|
||||
// Current unit's column index in result image.
|
||||
final int jR = 2 * j + 1;
|
||||
|
||||
final double[] current = map.getNeuron(i, j).getFeatures();
|
||||
Neuron neighbour;
|
||||
|
||||
// Top-left neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.LEFT,
|
||||
NeuronSquareMesh2D.VerticalDirection.UP);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR - 1][jR - 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Top-center neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.CENTER,
|
||||
NeuronSquareMesh2D.VerticalDirection.UP);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR - 1][jR] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Top-right neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
|
||||
NeuronSquareMesh2D.VerticalDirection.UP);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR - 1][jR + 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Left neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.LEFT,
|
||||
NeuronSquareMesh2D.VerticalDirection.CENTER);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR][jR - 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Right neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
|
||||
NeuronSquareMesh2D.VerticalDirection.CENTER);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR][jR + 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Bottom-left neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.LEFT,
|
||||
NeuronSquareMesh2D.VerticalDirection.DOWN);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR + 1][jR - 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Bottom-center neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.CENTER,
|
||||
NeuronSquareMesh2D.VerticalDirection.DOWN);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR + 1][jR] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
|
||||
// Bottom-right neighbour.
|
||||
neighbour = map.getNeuron(i, j,
|
||||
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
|
||||
NeuronSquareMesh2D.VerticalDirection.DOWN);
|
||||
if (neighbour != null) {
|
||||
uMatrix[iR + 1][jR + 1] = distance.compute(current,
|
||||
neighbour.getFeatures());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return uMatrix;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the distances between a unit of the map and its neighbours.
|
||||
*
|
||||
* @param map Map.
|
||||
* @return an image representing the average distances.
|
||||
*/
|
||||
private double[][] averageDistances(NeuronSquareMesh2D map) {
|
||||
final int numRows = map.getNumberOfRows();
|
||||
final int numCols = map.getNumberOfColumns();
|
||||
final double[][] uMatrix = new double[numRows][numCols];
|
||||
|
||||
final Network net = map.getNetwork();
|
||||
|
||||
for (int i = 0; i < numRows; i++) {
|
||||
for (int j = 0; j < numCols; j++) {
|
||||
final Neuron neuron = map.getNeuron(i, j);
|
||||
final Collection<Neuron> neighbours = net.getNeighbours(neuron);
|
||||
final double[] features = neuron.getFeatures();
|
||||
|
||||
double d = 0;
|
||||
int count = 0;
|
||||
for (Neuron n : neighbours) {
|
||||
++count;
|
||||
d += distance.compute(features, n.getFeatures());
|
||||
}
|
||||
|
||||
uMatrix[i][j] = d / count;
|
||||
}
|
||||
}
|
||||
|
||||
return uMatrix;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue