Partially unrolled (by a factor of four) some inner computation loops in DenseRealMatrix to increase performance.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@728478 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
136ffc1054
commit
ed721c48a3
|
@ -41,10 +41,10 @@ import org.apache.commons.math.MathRuntimeException;
|
||||||
public class DenseRealMatrix extends AbstractRealMatrix implements Serializable {
|
public class DenseRealMatrix extends AbstractRealMatrix implements Serializable {
|
||||||
|
|
||||||
/** Serializable version identifier */
|
/** Serializable version identifier */
|
||||||
private static final long serialVersionUID = 5536250491728216579L;
|
private static final long serialVersionUID = 4991895511313664478L;
|
||||||
|
|
||||||
/** Block size. */
|
/** Block size. */
|
||||||
private static final int BLOCK_SIZE = 32;
|
private static final int BLOCK_SIZE = 52;
|
||||||
|
|
||||||
/** Blocks of matrix entries. */
|
/** Blocks of matrix entries. */
|
||||||
private final double blocks[][];
|
private final double blocks[][];
|
||||||
|
@ -396,7 +396,7 @@ public class DenseRealMatrix extends AbstractRealMatrix implements Serializable
|
||||||
* @throws IllegalArgumentException
|
* @throws IllegalArgumentException
|
||||||
* if columnDimension(this) != rowDimension(m)
|
* if columnDimension(this) != rowDimension(m)
|
||||||
*/
|
*/
|
||||||
DenseRealMatrix multiply(DenseRealMatrix m) throws IllegalArgumentException {
|
public DenseRealMatrix multiply(DenseRealMatrix m) throws IllegalArgumentException {
|
||||||
|
|
||||||
// safety check
|
// safety check
|
||||||
checkMultiplicationCompatible(m);
|
checkMultiplicationCompatible(m);
|
||||||
|
@ -412,6 +412,9 @@ public class DenseRealMatrix extends AbstractRealMatrix implements Serializable
|
||||||
|
|
||||||
for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
|
for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
|
||||||
final int jWidth = out.blockWidth(jBlock);
|
final int jWidth = out.blockWidth(jBlock);
|
||||||
|
final int jWidth2 = jWidth + jWidth;
|
||||||
|
final int jWidth3 = jWidth2 + jWidth;
|
||||||
|
final int jWidth4 = jWidth3 + jWidth;
|
||||||
|
|
||||||
// select current block
|
// select current block
|
||||||
final double[] outBlock = out.blocks[blockIndex];
|
final double[] outBlock = out.blocks[blockIndex];
|
||||||
|
@ -426,8 +429,19 @@ public class DenseRealMatrix extends AbstractRealMatrix implements Serializable
|
||||||
final int lEnd = lStart + kWidth;
|
final int lEnd = lStart + kWidth;
|
||||||
for (int nStart = 0; nStart < jWidth; ++nStart) {
|
for (int nStart = 0; nStart < jWidth; ++nStart) {
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
for (int l = lStart, n = nStart; l < lEnd; ++l, n += jWidth) {
|
int l = lStart;
|
||||||
sum += tBlock[l] * mBlock[n];
|
int n = nStart;
|
||||||
|
while (l < lEnd - 3) {
|
||||||
|
sum += tBlock[l] * mBlock[n] +
|
||||||
|
tBlock[l + 1] * mBlock[n + jWidth] +
|
||||||
|
tBlock[l + 2] * mBlock[n + jWidth2] +
|
||||||
|
tBlock[l + 3] * mBlock[n + jWidth3];
|
||||||
|
l += 4;
|
||||||
|
n += jWidth4;
|
||||||
|
}
|
||||||
|
while (l < lEnd) {
|
||||||
|
sum += tBlock[l++] * mBlock[n];
|
||||||
|
n += jWidth;
|
||||||
}
|
}
|
||||||
outBlock[k++] += sum;
|
outBlock[k++] += sum;
|
||||||
}
|
}
|
||||||
|
@ -772,8 +786,17 @@ public class DenseRealMatrix extends AbstractRealMatrix implements Serializable
|
||||||
final int qEnd = Math.min(qStart + BLOCK_SIZE, columns);
|
final int qEnd = Math.min(qStart + BLOCK_SIZE, columns);
|
||||||
for (int p = pStart, k = 0; p < pEnd; ++p) {
|
for (int p = pStart, k = 0; p < pEnd; ++p) {
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
for (int q = qStart; q < qEnd; ++q) {
|
int q = qStart;
|
||||||
sum += block[k++] * v[q];
|
while (q < qEnd - 3) {
|
||||||
|
sum += block[k] * v[q] +
|
||||||
|
block[k + 1] * v[q + 1] +
|
||||||
|
block[k + 2] * v[q + 2] +
|
||||||
|
block[k + 3] * v[q + 3];
|
||||||
|
++k;
|
||||||
|
++q;
|
||||||
|
}
|
||||||
|
while (q < qEnd) {
|
||||||
|
sum += block[k++] * v[q++];
|
||||||
}
|
}
|
||||||
out[p] += sum;
|
out[p] += sum;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue