diff --git a/jmh/src/main/java/com/baeldung/FalseSharing.java b/jmh/src/main/java/com/baeldung/FalseSharing.java new file mode 100644 index 0000000000..24ba0c0b6b --- /dev/null +++ b/jmh/src/main/java/com/baeldung/FalseSharing.java @@ -0,0 +1,22 @@ +package com.baeldung; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class FalseSharing { + + private java.util.concurrent.atomic.LongAdder builtin = new java.util.concurrent.atomic.LongAdder(); + private LongAdder custom = new LongAdder(); + + @Benchmark + public void builtin() { + builtin.increment(); + } + + @Benchmark + public void custom() { + custom.increment(); + } +} diff --git a/jmh/src/main/java/com/baeldung/LongAdder.java b/jmh/src/main/java/com/baeldung/LongAdder.java new file mode 100644 index 0000000000..8fd2b85c4f --- /dev/null +++ b/jmh/src/main/java/com/baeldung/LongAdder.java @@ -0,0 +1,235 @@ +package com.baeldung; + +import java.io.Serializable; +import java.util.concurrent.atomic.AtomicLong; + +/** + * One or more variables that together maintain an initially zero + * {@code long} sum. When updates (method {@link #add}) are contended + * across threads, the set of variables may grow dynamically to reduce + * contention. Method {@link #sum} (or, equivalently, {@link + * #longValue}) returns the current total combined across the + * variables maintaining the sum. + * + *
This class is usually preferable to {@link AtomicLong} when + * multiple threads update a common sum that is used for purposes such + * as collecting statistics, not for fine-grained synchronization + * control. Under low update contention, the two classes have similar + * characteristics. But under high contention, expected throughput of + * this class is significantly higher, at the expense of higher space + * consumption. + * + *
LongAdders can be used with a {@link
+ * java.util.concurrent.ConcurrentHashMap} to maintain a scalable
+ * frequency map (a form of histogram or multiset). For example, to
+ * add a count to a {@code ConcurrentHashMap This class extends {@link Number}, but does not define
+ * methods such as {@code equals}, {@code hashCode} and {@code
+ * compareTo} because instances are expected to be mutated, and so are
+ * not useful as collection keys.
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class LongAdder extends Striped64 implements Serializable {
+ private static final long serialVersionUID = 7249069246863182397L;
+
+ /**
+ * Creates a new adder with initial sum of zero.
+ */
+ public LongAdder() {
+ }
+
+ /**
+ * Adds the given value.
+ *
+ * @param x the value to add
+ */
+ public void add(long x) {
+ Cell[] as; long b, v; int m; Cell a;
+ if ((as = cells) != null || !casBase(b = base, b + x)) {
+ boolean uncontended = true;
+ if (as == null || (m = as.length - 1) < 0 ||
+ (a = as[getProbe() & m]) == null ||
+ !(uncontended = a.cas(v = a.value, v + x)))
+ longAccumulate(x, null, uncontended);
+ }
+ }
+
+ /**
+ * Equivalent to {@code add(1)}.
+ */
+ public void increment() {
+ add(1L);
+ }
+
+ /**
+ * Equivalent to {@code add(-1)}.
+ */
+ public void decrement() {
+ add(-1L);
+ }
+
+ /**
+ * Returns the current sum. The returned value is NOT an
+ * atomic snapshot; invocation in the absence of concurrent
+ * updates returns an accurate result, but concurrent updates that
+ * occur while the sum is being calculated might not be
+ * incorporated.
+ *
+ * @return the sum
+ */
+ public long sum() {
+ Cell[] as = cells; Cell a;
+ long sum = base;
+ if (as != null) {
+ for (int i = 0; i < as.length; ++i) {
+ if ((a = as[i]) != null)
+ sum += a.value;
+ }
+ }
+ return sum;
+ }
+
+ /**
+ * Resets variables maintaining the sum to zero. This method may
+ * be a useful alternative to creating a new adder, but is only
+ * effective if there are no concurrent updates. Because this
+ * method is intrinsically racy, it should only be used when it is
+ * known that no threads are concurrently updating.
+ */
+ public void reset() {
+ Cell[] as = cells; Cell a;
+ base = 0L;
+ if (as != null) {
+ for (int i = 0; i < as.length; ++i) {
+ if ((a = as[i]) != null)
+ a.value = 0L;
+ }
+ }
+ }
+
+ /**
+ * Equivalent in effect to {@link #sum} followed by {@link
+ * #reset}. This method may apply for example during quiescent
+ * points between multithreaded computations. If there are
+ * updates concurrent with this method, the returned value is
+ * not guaranteed to be the final value occurring before
+ * the reset.
+ *
+ * @return the sum
+ */
+ public long sumThenReset() {
+ Cell[] as = cells; Cell a;
+ long sum = base;
+ base = 0L;
+ if (as != null) {
+ for (int i = 0; i < as.length; ++i) {
+ if ((a = as[i]) != null) {
+ sum += a.value;
+ a.value = 0L;
+ }
+ }
+ }
+ return sum;
+ }
+
+ /**
+ * Returns the String representation of the {@link #sum}.
+ * @return the String representation of the {@link #sum}
+ */
+ public String toString() {
+ return Long.toString(sum());
+ }
+
+ /**
+ * Equivalent to {@link #sum}.
+ *
+ * @return the sum
+ */
+ public long longValue() {
+ return sum();
+ }
+
+ /**
+ * Returns the {@link #sum} as an {@code int} after a narrowing
+ * primitive conversion.
+ */
+ public int intValue() {
+ return (int)sum();
+ }
+
+ /**
+ * Returns the {@link #sum} as a {@code float}
+ * after a widening primitive conversion.
+ */
+ public float floatValue() {
+ return (float)sum();
+ }
+
+ /**
+ * Returns the {@link #sum} as a {@code double} after a widening
+ * primitive conversion.
+ */
+ public double doubleValue() {
+ return (double)sum();
+ }
+
+ /**
+ * Serialization proxy, used to avoid reference to the non-public
+ * Striped64 superclass in serialized forms.
+ * @serial include
+ */
+ private static class SerializationProxy implements Serializable {
+ private static final long serialVersionUID = 7249069246863182397L;
+
+ /**
+ * The current value returned by sum().
+ * @serial
+ */
+ private final long value;
+
+ SerializationProxy(LongAdder a) {
+ value = a.sum();
+ }
+
+ /**
+ * Return a {@code LongAdder} object with initial state
+ * held by this proxy.
+ *
+ * @return a {@code LongAdder} object with initial state
+ * held by this proxy.
+ */
+ private Object readResolve() {
+ LongAdder a = new LongAdder();
+ a.base = value;
+ return a;
+ }
+ }
+
+ /**
+ * Returns a
+ *
+ * SerializationProxy
+ * representing the state of this instance.
+ *
+ * @return a {@link SerializationProxy}
+ * representing the state of this instance
+ */
+ private Object writeReplace() {
+ return new SerializationProxy(this);
+ }
+
+ /**
+ * @param s the stream
+ * @throws java.io.InvalidObjectException always
+ */
+ private void readObject(java.io.ObjectInputStream s)
+ throws java.io.InvalidObjectException {
+ throw new java.io.InvalidObjectException("Proxy required");
+ }
+
+}
diff --git a/jmh/src/main/java/com/baeldung/Striped64.java b/jmh/src/main/java/com/baeldung/Striped64.java
new file mode 100644
index 0000000000..d305c334e0
--- /dev/null
+++ b/jmh/src/main/java/com/baeldung/Striped64.java
@@ -0,0 +1,393 @@
+package com.baeldung;
+
+import sun.misc.Unsafe;
+
+import java.lang.reflect.Field;
+import java.util.function.LongBinaryOperator;
+import java.util.function.DoubleBinaryOperator;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * A package-local class holding common representation and mechanics
+ * for classes supporting dynamic striping on 64bit values. The class
+ * extends Number so that concrete subclasses must publicly do so.
+ */
+@SuppressWarnings("serial")
+abstract class Striped64 extends Number {
+ /*
+ * This class maintains a lazily-initialized table of atomically
+ * updated variables, plus an extra "base" field. The table size
+ * is a power of two. Indexing uses masked per-thread hash codes.
+ * Nearly all declarations in this class are package-private,
+ * accessed directly by subclasses.
+ *
+ * Table entries are of class Cell; a variant of AtomicLong padded
+ * (via @sun.misc.Contended) to reduce cache contention. Padding
+ * is overkill for most Atomics because they are usually
+ * irregularly scattered in memory and thus don't interfere much
+ * with each other. But Atomic objects residing in arrays will
+ * tend to be placed adjacent to each other, and so will most
+ * often share cache lines (with a huge negative performance
+ * impact) without this precaution.
+ *
+ * In part because Cells are relatively large, we avoid creating
+ * them until they are needed. When there is no contention, all
+ * updates are made to the base field. Upon first contention (a
+ * failed CAS on base update), the table is initialized to size 2.
+ * The table size is doubled upon further contention until
+ * reaching the nearest power of two greater than or equal to the
+ * number of CPUS. Table slots remain empty (null) until they are
+ * needed.
+ *
+ * A single spinlock ("cellsBusy") is used for initializing and
+ * resizing the table, as well as populating slots with new Cells.
+ * There is no need for a blocking lock; when the lock is not
+ * available, threads try other slots (or the base). During these
+ * retries, there is increased contention and reduced locality,
+ * which is still better than alternatives.
+ *
+ * The Thread probe fields maintained via ThreadLocalRandom serve
+ * as per-thread hash codes. We let them remain uninitialized as
+ * zero (if they come in this way) until they contend at slot
+ * 0. They are then initialized to values that typically do not
+ * often conflict with others. Contention and/or table collisions
+ * are indicated by failed CASes when performing an update
+ * operation. Upon a collision, if the table size is less than
+ * the capacity, it is doubled in size unless some other thread
+ * holds the lock. If a hashed slot is empty, and lock is
+ * available, a new Cell is created. Otherwise, if the slot
+ * exists, a CAS is tried. Retries proceed by "double hashing",
+ * using a secondary hash (Marsaglia XorShift) to try to find a
+ * free slot.
+ *
+ * The table size is capped because, when there are more threads
+ * than CPUs, supposing that each thread were bound to a CPU,
+ * there would exist a perfect hash function mapping threads to
+ * slots that eliminates collisions. When we reach capacity, we
+ * search for this mapping by randomly varying the hash codes of
+ * colliding threads. Because search is random, and collisions
+ * only become known via CAS failures, convergence can be slow,
+ * and because threads are typically not bound to CPUS forever,
+ * may not occur at all. However, despite these limitations,
+ * observed contention rates are typically low in these cases.
+ *
+ * It is possible for a Cell to become unused when threads that
+ * once hashed to it terminate, as well as in the case where
+ * doubling the table causes no thread to hash to it under
+ * expanded mask. We do not try to detect or remove such cells,
+ * under the assumption that for long-running instances, observed
+ * contention levels will recur, so the cells will eventually be
+ * needed again; and for short-lived ones, it does not matter.
+ */
+
+ /**
+ * Padded variant of AtomicLong supporting only raw accesses plus CAS.
+ *
+ * JVM intrinsics note: It would be possible to use a release-only
+ * form of CAS here, if it were provided.
+ */
+ @sun.misc.Contended static final class Cell {
+ volatile long value;
+ Cell(long x) { value = x; }
+ final boolean cas(long cmp, long val) {
+ return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val);
+ }
+
+ // Unsafe mechanics
+ private static final sun.misc.Unsafe UNSAFE;
+ private static final long valueOffset;
+ static {
+ try {
+ UNSAFE = getUnsafe();
+ Class> ak = Striped64.Cell.class;
+ valueOffset = UNSAFE.objectFieldOffset
+ (ak.getDeclaredField("value"));
+ } catch (Exception e) {
+ throw new Error(e);
+ }
+ }
+ }
+
+ /** Number of CPUS, to place bound on table size */
+ static final int NCPU = Runtime.getRuntime().availableProcessors();
+
+ /**
+ * Table of cells. When non-null, size is a power of 2.
+ */
+ transient volatile Striped64.Cell[] cells;
+
+ /**
+ * Base value, used mainly when there is no contention, but also as
+ * a fallback during table initialization races. Updated via CAS.
+ */
+ transient volatile long base;
+
+ /**
+ * Spinlock (locked via CAS) used when resizing and/or creating Cells.
+ */
+ transient volatile int cellsBusy;
+
+ /**
+ * Package-private default constructor
+ */
+ Striped64() {
+ }
+
+ /**
+ * CASes the base field.
+ */
+ final boolean casBase(long cmp, long val) {
+ return UNSAFE.compareAndSwapLong(this, BASE, cmp, val);
+ }
+
+ /**
+ * CASes the cellsBusy field from 0 to 1 to acquire lock.
+ */
+ final boolean casCellsBusy() {
+ return UNSAFE.compareAndSwapInt(this, CELLSBUSY, 0, 1);
+ }
+
+ /**
+ * Returns the probe value for the current thread.
+ * Duplicated from ThreadLocalRandom because of packaging restrictions.
+ */
+ static final int getProbe() {
+ return UNSAFE.getInt(Thread.currentThread(), PROBE);
+ }
+
+ /**
+ * Pseudo-randomly advances and records the given probe value for the
+ * given thread.
+ * Duplicated from ThreadLocalRandom because of packaging restrictions.
+ */
+ static final int advanceProbe(int probe) {
+ probe ^= probe << 13; // xorshift
+ probe ^= probe >>> 17;
+ probe ^= probe << 5;
+ UNSAFE.putInt(Thread.currentThread(), PROBE, probe);
+ return probe;
+ }
+
+ /**
+ * Handles cases of updates involving initialization, resizing,
+ * creating new Cells, and/or contention. See above for
+ * explanation. This method suffers the usual non-modularity
+ * problems of optimistic retry code, relying on rechecked sets of
+ * reads.
+ *
+ * @param x the value
+ * @param fn the update function, or null for add (this convention
+ * avoids the need for an extra field or function in LongAdder).
+ * @param wasUncontended false if CAS failed before call
+ */
+ final void longAccumulate(long x, LongBinaryOperator fn,
+ boolean wasUncontended) {
+ int h;
+ if ((h = getProbe()) == 0) {
+ ThreadLocalRandom.current(); // force initialization
+ h = getProbe();
+ wasUncontended = true;
+ }
+ boolean collide = false; // True if last slot nonempty
+ for (;;) {
+ Striped64.Cell[] as; Striped64.Cell a; int n; long v;
+ if ((as = cells) != null && (n = as.length) > 0) {
+ if ((a = as[(n - 1) & h]) == null) {
+ if (cellsBusy == 0) { // Try to attach new Cell
+ Striped64.Cell r = new Striped64.Cell(x); // Optimistically create
+ if (cellsBusy == 0 && casCellsBusy()) {
+ boolean created = false;
+ try { // Recheck under lock
+ Striped64.Cell[] rs; int m, j;
+ if ((rs = cells) != null &&
+ (m = rs.length) > 0 &&
+ rs[j = (m - 1) & h] == null) {
+ rs[j] = r;
+ created = true;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ if (created)
+ break;
+ continue; // Slot is now non-empty
+ }
+ }
+ collide = false;
+ }
+ else if (!wasUncontended) // CAS already known to fail
+ wasUncontended = true; // Continue after rehash
+ else if (a.cas(v = a.value, ((fn == null) ? v + x :
+ fn.applyAsLong(v, x))))
+ break;
+ else if (n >= NCPU || cells != as)
+ collide = false; // At max size or stale
+ else if (!collide)
+ collide = true;
+ else if (cellsBusy == 0 && casCellsBusy()) {
+ try {
+ if (cells == as) { // Expand table unless stale
+ Striped64.Cell[] rs = new Striped64.Cell[n << 1];
+ for (int i = 0; i < n; ++i)
+ rs[i] = as[i];
+ cells = rs;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ collide = false;
+ continue; // Retry with expanded table
+ }
+ h = advanceProbe(h);
+ }
+ else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
+ boolean init = false;
+ try { // Initialize table
+ if (cells == as) {
+ Striped64.Cell[] rs = new Striped64.Cell[2];
+ rs[h & 1] = new Striped64.Cell(x);
+ cells = rs;
+ init = true;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ if (init)
+ break;
+ }
+ else if (casBase(v = base, ((fn == null) ? v + x :
+ fn.applyAsLong(v, x))))
+ break; // Fall back on using base
+ }
+ }
+
+ /**
+ * Same as longAccumulate, but injecting long/double conversions
+ * in too many places to sensibly merge with long version, given
+ * the low-overhead requirements of this class. So must instead be
+ * maintained by copy/paste/adapt.
+ */
+ final void doubleAccumulate(double x, DoubleBinaryOperator fn,
+ boolean wasUncontended) {
+ int h;
+ if ((h = getProbe()) == 0) {
+ ThreadLocalRandom.current(); // force initialization
+ h = getProbe();
+ wasUncontended = true;
+ }
+ boolean collide = false; // True if last slot nonempty
+ for (;;) {
+ Striped64.Cell[] as; Striped64.Cell a; int n; long v;
+ if ((as = cells) != null && (n = as.length) > 0) {
+ if ((a = as[(n - 1) & h]) == null) {
+ if (cellsBusy == 0) { // Try to attach new Cell
+ Striped64.Cell r = new Striped64.Cell(Double.doubleToRawLongBits(x));
+ if (cellsBusy == 0 && casCellsBusy()) {
+ boolean created = false;
+ try { // Recheck under lock
+ Striped64.Cell[] rs; int m, j;
+ if ((rs = cells) != null &&
+ (m = rs.length) > 0 &&
+ rs[j = (m - 1) & h] == null) {
+ rs[j] = r;
+ created = true;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ if (created)
+ break;
+ continue; // Slot is now non-empty
+ }
+ }
+ collide = false;
+ }
+ else if (!wasUncontended) // CAS already known to fail
+ wasUncontended = true; // Continue after rehash
+ else if (a.cas(v = a.value,
+ ((fn == null) ?
+ Double.doubleToRawLongBits
+ (Double.longBitsToDouble(v) + x) :
+ Double.doubleToRawLongBits
+ (fn.applyAsDouble
+ (Double.longBitsToDouble(v), x)))))
+ break;
+ else if (n >= NCPU || cells != as)
+ collide = false; // At max size or stale
+ else if (!collide)
+ collide = true;
+ else if (cellsBusy == 0 && casCellsBusy()) {
+ try {
+ if (cells == as) { // Expand table unless stale
+ Striped64.Cell[] rs = new Striped64.Cell[n << 1];
+ for (int i = 0; i < n; ++i)
+ rs[i] = as[i];
+ cells = rs;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ collide = false;
+ continue; // Retry with expanded table
+ }
+ h = advanceProbe(h);
+ }
+ else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
+ boolean init = false;
+ try { // Initialize table
+ if (cells == as) {
+ Striped64.Cell[] rs = new Striped64.Cell[2];
+ rs[h & 1] = new Striped64.Cell(Double.doubleToRawLongBits(x));
+ cells = rs;
+ init = true;
+ }
+ } finally {
+ cellsBusy = 0;
+ }
+ if (init)
+ break;
+ }
+ else if (casBase(v = base,
+ ((fn == null) ?
+ Double.doubleToRawLongBits
+ (Double.longBitsToDouble(v) + x) :
+ Double.doubleToRawLongBits
+ (fn.applyAsDouble
+ (Double.longBitsToDouble(v), x)))))
+ break; // Fall back on using base
+ }
+ }
+
+ // Unsafe mechanics
+ private static final sun.misc.Unsafe UNSAFE;
+ private static final long BASE;
+ private static final long CELLSBUSY;
+ private static final long PROBE;
+ static {
+ try {
+ UNSAFE = getUnsafe();
+ Class> sk = Striped64.class;
+ BASE = UNSAFE.objectFieldOffset
+ (sk.getDeclaredField("base"));
+ CELLSBUSY = UNSAFE.objectFieldOffset
+ (sk.getDeclaredField("cellsBusy"));
+ Class> tk = Thread.class;
+ PROBE = UNSAFE.objectFieldOffset
+ (tk.getDeclaredField("threadLocalRandomProbe"));
+ } catch (Exception e) {
+ throw new Error(e);
+ }
+ }
+
+ private static Unsafe getUnsafe() {
+ try {
+ Field field = Unsafe.class.getDeclaredField("theUnsafe");
+ field.setAccessible(true);
+
+ return (Unsafe) field.get(null);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
\ No newline at end of file