diff --git a/jmh/src/main/java/com/baeldung/FalseSharing.java b/jmh/src/main/java/com/baeldung/FalseSharing.java new file mode 100644 index 0000000000..24ba0c0b6b --- /dev/null +++ b/jmh/src/main/java/com/baeldung/FalseSharing.java @@ -0,0 +1,22 @@ +package com.baeldung; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class FalseSharing { + + private java.util.concurrent.atomic.LongAdder builtin = new java.util.concurrent.atomic.LongAdder(); + private LongAdder custom = new LongAdder(); + + @Benchmark + public void builtin() { + builtin.increment(); + } + + @Benchmark + public void custom() { + custom.increment(); + } +} diff --git a/jmh/src/main/java/com/baeldung/LongAdder.java b/jmh/src/main/java/com/baeldung/LongAdder.java new file mode 100644 index 0000000000..8fd2b85c4f --- /dev/null +++ b/jmh/src/main/java/com/baeldung/LongAdder.java @@ -0,0 +1,235 @@ +package com.baeldung; + +import java.io.Serializable; +import java.util.concurrent.atomic.AtomicLong; + +/** + * One or more variables that together maintain an initially zero + * {@code long} sum. When updates (method {@link #add}) are contended + * across threads, the set of variables may grow dynamically to reduce + * contention. Method {@link #sum} (or, equivalently, {@link + * #longValue}) returns the current total combined across the + * variables maintaining the sum. + * + *

<p>This class is usually preferable to {@link AtomicLong} when + * multiple threads update a common sum that is used for purposes such + * as collecting statistics, not for fine-grained synchronization + * control. Under low update contention, the two classes have similar + * characteristics. But under high contention, expected throughput of + * this class is significantly higher, at the expense of higher space + * consumption. + * + *

<p>LongAdders can be used with a {@link + * java.util.concurrent.ConcurrentHashMap} to maintain a scalable + * frequency map (a form of histogram or multiset). For example, to + * add a count to a {@code ConcurrentHashMap<String,LongAdder> freqs}, + * initializing if not already present, you can use {@code + * freqs.computeIfAbsent(key, k -> new LongAdder()).increment();} + * + * <p>
This class extends {@link Number}, but does not define + * methods such as {@code equals}, {@code hashCode} and {@code + * compareTo} because instances are expected to be mutated, and so are + * not useful as collection keys. + * + * @since 1.8 + * @author Doug Lea + */ +public class LongAdder extends Striped64 implements Serializable { + private static final long serialVersionUID = 7249069246863182397L; + + /** + * Creates a new adder with initial sum of zero. + */ + public LongAdder() { + } + + /** + * Adds the given value. + * + * @param x the value to add + */ + public void add(long x) { + Cell[] as; long b, v; int m; Cell a; + if ((as = cells) != null || !casBase(b = base, b + x)) { + boolean uncontended = true; + if (as == null || (m = as.length - 1) < 0 || + (a = as[getProbe() & m]) == null || + !(uncontended = a.cas(v = a.value, v + x))) + longAccumulate(x, null, uncontended); + } + } + + /** + * Equivalent to {@code add(1)}. + */ + public void increment() { + add(1L); + } + + /** + * Equivalent to {@code add(-1)}. + */ + public void decrement() { + add(-1L); + } + + /** + * Returns the current sum. The returned value is NOT an + * atomic snapshot; invocation in the absence of concurrent + * updates returns an accurate result, but concurrent updates that + * occur while the sum is being calculated might not be + * incorporated. + * + * @return the sum + */ + public long sum() { + Cell[] as = cells; Cell a; + long sum = base; + if (as != null) { + for (int i = 0; i < as.length; ++i) { + if ((a = as[i]) != null) + sum += a.value; + } + } + return sum; + } + + /** + * Resets variables maintaining the sum to zero. This method may + * be a useful alternative to creating a new adder, but is only + * effective if there are no concurrent updates. Because this + * method is intrinsically racy, it should only be used when it is + * known that no threads are concurrently updating. + */ + public void reset() { + Cell[] as = cells; Cell a; + base = 0L; + if (as != null) { + for (int i = 0; i < as.length; ++i) { + if ((a = as[i]) != null) + a.value = 0L; + } + } + } + + /** + * Equivalent in effect to {@link #sum} followed by {@link + * #reset}. This method may apply for example during quiescent + * points between multithreaded computations. If there are + * updates concurrent with this method, the returned value is + * not guaranteed to be the final value occurring before + * the reset. + * + * @return the sum + */ + public long sumThenReset() { + Cell[] as = cells; Cell a; + long sum = base; + base = 0L; + if (as != null) { + for (int i = 0; i < as.length; ++i) { + if ((a = as[i]) != null) { + sum += a.value; + a.value = 0L; + } + } + } + return sum; + } + + /** + * Returns the String representation of the {@link #sum}. + * @return the String representation of the {@link #sum} + */ + public String toString() { + return Long.toString(sum()); + } + + /** + * Equivalent to {@link #sum}. + * + * @return the sum + */ + public long longValue() { + return sum(); + } + + /** + * Returns the {@link #sum} as an {@code int} after a narrowing + * primitive conversion. + */ + public int intValue() { + return (int)sum(); + } + + /** + * Returns the {@link #sum} as a {@code float} + * after a widening primitive conversion. + */ + public float floatValue() { + return (float)sum(); + } + + /** + * Returns the {@link #sum} as a {@code double} after a widening + * primitive conversion. 
+ */ + public double doubleValue() { + return (double)sum(); + } + + /** + * Serialization proxy, used to avoid reference to the non-public + * Striped64 superclass in serialized forms. + * @serial include + */ + private static class SerializationProxy implements Serializable { + private static final long serialVersionUID = 7249069246863182397L; + + /** + * The current value returned by sum(). + * @serial + */ + private final long value; + + SerializationProxy(LongAdder a) { + value = a.sum(); + } + + /** + * Return a {@code LongAdder} object with initial state + * held by this proxy. + * + * @return a {@code LongAdder} object with initial state + * held by this proxy. + */ + private Object readResolve() { + LongAdder a = new LongAdder(); + a.base = value; + return a; + } + } + + /** + * Returns a + * + * SerializationProxy + * representing the state of this instance. + * + * @return a {@link SerializationProxy} + * representing the state of this instance + */ + private Object writeReplace() { + return new SerializationProxy(this); + } + + /** + * @param s the stream + * @throws java.io.InvalidObjectException always + */ + private void readObject(java.io.ObjectInputStream s) + throws java.io.InvalidObjectException { + throw new java.io.InvalidObjectException("Proxy required"); + } + +} diff --git a/jmh/src/main/java/com/baeldung/Striped64.java b/jmh/src/main/java/com/baeldung/Striped64.java new file mode 100644 index 0000000000..d305c334e0 --- /dev/null +++ b/jmh/src/main/java/com/baeldung/Striped64.java @@ -0,0 +1,393 @@ +package com.baeldung; + +import sun.misc.Unsafe; + +import java.lang.reflect.Field; +import java.util.function.LongBinaryOperator; +import java.util.function.DoubleBinaryOperator; +import java.util.concurrent.ThreadLocalRandom; + +/** + * A package-local class holding common representation and mechanics + * for classes supporting dynamic striping on 64bit values. The class + * extends Number so that concrete subclasses must publicly do so. + */ +@SuppressWarnings("serial") +abstract class Striped64 extends Number { + /* + * This class maintains a lazily-initialized table of atomically + * updated variables, plus an extra "base" field. The table size + * is a power of two. Indexing uses masked per-thread hash codes. + * Nearly all declarations in this class are package-private, + * accessed directly by subclasses. + * + * Table entries are of class Cell; a variant of AtomicLong padded + * (via @sun.misc.Contended) to reduce cache contention. Padding + * is overkill for most Atomics because they are usually + * irregularly scattered in memory and thus don't interfere much + * with each other. But Atomic objects residing in arrays will + * tend to be placed adjacent to each other, and so will most + * often share cache lines (with a huge negative performance + * impact) without this precaution. + * + * In part because Cells are relatively large, we avoid creating + * them until they are needed. When there is no contention, all + * updates are made to the base field. Upon first contention (a + * failed CAS on base update), the table is initialized to size 2. + * The table size is doubled upon further contention until + * reaching the nearest power of two greater than or equal to the + * number of CPUS. Table slots remain empty (null) until they are + * needed. + * + * A single spinlock ("cellsBusy") is used for initializing and + * resizing the table, as well as populating slots with new Cells. 
+ * There is no need for a blocking lock; when the lock is not + * available, threads try other slots (or the base). During these + * retries, there is increased contention and reduced locality, + * which is still better than alternatives. + * + * The Thread probe fields maintained via ThreadLocalRandom serve + * as per-thread hash codes. We let them remain uninitialized as + * zero (if they come in this way) until they contend at slot + * 0. They are then initialized to values that typically do not + * often conflict with others. Contention and/or table collisions + * are indicated by failed CASes when performing an update + * operation. Upon a collision, if the table size is less than + * the capacity, it is doubled in size unless some other thread + * holds the lock. If a hashed slot is empty, and lock is + * available, a new Cell is created. Otherwise, if the slot + * exists, a CAS is tried. Retries proceed by "double hashing", + * using a secondary hash (Marsaglia XorShift) to try to find a + * free slot. + * + * The table size is capped because, when there are more threads + * than CPUs, supposing that each thread were bound to a CPU, + * there would exist a perfect hash function mapping threads to + * slots that eliminates collisions. When we reach capacity, we + * search for this mapping by randomly varying the hash codes of + * colliding threads. Because search is random, and collisions + * only become known via CAS failures, convergence can be slow, + * and because threads are typically not bound to CPUS forever, + * may not occur at all. However, despite these limitations, + * observed contention rates are typically low in these cases. + * + * It is possible for a Cell to become unused when threads that + * once hashed to it terminate, as well as in the case where + * doubling the table causes no thread to hash to it under + * expanded mask. We do not try to detect or remove such cells, + * under the assumption that for long-running instances, observed + * contention levels will recur, so the cells will eventually be + * needed again; and for short-lived ones, it does not matter. + */ + + /** + * Padded variant of AtomicLong supporting only raw accesses plus CAS. + * + * JVM intrinsics note: It would be possible to use a release-only + * form of CAS here, if it were provided. + */ + @sun.misc.Contended static final class Cell { + volatile long value; + Cell(long x) { value = x; } + final boolean cas(long cmp, long val) { + return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val); + } + + // Unsafe mechanics + private static final sun.misc.Unsafe UNSAFE; + private static final long valueOffset; + static { + try { + UNSAFE = getUnsafe(); + Class ak = Striped64.Cell.class; + valueOffset = UNSAFE.objectFieldOffset + (ak.getDeclaredField("value")); + } catch (Exception e) { + throw new Error(e); + } + } + } + + /** Number of CPUS, to place bound on table size */ + static final int NCPU = Runtime.getRuntime().availableProcessors(); + + /** + * Table of cells. When non-null, size is a power of 2. + */ + transient volatile Striped64.Cell[] cells; + + /** + * Base value, used mainly when there is no contention, but also as + * a fallback during table initialization races. Updated via CAS. + */ + transient volatile long base; + + /** + * Spinlock (locked via CAS) used when resizing and/or creating Cells. + */ + transient volatile int cellsBusy; + + /** + * Package-private default constructor + */ + Striped64() { + } + + /** + * CASes the base field. 
+ */ + final boolean casBase(long cmp, long val) { + return UNSAFE.compareAndSwapLong(this, BASE, cmp, val); + } + + /** + * CASes the cellsBusy field from 0 to 1 to acquire lock. + */ + final boolean casCellsBusy() { + return UNSAFE.compareAndSwapInt(this, CELLSBUSY, 0, 1); + } + + /** + * Returns the probe value for the current thread. + * Duplicated from ThreadLocalRandom because of packaging restrictions. + */ + static final int getProbe() { + return UNSAFE.getInt(Thread.currentThread(), PROBE); + } + + /** + * Pseudo-randomly advances and records the given probe value for the + * given thread. + * Duplicated from ThreadLocalRandom because of packaging restrictions. + */ + static final int advanceProbe(int probe) { + probe ^= probe << 13; // xorshift + probe ^= probe >>> 17; + probe ^= probe << 5; + UNSAFE.putInt(Thread.currentThread(), PROBE, probe); + return probe; + } + + /** + * Handles cases of updates involving initialization, resizing, + * creating new Cells, and/or contention. See above for + * explanation. This method suffers the usual non-modularity + * problems of optimistic retry code, relying on rechecked sets of + * reads. + * + * @param x the value + * @param fn the update function, or null for add (this convention + * avoids the need for an extra field or function in LongAdder). + * @param wasUncontended false if CAS failed before call + */ + final void longAccumulate(long x, LongBinaryOperator fn, + boolean wasUncontended) { + int h; + if ((h = getProbe()) == 0) { + ThreadLocalRandom.current(); // force initialization + h = getProbe(); + wasUncontended = true; + } + boolean collide = false; // True if last slot nonempty + for (;;) { + Striped64.Cell[] as; Striped64.Cell a; int n; long v; + if ((as = cells) != null && (n = as.length) > 0) { + if ((a = as[(n - 1) & h]) == null) { + if (cellsBusy == 0) { // Try to attach new Cell + Striped64.Cell r = new Striped64.Cell(x); // Optimistically create + if (cellsBusy == 0 && casCellsBusy()) { + boolean created = false; + try { // Recheck under lock + Striped64.Cell[] rs; int m, j; + if ((rs = cells) != null && + (m = rs.length) > 0 && + rs[j = (m - 1) & h] == null) { + rs[j] = r; + created = true; + } + } finally { + cellsBusy = 0; + } + if (created) + break; + continue; // Slot is now non-empty + } + } + collide = false; + } + else if (!wasUncontended) // CAS already known to fail + wasUncontended = true; // Continue after rehash + else if (a.cas(v = a.value, ((fn == null) ? v + x : + fn.applyAsLong(v, x)))) + break; + else if (n >= NCPU || cells != as) + collide = false; // At max size or stale + else if (!collide) + collide = true; + else if (cellsBusy == 0 && casCellsBusy()) { + try { + if (cells == as) { // Expand table unless stale + Striped64.Cell[] rs = new Striped64.Cell[n << 1]; + for (int i = 0; i < n; ++i) + rs[i] = as[i]; + cells = rs; + } + } finally { + cellsBusy = 0; + } + collide = false; + continue; // Retry with expanded table + } + h = advanceProbe(h); + } + else if (cellsBusy == 0 && cells == as && casCellsBusy()) { + boolean init = false; + try { // Initialize table + if (cells == as) { + Striped64.Cell[] rs = new Striped64.Cell[2]; + rs[h & 1] = new Striped64.Cell(x); + cells = rs; + init = true; + } + } finally { + cellsBusy = 0; + } + if (init) + break; + } + else if (casBase(v = base, ((fn == null) ? 
v + x : + fn.applyAsLong(v, x)))) + break; // Fall back on using base + } + } + + /** + * Same as longAccumulate, but injecting long/double conversions + * in too many places to sensibly merge with long version, given + * the low-overhead requirements of this class. So must instead be + * maintained by copy/paste/adapt. + */ + final void doubleAccumulate(double x, DoubleBinaryOperator fn, + boolean wasUncontended) { + int h; + if ((h = getProbe()) == 0) { + ThreadLocalRandom.current(); // force initialization + h = getProbe(); + wasUncontended = true; + } + boolean collide = false; // True if last slot nonempty + for (;;) { + Striped64.Cell[] as; Striped64.Cell a; int n; long v; + if ((as = cells) != null && (n = as.length) > 0) { + if ((a = as[(n - 1) & h]) == null) { + if (cellsBusy == 0) { // Try to attach new Cell + Striped64.Cell r = new Striped64.Cell(Double.doubleToRawLongBits(x)); + if (cellsBusy == 0 && casCellsBusy()) { + boolean created = false; + try { // Recheck under lock + Striped64.Cell[] rs; int m, j; + if ((rs = cells) != null && + (m = rs.length) > 0 && + rs[j = (m - 1) & h] == null) { + rs[j] = r; + created = true; + } + } finally { + cellsBusy = 0; + } + if (created) + break; + continue; // Slot is now non-empty + } + } + collide = false; + } + else if (!wasUncontended) // CAS already known to fail + wasUncontended = true; // Continue after rehash + else if (a.cas(v = a.value, + ((fn == null) ? + Double.doubleToRawLongBits + (Double.longBitsToDouble(v) + x) : + Double.doubleToRawLongBits + (fn.applyAsDouble + (Double.longBitsToDouble(v), x))))) + break; + else if (n >= NCPU || cells != as) + collide = false; // At max size or stale + else if (!collide) + collide = true; + else if (cellsBusy == 0 && casCellsBusy()) { + try { + if (cells == as) { // Expand table unless stale + Striped64.Cell[] rs = new Striped64.Cell[n << 1]; + for (int i = 0; i < n; ++i) + rs[i] = as[i]; + cells = rs; + } + } finally { + cellsBusy = 0; + } + collide = false; + continue; // Retry with expanded table + } + h = advanceProbe(h); + } + else if (cellsBusy == 0 && cells == as && casCellsBusy()) { + boolean init = false; + try { // Initialize table + if (cells == as) { + Striped64.Cell[] rs = new Striped64.Cell[2]; + rs[h & 1] = new Striped64.Cell(Double.doubleToRawLongBits(x)); + cells = rs; + init = true; + } + } finally { + cellsBusy = 0; + } + if (init) + break; + } + else if (casBase(v = base, + ((fn == null) ? + Double.doubleToRawLongBits + (Double.longBitsToDouble(v) + x) : + Double.doubleToRawLongBits + (fn.applyAsDouble + (Double.longBitsToDouble(v), x))))) + break; // Fall back on using base + } + } + + // Unsafe mechanics + private static final sun.misc.Unsafe UNSAFE; + private static final long BASE; + private static final long CELLSBUSY; + private static final long PROBE; + static { + try { + UNSAFE = getUnsafe(); + Class sk = Striped64.class; + BASE = UNSAFE.objectFieldOffset + (sk.getDeclaredField("base")); + CELLSBUSY = UNSAFE.objectFieldOffset + (sk.getDeclaredField("cellsBusy")); + Class tk = Thread.class; + PROBE = UNSAFE.objectFieldOffset + (tk.getDeclaredField("threadLocalRandomProbe")); + } catch (Exception e) { + throw new Error(e); + } + } + + private static Unsafe getUnsafe() { + try { + Field field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + + return (Unsafe) field.get(null); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + +} \ No newline at end of file
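
Note on running the two benchmarks (not part of the diff): the copied Striped64 pads its Cell objects with @sun.misc.Contended, but the JVM only honors that annotation for non-JDK classes when started with -XX:-RestrictContended. Without the flag, the custom LongAdder's cells can land on shared cache lines, which is presumably the false-sharing effect the FalseSharing benchmark is meant to expose. A minimal JMH launcher sketch follows; the class name FalseSharingRunner and the option values are assumptions, not taken from the diff.

package com.baeldung;

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Hypothetical launcher, not part of the diff: runs the FalseSharing benchmarks twice,
// once with the default -XX:+RestrictContended (the copied Cells may falsely share
// cache lines) and once with -XX:-RestrictContended (padding applied everywhere).
public class FalseSharingRunner {

    public static void main(String[] args) throws RunnerException {
        Options restricted = new OptionsBuilder()
                .include(FalseSharing.class.getSimpleName())
                .forks(1)
                .build();
        new Runner(restricted).run();

        Options unrestricted = new OptionsBuilder()
                .include(FalseSharing.class.getSimpleName())
                .jvmArgsAppend("-XX:-RestrictContended")
                .forks(1)
                .build();
        new Runner(unrestricted).run();
    }
}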
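The LongAdder javadoc above suggests pairing the class with a ConcurrentHashMap to build a scalable frequency map. A small, standalone usage sketch of that pattern (class and method names are made up for illustration; it uses the built-in java.util.concurrent.atomic.LongAdder, though the copied com.baeldung.LongAdder exposes the same methods):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;

// Illustrative only: the frequency-map pattern described in the javadoc. Each distinct
// key gets its own LongAdder, so concurrent increments on a hot key contend on striped
// cells rather than on a single AtomicLong.
public class WordFrequencies {

    private final ConcurrentHashMap<String, LongAdder> freqs = new ConcurrentHashMap<>();

    public void record(String word) {
        freqs.computeIfAbsent(word, k -> new LongAdder()).increment();
    }

    public long count(String word) {
        LongAdder adder = freqs.get(word);
        return adder == null ? 0L : adder.sum();
    }
}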
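The long comment at the top of Striped64 explains why Cell is padded at all: cells sit next to each other in the array, so without padding a CAS on one cell invalidates the cache line holding its neighbours. Where @Contended is unavailable or restricted, the same effect has traditionally been approximated with manual padding. A rough illustration of that older technique, not part of the diff and assuming a common 64-byte cache line:

// Illustrative only: manual cache-line padding, the pre-@Contended way of keeping a hot
// counter from sharing a cache line with its neighbours. The long fields on either side
// are never read; they exist only to occupy space around the volatile value.
class PaddedCell {
    long p1, p2, p3, p4, p5, p6, p7;   // padding before the hot field
    volatile long value;
    long q1, q2, q3, q4, q5, q6, q7;   // padding after the hot field
}

The JVM is free to reorder instance fields, so manual padding of this kind is not guaranteed to survive layout decisions; that fragility is part of why @Contended was introduced in the first place.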