From 8f0bc799c67a2dbc97aaa291e707e15fc52233b8 Mon Sep 17 00:00:00 2001
From: Shay Banon <kimchy@gmail.com>
Date: Sun, 10 Jun 2012 00:42:54 +0200
Subject: [PATCH] Upgrade to latest jst166y and jsr166e                        
                                                                              
                                                                              
        Embed the code now in our source, since jsr166e jar generation with
 1.6 instead of 1.7 is complicated when doing it on its own as it relies on
 ThreadLocalRandom, and we have it in jsr166y

---
 pom.xml                                       |   16 -
 .../java/jsr166e/ConcurrentHashMapV8.java     | 3282 +++++++++++++++++
 src/main/java/jsr166e/DoubleAdder.java        |  201 +
 src/main/java/jsr166e/DoubleMaxUpdater.java   |  196 +
 src/main/java/jsr166e/LongAdder.java          |  202 +
 src/main/java/jsr166e/LongAdderTable.java     |  189 +
 src/main/java/jsr166e/LongMaxUpdater.java     |  186 +
 src/main/java/jsr166e/SequenceLock.java       |  639 ++++
 src/main/java/jsr166e/Striped64.java          |  340 ++
 src/main/java/jsr166e/extra/AtomicDouble.java |  273 ++
 .../java/jsr166e/extra/AtomicDoubleArray.java |  320 ++
 .../java/jsr166e/extra/ReadMostlyVector.java  | 1636 ++++++++
 src/main/java/jsr166e/package-info.java       |    9 +
 .../java/jsr166y/ConcurrentLinkedDeque.java   | 1468 ++++++++
 src/main/java/jsr166y/CountedCompleter.java   |  480 +++
 src/main/java/jsr166y/ForkJoinPool.java       | 2888 +++++++++++++++
 src/main/java/jsr166y/ForkJoinTask.java       | 1568 ++++++++
 .../java/jsr166y/ForkJoinWorkerThread.java    |  119 +
 .../java/jsr166y/LinkedTransferQueue.java     | 1352 +++++++
 src/main/java/jsr166y/Phaser.java             | 1162 ++++++
 src/main/java/jsr166y/RecursiveAction.java    |  164 +
 src/main/java/jsr166y/RecursiveTask.java      |   68 +
 src/main/java/jsr166y/ThreadLocalRandom.java  |  197 +
 src/main/java/jsr166y/TransferQueue.java      |  133 +
 src/main/java/jsr166y/package-info.java       |   30 +
 25 files changed, 17102 insertions(+), 16 deletions(-)
 create mode 100644 src/main/java/jsr166e/ConcurrentHashMapV8.java
 create mode 100644 src/main/java/jsr166e/DoubleAdder.java
 create mode 100644 src/main/java/jsr166e/DoubleMaxUpdater.java
 create mode 100644 src/main/java/jsr166e/LongAdder.java
 create mode 100644 src/main/java/jsr166e/LongAdderTable.java
 create mode 100644 src/main/java/jsr166e/LongMaxUpdater.java
 create mode 100644 src/main/java/jsr166e/SequenceLock.java
 create mode 100644 src/main/java/jsr166e/Striped64.java
 create mode 100644 src/main/java/jsr166e/extra/AtomicDouble.java
 create mode 100644 src/main/java/jsr166e/extra/AtomicDoubleArray.java
 create mode 100644 src/main/java/jsr166e/extra/ReadMostlyVector.java
 create mode 100644 src/main/java/jsr166e/package-info.java
 create mode 100644 src/main/java/jsr166y/ConcurrentLinkedDeque.java
 create mode 100644 src/main/java/jsr166y/CountedCompleter.java
 create mode 100644 src/main/java/jsr166y/ForkJoinPool.java
 create mode 100644 src/main/java/jsr166y/ForkJoinTask.java
 create mode 100644 src/main/java/jsr166y/ForkJoinWorkerThread.java
 create mode 100644 src/main/java/jsr166y/LinkedTransferQueue.java
 create mode 100644 src/main/java/jsr166y/Phaser.java
 create mode 100644 src/main/java/jsr166y/RecursiveAction.java
 create mode 100644 src/main/java/jsr166y/RecursiveTask.java
 create mode 100644 src/main/java/jsr166y/ThreadLocalRandom.java
 create mode 100644 src/main/java/jsr166y/TransferQueue.java
 create mode 100644 src/main/java/jsr166y/package-info.java
diff --git a/pom.xml b/pom.xml
index 5bc8ed37fff..8bcff4e5eef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -100,20 +100,6 @@
             <version>3.0.3</version>
         </dependency>
 
-        <dependency>
-            <groupId>org.elasticsearch</groupId>
-            <artifactId>es-jsr166y</artifactId>
-            <version>20120131</version>
-            <scope>compile</scope>
-        </dependency>
-
-        <dependency>
-            <groupId>org.elasticsearch</groupId>
-            <artifactId>es-jsr166e</artifactId>
-            <version>20120131</version>
-            <scope>compile</scope>
-        </dependency>
-
         <dependency>
             <groupId>joda-time</groupId>
             <artifactId>joda-time</artifactId>
@@ -314,8 +300,6 @@
                         <includes>
                             <include>com.google.guava:guava</include>
                             <include>net.sf.trove4j:trove4j</include>
-                            <include>org.elasticsearch:es-jsr166y</include>
-                            <include>org.elasticsearch:es-jsr166e</include>
                             <include>org.mvel:mvel2</include>
                             <include>org.codehaus.jackson:jackson-core-asl</include>
                             <include>org.codehaus.jackson:jackson-smile</include>
diff --git a/src/main/java/jsr166e/ConcurrentHashMapV8.java b/src/main/java/jsr166e/ConcurrentHashMapV8.java
new file mode 100644
index 00000000000..506c1c0b608
--- /dev/null
+++ b/src/main/java/jsr166e/ConcurrentHashMapV8.java
@@ -0,0 +1,3282 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+// Snapshot Tue Jun  5 14:56:09 2012  Doug Lea  (dl at altair)
+
+package jsr166e;
+import jsr166e.LongAdder;
+import jsr166y.ThreadLocalRandom;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.Collection;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.AbstractCollection;
+import java.util.Hashtable;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Enumeration;
+import java.util.ConcurrentModificationException;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.locks.LockSupport;
+import java.util.concurrent.locks.AbstractQueuedSynchronizer;
+import java.io.Serializable;
+
+/**
+ * A hash table supporting full concurrency of retrievals and
+ * high expected concurrency for updates. This class obeys the
+ * same functional specification as {@link java.util.Hashtable}, and
+ * includes versions of methods corresponding to each method of
+ * {@code Hashtable}. However, even though all operations are
+ * thread-safe, retrieval operations do <em>not</em> entail locking,
+ * and there is <em>not</em> any support for locking the entire table
+ * in a way that prevents all access.  This class is fully
+ * interoperable with {@code Hashtable} in programs that rely on its
+ * thread safety but not on its synchronization details.
+ *
+ * <p> Retrieval operations (including {@code get}) generally do not
+ * block, so may overlap with update operations (including {@code put}
+ * and {@code remove}). Retrievals reflect the results of the most
+ * recently <em>completed</em> update operations holding upon their
+ * onset.  For aggregate operations such as {@code putAll} and {@code
+ * clear}, concurrent retrievals may reflect insertion or removal of
+ * only some entries.  Similarly, Iterators and Enumerations return
+ * elements reflecting the state of the hash table at some point at or
+ * since the creation of the iterator/enumeration.  They do
+ * <em>not</em> throw {@link ConcurrentModificationException}.
+ * However, iterators are designed to be used by only one thread at a
+ * time.  Bear in mind that the results of aggregate status methods
+ * including {@code size}, {@code isEmpty}, and {@code containsValue}
+ * are typically useful only when a map is not undergoing concurrent
+ * updates in other threads.  Otherwise the results of these methods
+ * reflect transient states that may be adequate for monitoring
+ * or estimation purposes, but not for program control.
+ *
+ * <p> The table is dynamically expanded when there are too many
+ * collisions (i.e., keys that have distinct hash codes but fall into
+ * the same slot modulo the table size), with the expected average
+ * effect of maintaining roughly two bins per mapping (corresponding
+ * to a 0.75 load factor threshold for resizing). There may be much
+ * variance around this average as mappings are added and removed, but
+ * overall, this maintains a commonly accepted time/space tradeoff for
+ * hash tables.  However, resizing this or any other kind of hash
+ * table may be a relatively slow operation. When possible, it is a
+ * good idea to provide a size estimate as an optional {@code
+ * initialCapacity} constructor argument. An additional optional
+ * {@code loadFactor} constructor argument provides a further means of
+ * customizing initial table capacity by specifying the table density
+ * to be used in calculating the amount of space to allocate for the
+ * given number of elements.  Also, for compatibility with previous
+ * versions of this class, constructors may optionally specify an
+ * expected {@code concurrencyLevel} as an additional hint for
+ * internal sizing.  Note that using many keys with exactly the same
+ * {@code hashCode()} is a sure way to slow down performance of any
+ * hash table.
+ *
+ * <p>This class and its views and iterators implement all of the
+ * <em>optional</em> methods of the {@link Map} and {@link Iterator}
+ * interfaces.
+ *
+ * <p> Like {@link Hashtable} but unlike {@link HashMap}, this class
+ * does <em>not</em> allow {@code null} to be used as a key or value.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/../technotes/guides/collections/index.html">
+ * Java Collections Framework</a>.
+ *
+ * <p><em>jsr166e note: This class is a candidate replacement for
+ * java.util.concurrent.ConcurrentHashMap.<em>
+ *
+ * @since 1.5
+ * @author Doug Lea
+ * @param <K> the type of keys maintained by this map
+ * @param <V> the type of mapped values
+ */
+public class ConcurrentHashMapV8<K, V>
+        implements ConcurrentMap<K, V>, Serializable {
+    private static final long serialVersionUID = 7249069246763182397L;
+
+    /**
+     * A function computing a mapping from the given key to a value.
+     * This is a place-holder for an upcoming JDK8 interface.
+     */
+    public static interface MappingFunction<K, V> {
+        /**
+         * Returns a non-null value for the given key.
+         *
+         * @param key the (non-null) key
+         * @return a non-null value
+         */
+        V map(K key);
+    }
+
+    /**
+     * A function computing a new mapping given a key and its current
+     * mapped value (or {@code null} if there is no current
+     * mapping). This is a place-holder for an upcoming JDK8
+     * interface.
+     */
+    public static interface RemappingFunction<K, V> {
+        /**
+         * Returns a new value given a key and its current value.
+         *
+         * @param key the (non-null) key
+         * @param value the current value, or null if there is no mapping
+         * @return a non-null value
+         */
+        V remap(K key, V value);
+    }
+
+    /*
+     * Overview:
+     *
+     * The primary design goal of this hash table is to maintain
+     * concurrent readability (typically method get(), but also
+     * iterators and related methods) while minimizing update
+     * contention. Secondary goals are to keep space consumption about
+     * the same or better than java.util.HashMap, and to support high
+     * initial insertion rates on an empty table by many threads.
+     *
+     * Each key-value mapping is held in a Node.  Because Node fields
+     * can contain special values, they are defined using plain Object
+     * types. Similarly in turn, all internal methods that use them
+     * work off Object types. And similarly, so do the internal
+     * methods of auxiliary iterator and view classes.  All public
+     * generic typed methods relay in/out of these internal methods,
+     * supplying null-checks and casts as needed. This also allows
+     * many of the public methods to be factored into a smaller number
+     * of internal methods (although sadly not so for the five
+     * variants of put-related operations). The validation-based
+     * approach explained below leads to a lot of code sprawl because
+     * retry-control precludes factoring into smaller methods.
+     *
+     * The table is lazily initialized to a power-of-two size upon the
+     * first insertion.  Each bin in the table normally contains a
+     * list of Nodes (most often, the list has only zero or one Node).
+     * Table accesses require volatile/atomic reads, writes, and
+     * CASes.  Because there is no other way to arrange this without
+     * adding further indirections, we use intrinsics
+     * (sun.misc.Unsafe) operations.  The lists of nodes within bins
+     * are always accurately traversable under volatile reads, so long
+     * as lookups check hash code and non-nullness of value before
+     * checking key equality.
+     *
+     * We use the top two bits of Node hash fields for control
+     * purposes -- they are available anyway because of addressing
+     * constraints.  As explained further below, these top bits are
+     * used as follows:
+     *  00 - Normal
+     *  01 - Locked
+     *  11 - Locked and may have a thread waiting for lock
+     *  10 - Node is a forwarding node
+     *
+     * The lower 30 bits of each Node's hash field contain a
+     * transformation of the key's hash code, except for forwarding
+     * nodes, for which the lower bits are zero (and so always have
+     * hash field == MOVED).
+     *
+     * Insertion (via put or its variants) of the first node in an
+     * empty bin is performed by just CASing it to the bin.  This is
+     * by far the most common case for put operations under most
+     * key/hash distributions.  Other update operations (insert,
+     * delete, and replace) require locks.  We do not want to waste
+     * the space required to associate a distinct lock object with
+     * each bin, so instead use the first node of a bin list itself as
+     * a lock. Blocking support for these locks relies on the builtin
+     * "synchronized" monitors.  However, we also need a tryLock
+     * construction, so we overlay these by using bits of the Node
+     * hash field for lock control (see above), and so normally use
+     * builtin monitors only for blocking and signalling using
+     * wait/notifyAll constructions. See Node.tryAwaitLock.
+     *
+     * Using the first node of a list as a lock does not by itself
+     * suffice though: When a node is locked, any update must first
+     * validate that it is still the first node after locking it, and
+     * retry if not. Because new nodes are always appended to lists,
+     * once a node is first in a bin, it remains first until deleted
+     * or the bin becomes invalidated (upon resizing).  However,
+     * operations that only conditionally update may inspect nodes
+     * until the point of update. This is a converse of sorts to the
+     * lazy locking technique described by Herlihy & Shavit.
+     *
+     * The main disadvantage of per-bin locks is that other update
+     * operations on other nodes in a bin list protected by the same
+     * lock can stall, for example when user equals() or mapping
+     * functions take a long time.  However, statistically, under
+     * random hash codes, this is not a common problem.  Ideally, the
+     * frequency of nodes in bins follows a Poisson distribution
+     * (http://en.wikipedia.org/wiki/Poisson_distribution) with a
+     * parameter of about 0.5 on average, given the resizing threshold
+     * of 0.75, although with a large variance because of resizing
+     * granularity. Ignoring variance, the expected occurrences of
+     * list size k are (exp(-0.5) * pow(0.5, k) / factorial(k)). The
+     * first values are:
+     *
+     * 0:    0.60653066
+     * 1:    0.30326533
+     * 2:    0.07581633
+     * 3:    0.01263606
+     * 4:    0.00157952
+     * 5:    0.00015795
+     * 6:    0.00001316
+     * 7:    0.00000094
+     * 8:    0.00000006
+     * more: less than 1 in ten million
+     *
+     * Lock contention probability for two threads accessing distinct
+     * elements is roughly 1 / (8 * #elements) under random hashes.
+     *
+     * Actual hash code distributions encountered in practice
+     * sometimes deviate significantly from uniform randomness.  This
+     * includes the case when N > (1<<30), so some keys MUST collide.
+     * Similarly for dumb or hostile usages in which multiple keys are
+     * designed to have identical hash codes. Also, although we guard
+     * against the worst effects of this (see method spread), sets of
+     * hashes may differ only in bits that do not impact their bin
+     * index for a given power-of-two mask.  So we use a secondary
+     * strategy that applies when the number of nodes in a bin exceeds
+     * a threshold, and at least one of the keys implements
+     * Comparable.  These TreeBins use a balanced tree to hold nodes
+     * (a specialized form of red-black trees), bounding search time
+     * to O(log N).  Each search step in a TreeBin is around twice as
+     * slow as in a regular list, but given that N cannot exceed
+     * (1<<64) (before running out of addresses) this bounds search
+     * steps, lock hold times, etc, to reasonable constants (roughly
+     * 100 nodes inspected per operation worst case) so long as keys
+     * are Comparable (which is very common -- String, Long, etc).
+     * TreeBin nodes (TreeNodes) also maintain the same "next"
+     * traversal pointers as regular nodes, so can be traversed in
+     * iterators in the same way.
+     *
+     * The table is resized when occupancy exceeds a percentage
+     * threshold (nominally, 0.75, but see below).  Only a single
+     * thread performs the resize (using field "sizeCtl", to arrange
+     * exclusion), but the table otherwise remains usable for reads
+     * and updates. Resizing proceeds by transferring bins, one by
+     * one, from the table to the next table.  Because we are using
+     * power-of-two expansion, the elements from each bin must either
+     * stay at same index, or move with a power of two offset. We
+     * eliminate unnecessary node creation by catching cases where old
+     * nodes can be reused because their next fields won't change.  On
+     * average, only about one-sixth of them need cloning when a table
+     * doubles. The nodes they replace will be garbage collectable as
+     * soon as they are no longer referenced by any reader thread that
+     * may be in the midst of concurrently traversing table.  Upon
+     * transfer, the old table bin contains only a special forwarding
+     * node (with hash field "MOVED") that contains the next table as
+     * its key. On encountering a forwarding node, access and update
+     * operations restart, using the new table.
+     *
+     * Each bin transfer requires its bin lock. However, unlike other
+     * cases, a transfer can skip a bin if it fails to acquire its
+     * lock, and revisit it later (unless it is a TreeBin). Method
+     * rebuild maintains a buffer of TRANSFER_BUFFER_SIZE bins that
+     * have been skipped because of failure to acquire a lock, and
+     * blocks only if none are available (i.e., only very rarely).
+     * The transfer operation must also ensure that all accessible
+     * bins in both the old and new table are usable by any traversal.
+     * When there are no lock acquisition failures, this is arranged
+     * simply by proceeding from the last bin (table.length - 1) up
+     * towards the first.  Upon seeing a forwarding node, traversals
+     * (see class InternalIterator) arrange to move to the new table
+     * without revisiting nodes.  However, when any node is skipped
+     * during a transfer, all earlier table bins may have become
+     * visible, so are initialized with a reverse-forwarding node back
+     * to the old table until the new ones are established. (This
+     * sometimes requires transiently locking a forwarding node, which
+     * is possible under the above encoding.) These more expensive
+     * mechanics trigger only when necessary.
+     *
+     * The traversal scheme also applies to partial traversals of
+     * ranges of bins (via an alternate InternalIterator constructor)
+     * to support partitioned aggregate operations (that are not
+     * otherwise implemented yet).  Also, read-only operations give up
+     * if ever forwarded to a null table, which provides support for
+     * shutdown-style clearing, which is also not currently
+     * implemented.
+     *
+     * Lazy table initialization minimizes footprint until first use,
+     * and also avoids resizings when the first operation is from a
+     * putAll, constructor with map argument, or deserialization.
+     * These cases attempt to override the initial capacity settings,
+     * but harmlessly fail to take effect in cases of races.
+     *
+     * The element count is maintained using a LongAdder, which avoids
+     * contention on updates but can encounter cache thrashing if read
+     * too frequently during concurrent access. To avoid reading so
+     * often, resizing is attempted either when a bin lock is
+     * contended, or upon adding to a bin already holding two or more
+     * nodes (checked before adding in the xIfAbsent methods, after
+     * adding in others). Under uniform hash distributions, the
+     * probability of this occurring at threshold is around 13%,
+     * meaning that only about 1 in 8 puts check threshold (and after
+     * resizing, many fewer do so). But this approximation has high
+     * variance for small table sizes, so we check on any collision
+     * for sizes <= 64. The bulk putAll operation further reduces
+     * contention by only committing count updates upon these size
+     * checks.
+     *
+     * Maintaining API and serialization compatibility with previous
+     * versions of this class introduces several oddities. Mainly: We
+     * leave untouched but unused constructor arguments refering to
+     * concurrencyLevel. We accept a loadFactor constructor argument,
+     * but apply it only to initial table capacity (which is the only
+     * time that we can guarantee to honor it.) We also declare an
+     * unused "Segment" class that is instantiated in minimal form
+     * only when serializing.
+     */
+
+    /* ---------------- Constants -------------- */
+
+    /**
+     * The largest possible table capacity.  This value must be
+     * exactly 1<<30 to stay within Java array allocation and indexing
+     * bounds for power of two table sizes, and is further required
+     * because the top two bits of 32bit hash fields are used for
+     * control purposes.
+     */
+    private static final int MAXIMUM_CAPACITY = 1 << 30;
+
+    /**
+     * The default initial table capacity.  Must be a power of 2
+     * (i.e., at least 1) and at most MAXIMUM_CAPACITY.
+     */
+    private static final int DEFAULT_CAPACITY = 16;
+
+    /**
+     * The largest possible (non-power of two) array size.
+     * Needed by toArray and related methods.
+     */
+    static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+    /**
+     * The default concurrency level for this table. Unused but
+     * defined for compatibility with previous versions of this class.
+     */
+    private static final int DEFAULT_CONCURRENCY_LEVEL = 16;
+
+    /**
+     * The load factor for this table. Overrides of this value in
+     * constructors affect only the initial table capacity.  The
+     * actual floating point value isn't normally used -- it is
+     * simpler to use expressions such as {@code n - (n >>> 2)} for
+     * the associated resizing threshold.
+     */
+    private static final float LOAD_FACTOR = 0.75f;
+
+    /**
+     * The buffer size for skipped bins during transfers. The
+     * value is arbitrary but should be large enough to avoid
+     * most locking stalls during resizes.
+     */
+    private static final int TRANSFER_BUFFER_SIZE = 32;
+
+    /**
+     * The bin count threshold for using a tree rather than list for a
+     * bin.  The value reflects the approximate break-even point for
+     * using tree-based operations.
+     */
+    private static final int TREE_THRESHOLD = 8;
+
+    /*
+     * Encodings for special uses of Node hash fields. See above for
+     * explanation.
+     */
+    static final int MOVED     = 0x80000000; // hash field for forwarding nodes
+    static final int LOCKED    = 0x40000000; // set/tested only as a bit
+    static final int WAITING   = 0xc0000000; // both bits set/tested together
+    static final int HASH_BITS = 0x3fffffff; // usable bits of normal node hash
+
+    /* ---------------- Fields -------------- */
+
+    /**
+     * The array of bins. Lazily initialized upon first insertion.
+     * Size is always a power of two. Accessed directly by iterators.
+     */
+    transient volatile Node[] table;
+
+    /**
+     * The counter maintaining number of elements.
+     */
+    private transient final LongAdder counter;
+
+    /**
+     * Table initialization and resizing control.  When negative, the
+     * table is being initialized or resized. Otherwise, when table is
+     * null, holds the initial table size to use upon creation, or 0
+     * for default. After initialization, holds the next element count
+     * value upon which to resize the table.
+     */
+    private transient volatile int sizeCtl;
+
+    // views
+    private transient KeySet<K,V> keySet;
+    private transient Values<K,V> values;
+    private transient EntrySet<K,V> entrySet;
+
+    /** For serialization compatibility. Null unless serialized; see below */
+    private Segment<K,V>[] segments;
+
+    /* ---------------- Table element access -------------- */
+
+    /*
+     * Volatile access methods are used for table elements as well as
+     * elements of in-progress next table while resizing.  Uses are
+     * null checked by callers, and implicitly bounds-checked, relying
+     * on the invariants that tab arrays have non-zero size, and all
+     * indices are masked with (tab.length - 1) which is never
+     * negative and always less than length. Note that, to be correct
+     * wrt arbitrary concurrency errors by users, bounds checks must
+     * operate on local variables, which accounts for some odd-looking
+     * inline assignments below.
+     */
+
+    static final Node tabAt(Node[] tab, int i) { // used by InternalIterator
+        return (Node)UNSAFE.getObjectVolatile(tab, ((long)i<<ASHIFT)+ABASE);
+    }
+
+    private static final boolean casTabAt(Node[] tab, int i, Node c, Node v) {
+        return UNSAFE.compareAndSwapObject(tab, ((long)i<<ASHIFT)+ABASE, c, v);
+    }
+
+    private static final void setTabAt(Node[] tab, int i, Node v) {
+        UNSAFE.putObjectVolatile(tab, ((long)i<<ASHIFT)+ABASE, v);
+    }
+
+    /* ---------------- Nodes -------------- */
+
+    /**
+     * Key-value entry. Note that this is never exported out as a
+     * user-visible Map.Entry (see WriteThroughEntry and SnapshotEntry
+     * below). Nodes with a hash field of MOVED are special, and do
+     * not contain user keys or values.  Otherwise, keys are never
+     * null, and null val fields indicate that a node is in the
+     * process of being deleted or created. For purposes of read-only
+     * access, a key may be read before a val, but can only be used
+     * after checking val to be non-null.
+     */
+    static class Node {
+        volatile int hash;
+        final Object key;
+        volatile Object val;
+        volatile Node next;
+
+        Node(int hash, Object key, Object val, Node next) {
+            this.hash = hash;
+            this.key = key;
+            this.val = val;
+            this.next = next;
+        }
+
+        /** CompareAndSet the hash field */
+        final boolean casHash(int cmp, int val) {
+            return UNSAFE.compareAndSwapInt(this, hashOffset, cmp, val);
+        }
+
+        /** The number of spins before blocking for a lock */
+        static final int MAX_SPINS =
+            Runtime.getRuntime().availableProcessors() > 1 ? 64 : 1;
+
+        /**
+         * Spins a while if LOCKED bit set and this node is the first
+         * of its bin, and then sets WAITING bits on hash field and
+         * blocks (once) if they are still set.  It is OK for this
+         * method to return even if lock is not available upon exit,
+         * which enables these simple single-wait mechanics.
+         *
+         * The corresponding signalling operation is performed within
+         * callers: Upon detecting that WAITING has been set when
+         * unlocking lock (via a failed CAS from non-waiting LOCKED
+         * state), unlockers acquire the sync lock and perform a
+         * notifyAll.
+         */
+        final void tryAwaitLock(Node[] tab, int i) {
+            if (tab != null && i >= 0 && i < tab.length) { // bounds check
+                int r = ThreadLocalRandom.current().nextInt(); // randomize spins
+                int spins = MAX_SPINS, h;
+                while (tabAt(tab, i) == this && ((h = hash) & LOCKED) != 0) {
+                    if (spins >= 0) {
+                        r ^= r << 1; r ^= r >>> 3; r ^= r << 10; // xorshift
+                        if (r >= 0 && --spins == 0)
+                            Thread.yield();  // yield before block
+                    }
+                    else if (casHash(h, h | WAITING)) {
+                        synchronized (this) {
+                            if (tabAt(tab, i) == this &&
+                                (hash & WAITING) == WAITING) {
+                                try {
+                                    wait();
+                                } catch (InterruptedException ie) {
+                                    Thread.currentThread().interrupt();
+                                }
+                            }
+                            else
+                                notifyAll(); // possibly won race vs signaller
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+
+        // Unsafe mechanics for casHash
+        private static final sun.misc.Unsafe UNSAFE;
+        private static final long hashOffset;
+
+        static {
+            try {
+                UNSAFE = getUnsafe();
+                Class<?> k = Node.class;
+                hashOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("hash"));
+            } catch (Exception e) {
+                throw new Error(e);
+            }
+        }
+    }
+
+    /* ---------------- TreeBins -------------- */
+
+    /**
+     * Nodes for use in TreeBins
+     */
+    static final class TreeNode extends Node {
+        TreeNode parent;  // red-black tree links
+        TreeNode left;
+        TreeNode right;
+        TreeNode prev;    // needed to unlink next upon deletion
+        boolean red;
+
+        TreeNode(int hash, Object key, Object val, Node next, TreeNode parent) {
+            super(hash, key, val, next);
+            this.parent = parent;
+        }
+    }
+
+    /**
+     * A specialized form of red-black tree for use in bins
+     * whose size exceeds a threshold.
+     *
+     * TreeBins use a special form of comparison for search and
+     * related operations (which is the main reason we cannot use
+     * existing collections such as TreeMaps). TreeBins contain
+     * Comparable elements, but may contain others, as well as
+     * elements that are Comparable but not necessarily Comparable<T>
+     * for the same T, so we cannot invoke compareTo among them. To
+     * handle this, the tree is ordered primarily by hash value, then
+     * by getClass().getName() order, and then by Comparator order
+     * among elements of the same class.  On lookup at a node, if
+     * non-Comparable, both left and right children may need to be
+     * searched in the case of tied hash values. (This corresponds to
+     * the full list search that would be necessary if all elements
+     * were non-Comparable and had tied hashes.)
+     *
+     * TreeBins also maintain a separate locking discipline than
+     * regular bins. Because they are forwarded via special MOVED
+     * nodes at bin heads (which can never change once established),
+     * we cannot use use those nodes as locks. Instead, TreeBin
+     * extends AbstractQueuedSynchronizer to support a simple form of
+     * read-write lock. For update operations and table validation,
+     * the exclusive form of lock behaves in the same way as bin-head
+     * locks. However, lookups use shared read-lock mechanics to allow
+     * multiple readers in the absence of writers.  Additionally,
+     * these lookups do not ever block: While the lock is not
+     * available, they proceed along the slow traversal path (via
+     * next-pointers) until the lock becomes available or the list is
+     * exhausted, whichever comes first. (These cases are not fast,
+     * but maximize aggregate expected throughput.)  The AQS mechanics
+     * for doing this are straightforward.  The lock state is held as
+     * AQS getState().  Read counts are negative; the write count (1)
+     * is positive.  There are no signalling preferences among readers
+     * and writers. Since we don't need to export full Lock API, we
+     * just override the minimal AQS methods and use them directly.
+     */
+    static final class TreeBin extends AbstractQueuedSynchronizer {
+        private static final long serialVersionUID = 2249069246763182397L;
+        TreeNode root;  // root of tree
+        TreeNode first; // head of next-pointer list
+
+        /* AQS overrides */
+        public final boolean isHeldExclusively() { return getState() > 0; }
+        public final boolean tryAcquire(int ignore) {
+            if (compareAndSetState(0, 1)) {
+                setExclusiveOwnerThread(Thread.currentThread());
+                return true;
+            }
+            return false;
+        }
+        public final boolean tryRelease(int ignore) {
+            setExclusiveOwnerThread(null);
+            setState(0);
+            return true;
+        }
+        public final int tryAcquireShared(int ignore) {
+            for (int c;;) {
+                if ((c = getState()) > 0)
+                    return -1;
+                if (compareAndSetState(c, c -1))
+                    return 1;
+            }
+        }
+        public final boolean tryReleaseShared(int ignore) {
+            int c;
+            do {} while (!compareAndSetState(c = getState(), c + 1));
+            return c == -1;
+        }
+
+        /**
+         * Return the TreeNode (or null if not found) for the given key
+         * starting at given root.
+         */
+        @SuppressWarnings("unchecked") // suppress Comparable cast warning
+        final TreeNode getTreeNode(int h, Object k, TreeNode p) {
+            Class<?> c = k.getClass();
+            while (p != null) {
+                int dir, ph;  Object pk; Class<?> pc; TreeNode r;
+                if (h < (ph = p.hash))
+                    dir = -1;
+                else if (h > ph)
+                    dir = 1;
+                else if ((pk = p.key) == k || k.equals(pk))
+                    return p;
+                else if (c != (pc = pk.getClass()))
+                    dir = c.getName().compareTo(pc.getName());
+                else if (k instanceof Comparable)
+                    dir = ((Comparable)k).compareTo((Comparable)pk);
+                else
+                    dir = 0;
+                TreeNode pr = p.right;
+                if (dir > 0)
+                    p = pr;
+                else if (dir == 0 && pr != null && h >= pr.hash &&
+                         (r = getTreeNode(h, k, pr)) != null)
+                    return r;
+                else
+                    p = p.left;
+            }
+            return null;
+        }
+
+        /**
+         * Wrapper for getTreeNode used by CHM.get. Tries to obtain
+         * read-lock to call getTreeNode, but during failure to get
+         * lock, searches along next links.
+         */
+        final Object getValue(int h, Object k) {
+            Node r = null;
+            int c = getState(); // Must read lock state first
+            for (Node e = first; e != null; e = e.next) {
+                if (c <= 0 && compareAndSetState(c, c - 1)) {
+                    try {
+                        r = getTreeNode(h, k, root);
+                    } finally {
+                        releaseShared(0);
+                    }
+                    break;
+                }
+                else if ((e.hash & HASH_BITS) == h && k.equals(e.key)) {
+                    r = e;
+                    break;
+                }
+                else
+                    c = getState();
+            }
+            return r == null ? null : r.val;
+        }
+
+        /**
+         * Find or add a node
+         * @return null if added
+         */
+        @SuppressWarnings("unchecked") // suppress Comparable cast warning
+        final TreeNode putTreeNode(int h, Object k, Object v) {
+            Class<?> c = k.getClass();
+            TreeNode p = root;
+            int dir = 0;
+            if (p != null) {
+                for (;;) {
+                    int ph;  Object pk; Class<?> pc; TreeNode r;
+                    if (h < (ph = p.hash))
+                        dir = -1;
+                    else if (h > ph)
+                        dir = 1;
+                    else if ((pk = p.key) == k || k.equals(pk))
+                        return p;
+                    else if (c != (pc = (pk = p.key).getClass()))
+                        dir = c.getName().compareTo(pc.getName());
+                    else if (k instanceof Comparable)
+                        dir = ((Comparable)k).compareTo((Comparable)pk);
+                    else
+                        dir = 0;
+                    TreeNode pr = p.right, pl;
+                    if (dir > 0) {
+                        if (pr == null)
+                            break;
+                        p = pr;
+                    }
+                    else if (dir == 0 && pr != null && h >= pr.hash &&
+                             (r = getTreeNode(h, k, pr)) != null)
+                        return r;
+                    else if ((pl = p.left) == null)
+                        break;
+                    else
+                        p = pl;
+                }
+            }
+            TreeNode f = first;
+            TreeNode r = first = new TreeNode(h, k, v, f, p);
+            if (p == null)
+                root = r;
+            else {
+                if (dir <= 0)
+                    p.left = r;
+                else
+                    p.right = r;
+                if (f != null)
+                    f.prev = r;
+                fixAfterInsertion(r);
+            }
+            return null;
+        }
+
+        /**
+         * Removes the given node, that must be present before this
+         * call.  This is messier than typical red-black deletion code
+         * because we cannot swap the contents of an interior node
+         * with a leaf successor that is pinned by "next" pointers
+         * that are accessible independently of lock. So instead we
+         * swap the tree linkages.
+         */
+        final void deleteTreeNode(TreeNode p) {
+            TreeNode next = (TreeNode)p.next; // unlink traversal pointers
+            TreeNode pred = p.prev;
+            if (pred == null)
+                first = next;
+            else
+                pred.next = next;
+            if (next != null)
+                next.prev = pred;
+            TreeNode replacement;
+            TreeNode pl = p.left;
+            TreeNode pr = p.right;
+            if (pl != null && pr != null) {
+                TreeNode s = pr;
+                while (s.left != null) // find successor
+                    s = s.left;
+                boolean c = s.red; s.red = p.red; p.red = c; // swap colors
+                TreeNode sr = s.right;
+                TreeNode pp = p.parent;
+                if (s == pr) { // p was s's direct parent
+                    p.parent = s;
+                    s.right = p;
+                }
+                else {
+                    TreeNode sp = s.parent;
+                    if ((p.parent = sp) != null) {
+                        if (s == sp.left)
+                            sp.left = p;
+                        else
+                            sp.right = p;
+                    }
+                    if ((s.right = pr) != null)
+                        pr.parent = s;
+                }
+                p.left = null;
+                if ((p.right = sr) != null)
+                    sr.parent = p;
+                if ((s.left = pl) != null)
+                    pl.parent = s;
+                if ((s.parent = pp) == null)
+                    root = s;
+                else if (p == pp.left)
+                    pp.left = s;
+                else
+                    pp.right = s;
+                replacement = sr;
+            }
+            else
+                replacement = (pl != null) ? pl : pr;
+            TreeNode pp = p.parent;
+            if (replacement == null) {
+                if (pp == null) {
+                    root = null;
+                    return;
+                }
+                replacement = p;
+            }
+            else {
+                replacement.parent = pp;
+                if (pp == null)
+                    root = replacement;
+                else if (p == pp.left)
+                    pp.left = replacement;
+                else
+                    pp.right = replacement;
+                p.left = p.right = p.parent = null;
+            }
+            if (!p.red)
+                fixAfterDeletion(replacement);
+            if (p == replacement && (pp = p.parent) != null) {
+                if (p == pp.left) // detach pointers
+                    pp.left = null;
+                else if (p == pp.right)
+                    pp.right = null;
+                p.parent = null;
+            }
+        }
+
+        // CLR code updated from pre-jdk-collections version at
+        // http://gee.cs.oswego.edu/dl/classes/collections/RBCell.java
+
+        /** From CLR */
+        private void rotateLeft(TreeNode p) {
+            if (p != null) {
+                TreeNode r = p.right, pp, rl;
+                if ((rl = p.right = r.left) != null)
+                    rl.parent = p;
+                if ((pp = r.parent = p.parent) == null)
+                    root = r;
+                else if (pp.left == p)
+                    pp.left = r;
+                else
+                    pp.right = r;
+                r.left = p;
+                p.parent = r;
+            }
+        }
+
+        /** From CLR */
+        private void rotateRight(TreeNode p) {
+            if (p != null) {
+                TreeNode l = p.left, pp, lr;
+                if ((lr = p.left = l.right) != null)
+                    lr.parent = p;
+                if ((pp = l.parent = p.parent) == null)
+                    root = l;
+                else if (pp.right == p)
+                    pp.right = l;
+                else
+                    pp.left = l;
+                l.right = p;
+                p.parent = l;
+            }
+        }
+
+        /** From CLR */
+        private void fixAfterInsertion(TreeNode x) {
+            x.red = true;
+            TreeNode xp, xpp;
+            while (x != null && (xp = x.parent) != null && xp.red &&
+                   (xpp = xp.parent) != null) {
+                TreeNode xppl = xpp.left;
+                if (xp == xppl) {
+                    TreeNode y = xpp.right;
+                    if (y != null && y.red) {
+                        y.red = false;
+                        xp.red = false;
+                        xpp.red = true;
+                        x = xpp;
+                    }
+                    else {
+                        if (x == xp.right) {
+                            x = xp;
+                            rotateLeft(x);
+                            xpp = (xp = x.parent) == null ? null : xp.parent;
+                        }
+                        if (xp != null) {
+                            xp.red = false;
+                            if (xpp != null) {
+                                xpp.red = true;
+                                rotateRight(xpp);
+                            }
+                        }
+                    }
+                }
+                else {
+                    TreeNode y = xppl;
+                    if (y != null && y.red) {
+                        y.red = false;
+                        xp.red = false;
+                        xpp.red = true;
+                        x = xpp;
+                    }
+                    else {
+                        if (x == xp.left) {
+                            x = xp;
+                            rotateRight(x);
+                            xpp = (xp = x.parent) == null ? null : xp.parent;
+                        }
+                        if (xp != null) {
+                            xp.red = false;
+                            if (xpp != null) {
+                                xpp.red = true;
+                                rotateLeft(xpp);
+                            }
+                        }
+                    }
+                }
+            }
+            TreeNode r = root;
+            if (r != null && r.red)
+                r.red = false;
+        }
+
+        /** From CLR */
+        private void fixAfterDeletion(TreeNode x) {
+            while (x != null) {
+                TreeNode xp, xpl;
+                if (x.red || (xp = x.parent) == null) {
+                    x.red = false;
+                    break;
+                }
+                if (x == (xpl = xp.left)) {
+                    TreeNode sib = xp.right;
+                    if (sib != null && sib.red) {
+                        sib.red = false;
+                        xp.red = true;
+                        rotateLeft(xp);
+                        sib = (xp = x.parent) == null ? null : xp.right;
+                    }
+                    if (sib == null)
+                        x = xp;
+                    else {
+                        TreeNode sl = sib.left, sr = sib.right;
+                        if ((sr == null || !sr.red) &&
+                            (sl == null || !sl.red)) {
+                            sib.red = true;
+                            x = xp;
+                        }
+                        else {
+                            if (sr == null || !sr.red) {
+                                if (sl != null)
+                                    sl.red = false;
+                                sib.red = true;
+                                rotateRight(sib);
+                                sib = (xp = x.parent) == null ? null : xp.right;
+                            }
+                            if (sib != null) {
+                                sib.red = (xp == null) ? false : xp.red;
+                                if ((sr = sib.right) != null)
+                                    sr.red = false;
+                            }
+                            if (xp != null) {
+                                xp.red = false;
+                                rotateLeft(xp);
+                            }
+                            x = root;
+                        }
+                    }
+                }
+                else { // symmetric
+                    TreeNode sib = xpl;
+                    if (sib != null && sib.red) {
+                        sib.red = false;
+                        xp.red = true;
+                        rotateRight(xp);
+                        sib = (xp = x.parent) == null ? null : xp.left;
+                    }
+                    if (sib == null)
+                        x = xp;
+                    else {
+                        TreeNode sl = sib.left, sr = sib.right;
+                        if ((sl == null || !sl.red) &&
+                            (sr == null || !sr.red)) {
+                            sib.red = true;
+                            x = xp;
+                        }
+                        else {
+                            if (sl == null || !sl.red) {
+                                if (sr != null)
+                                    sr.red = false;
+                                sib.red = true;
+                                rotateLeft(sib);
+                                sib = (xp = x.parent) == null ? null : xp.left;
+                            }
+                            if (sib != null) {
+                                sib.red = (xp == null) ? false : xp.red;
+                                if ((sl = sib.left) != null)
+                                    sl.red = false;
+                            }
+                            if (xp != null) {
+                                xp.red = false;
+                                rotateRight(xp);
+                            }
+                            x = root;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /* ---------------- Collision reduction methods -------------- */
+
+    /**
+     * Spreads higher bits to lower, and also forces top 2 bits to 0.
+     * Because the table uses power-of-two masking, sets of hashes
+     * that vary only in bits above the current mask will always
+     * collide. (Among known examples are sets of Float keys holding
+     * consecutive whole numbers in small tables.)  To counter this,
+     * we apply a transform that spreads the impact of higher bits
+     * downward. There is a tradeoff between speed, utility, and
+     * quality of bit-spreading. Because many common sets of hashes
+     * are already reasonably distributed across bits (so don't benefit
+     * from spreading), and because we use trees to handle large sets
+     * of collisions in bins, we don't need excessively high quality.
+     */
+    private static final int spread(int h) {
+        h ^= (h >>> 18) ^ (h >>> 12);
+        return (h ^ (h >>> 10)) & HASH_BITS;
+    }
+
+    /**
+     * Replaces a list bin with a tree bin. Call only when locked.
+     * Fails to replace if the given key is non-comparable or table
+     * is, or needs, resizing.
+     */
+    private final void replaceWithTreeBin(Node[] tab, int index, Object key) {
+        if ((key instanceof Comparable) &&
+            (tab.length >= MAXIMUM_CAPACITY || counter.sum() < (long)sizeCtl)) {
+            TreeBin t = new TreeBin();
+            for (Node e = tabAt(tab, index); e != null; e = e.next)
+                t.putTreeNode(e.hash & HASH_BITS, e.key, e.val);
+            setTabAt(tab, index, new Node(MOVED, t, null, null));
+        }
+    }
+
+    /* ---------------- Internal access and update methods -------------- */
+
+    /** Implementation for get and containsKey */
+    private final Object internalGet(Object k) {
+        int h = spread(k.hashCode());
+        retry: for (Node[] tab = table; tab != null;) {
+            Node e, p; Object ek, ev; int eh;      // locals to read fields once
+            for (e = tabAt(tab, (tab.length - 1) & h); e != null; e = e.next) {
+                if ((eh = e.hash) == MOVED) {
+                    if ((ek = e.key) instanceof TreeBin)  // search TreeBin
+                        return ((TreeBin)ek).getValue(h, k);
+                    else {                        // restart with new table
+                        tab = (Node[])ek;
+                        continue retry;
+                    }
+                }
+                else if ((eh & HASH_BITS) == h && (ev = e.val) != null &&
+                         ((ek = e.key) == k || k.equals(ek)))
+                    return ev;
+            }
+            break;
+        }
+        return null;
+    }
+
+    /**
+     * Implementation for the four public remove/replace methods:
+     * Replaces node value with v, conditional upon match of cv if
+     * non-null.  If resulting value is null, delete.
+     */
+    private final Object internalReplace(Object k, Object v, Object cv) {
+        int h = spread(k.hashCode());
+        Object oldVal = null;
+        for (Node[] tab = table;;) {
+            Node f; int i, fh; Object fk;
+            if (tab == null ||
+                (f = tabAt(tab, i = (tab.length - 1) & h)) == null)
+                break;
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    boolean validated = false;
+                    boolean deleted = false;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            validated = true;
+                            TreeNode p = t.getTreeNode(h, k, t.root);
+                            if (p != null) {
+                                Object pv = p.val;
+                                if (cv == null || cv == pv || cv.equals(pv)) {
+                                    oldVal = pv;
+                                    if ((p.val = v) == null) {
+                                        deleted = true;
+                                        t.deleteTreeNode(p);
+                                    }
+                                }
+                            }
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (validated) {
+                        if (deleted)
+                            counter.add(-1L);
+                        break;
+                    }
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & HASH_BITS) != h && f.next == null) // precheck
+                break;                          // rules out possible existence
+            else if ((fh & LOCKED) != 0) {
+                checkForResize();               // try resizing if can't get lock
+                f.tryAwaitLock(tab, i);
+            }
+            else if (f.casHash(fh, fh | LOCKED)) {
+                boolean validated = false;
+                boolean deleted = false;
+                try {
+                    if (tabAt(tab, i) == f) {
+                        validated = true;
+                        for (Node e = f, pred = null;;) {
+                            Object ek, ev;
+                            if ((e.hash & HASH_BITS) == h &&
+                                ((ev = e.val) != null) &&
+                                ((ek = e.key) == k || k.equals(ek))) {
+                                if (cv == null || cv == ev || cv.equals(ev)) {
+                                    oldVal = ev;
+                                    if ((e.val = v) == null) {
+                                        deleted = true;
+                                        Node en = e.next;
+                                        if (pred != null)
+                                            pred.next = en;
+                                        else
+                                            setTabAt(tab, i, en);
+                                    }
+                                }
+                                break;
+                            }
+                            pred = e;
+                            if ((e = e.next) == null)
+                                break;
+                        }
+                    }
+                } finally {
+                    if (!f.casHash(fh | LOCKED, fh)) {
+                        f.hash = fh;
+                        synchronized (f) { f.notifyAll(); };
+                    }
+                }
+                if (validated) {
+                    if (deleted)
+                        counter.add(-1L);
+                    break;
+                }
+            }
+        }
+        return oldVal;
+    }
+
+    /*
+     * Internal versions of the five insertion methods, each a
+     * little more complicated than the last. All have
+     * the same basic structure as the first (internalPut):
+     *  1. If table uninitialized, create
+     *  2. If bin empty, try to CAS new node
+     *  3. If bin stale, use new table
+     *  4. if bin converted to TreeBin, validate and relay to TreeBin methods
+     *  5. Lock and validate; if valid, scan and add or update
+     *
+     * The others interweave other checks and/or alternative actions:
+     *  * Plain put checks for and performs resize after insertion.
+     *  * putIfAbsent prescans for mapping without lock (and fails to add
+     *    if present), which also makes pre-emptive resize checks worthwhile.
+     *  * computeIfAbsent extends form used in putIfAbsent with additional
+     *    mechanics to deal with, calls, potential exceptions and null
+     *    returns from function call.
+     *  * compute uses the same function-call mechanics, but without
+     *    the prescans
+     *  * putAll attempts to pre-allocate enough table space
+     *    and more lazily performs count updates and checks.
+     *
+     * Someday when details settle down a bit more, it might be worth
+     * some factoring to reduce sprawl.
+     */
+
+    /** Implementation for put */
+    private final Object internalPut(Object k, Object v) {
+        int h = spread(k.hashCode());
+        int count = 0;
+        for (Node[] tab = table;;) {
+            int i; Node f; int fh; Object fk;
+            if (tab == null)
+                tab = initTable();
+            else if ((f = tabAt(tab, i = (tab.length - 1) & h)) == null) {
+                if (casTabAt(tab, i, null, new Node(h, k, v, null)))
+                    break;                   // no lock when adding to empty bin
+            }
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    Object oldVal = null;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 2;
+                            TreeNode p = t.putTreeNode(h, k, v);
+                            if (p != null) {
+                                oldVal = p.val;
+                                p.val = v;
+                            }
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (count != 0) {
+                        if (oldVal != null)
+                            return oldVal;
+                        break;
+                    }
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & LOCKED) != 0) {
+                checkForResize();
+                f.tryAwaitLock(tab, i);
+            }
+            else if (f.casHash(fh, fh | LOCKED)) {
+                Object oldVal = null;
+                try {                        // needed in case equals() throws
+                    if (tabAt(tab, i) == f) {
+                        count = 1;
+                        for (Node e = f;; ++count) {
+                            Object ek, ev;
+                            if ((e.hash & HASH_BITS) == h &&
+                                (ev = e.val) != null &&
+                                ((ek = e.key) == k || k.equals(ek))) {
+                                oldVal = ev;
+                                e.val = v;
+                                break;
+                            }
+                            Node last = e;
+                            if ((e = e.next) == null) {
+                                last.next = new Node(h, k, v, null);
+                                if (count >= TREE_THRESHOLD)
+                                    replaceWithTreeBin(tab, i, k);
+                                break;
+                            }
+                        }
+                    }
+                } finally {                  // unlock and signal if needed
+                    if (!f.casHash(fh | LOCKED, fh)) {
+                        f.hash = fh;
+                        synchronized (f) { f.notifyAll(); };
+                    }
+                }
+                if (count != 0) {
+                    if (oldVal != null)
+                        return oldVal;
+                    if (tab.length <= 64)
+                        count = 2;
+                    break;
+                }
+            }
+        }
+        counter.add(1L);
+        if (count > 1)
+            checkForResize();
+        return null;
+    }
+
+    /** Implementation for putIfAbsent */
+    private final Object internalPutIfAbsent(Object k, Object v) {
+        int h = spread(k.hashCode());
+        int count = 0;
+        for (Node[] tab = table;;) {
+            int i; Node f; int fh; Object fk, fv;
+            if (tab == null)
+                tab = initTable();
+            else if ((f = tabAt(tab, i = (tab.length - 1) & h)) == null) {
+                if (casTabAt(tab, i, null, new Node(h, k, v, null)))
+                    break;
+            }
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    Object oldVal = null;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 2;
+                            TreeNode p = t.putTreeNode(h, k, v);
+                            if (p != null)
+                                oldVal = p.val;
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (count != 0) {
+                        if (oldVal != null)
+                            return oldVal;
+                        break;
+                    }
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & HASH_BITS) == h && (fv = f.val) != null &&
+                     ((fk = f.key) == k || k.equals(fk)))
+                return fv;
+            else {
+                Node g = f.next;
+                if (g != null) { // at least 2 nodes -- search and maybe resize
+                    for (Node e = g;;) {
+                        Object ek, ev;
+                        if ((e.hash & HASH_BITS) == h && (ev = e.val) != null &&
+                            ((ek = e.key) == k || k.equals(ek)))
+                            return ev;
+                        if ((e = e.next) == null) {
+                            checkForResize();
+                            break;
+                        }
+                    }
+                }
+                if (((fh = f.hash) & LOCKED) != 0) {
+                    checkForResize();
+                    f.tryAwaitLock(tab, i);
+                }
+                else if (tabAt(tab, i) == f && f.casHash(fh, fh | LOCKED)) {
+                    Object oldVal = null;
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 1;
+                            for (Node e = f;; ++count) {
+                                Object ek, ev;
+                                if ((e.hash & HASH_BITS) == h &&
+                                    (ev = e.val) != null &&
+                                    ((ek = e.key) == k || k.equals(ek))) {
+                                    oldVal = ev;
+                                    break;
+                                }
+                                Node last = e;
+                                if ((e = e.next) == null) {
+                                    last.next = new Node(h, k, v, null);
+                                    if (count >= TREE_THRESHOLD)
+                                        replaceWithTreeBin(tab, i, k);
+                                    break;
+                                }
+                            }
+                        }
+                    } finally {
+                        if (!f.casHash(fh | LOCKED, fh)) {
+                            f.hash = fh;
+                            synchronized (f) { f.notifyAll(); };
+                        }
+                    }
+                    if (count != 0) {
+                        if (oldVal != null)
+                            return oldVal;
+                        if (tab.length <= 64)
+                            count = 2;
+                        break;
+                    }
+                }
+            }
+        }
+        counter.add(1L);
+        if (count > 1)
+            checkForResize();
+        return null;
+    }
+
+    /** Implementation for computeIfAbsent */
+    private final Object internalComputeIfAbsent(K k,
+                                                 MappingFunction<? super K, ?> mf) {
+        int h = spread(k.hashCode());
+        Object val = null;
+        int count = 0;
+        for (Node[] tab = table;;) {
+            Node f; int i, fh; Object fk, fv;
+            if (tab == null)
+                tab = initTable();
+            else if ((f = tabAt(tab, i = (tab.length - 1) & h)) == null) {
+                Node node = new Node(fh = h | LOCKED, k, null, null);
+                if (casTabAt(tab, i, null, node)) {
+                    count = 1;
+                    try {
+                        if ((val = mf.map(k)) != null)
+                            node.val = val;
+                    } finally {
+                        if (val == null)
+                            setTabAt(tab, i, null);
+                        if (!node.casHash(fh, h)) {
+                            node.hash = h;
+                            synchronized (node) { node.notifyAll(); };
+                        }
+                    }
+                }
+                if (count != 0)
+                    break;
+            }
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    boolean added = false;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 1;
+                            TreeNode p = t.getTreeNode(h, k, t.root);
+                            if (p != null)
+                                val = p.val;
+                            else if ((val = mf.map(k)) != null) {
+                                added = true;
+                                count = 2;
+                                t.putTreeNode(h, k, val);
+                            }
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (count != 0) {
+                        if (!added)
+                            return val;
+                        break;
+                    }
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & HASH_BITS) == h && (fv = f.val) != null &&
+                     ((fk = f.key) == k || k.equals(fk)))
+                return fv;
+            else {
+                Node g = f.next;
+                if (g != null) {
+                    for (Node e = g;;) {
+                        Object ek, ev;
+                        if ((e.hash & HASH_BITS) == h && (ev = e.val) != null &&
+                            ((ek = e.key) == k || k.equals(ek)))
+                            return ev;
+                        if ((e = e.next) == null) {
+                            checkForResize();
+                            break;
+                        }
+                    }
+                }
+                if (((fh = f.hash) & LOCKED) != 0) {
+                    checkForResize();
+                    f.tryAwaitLock(tab, i);
+                }
+                else if (tabAt(tab, i) == f && f.casHash(fh, fh | LOCKED)) {
+                    boolean added = false;
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 1;
+                            for (Node e = f;; ++count) {
+                                Object ek, ev;
+                                if ((e.hash & HASH_BITS) == h &&
+                                    (ev = e.val) != null &&
+                                    ((ek = e.key) == k || k.equals(ek))) {
+                                    val = ev;
+                                    break;
+                                }
+                                Node last = e;
+                                if ((e = e.next) == null) {
+                                    if ((val = mf.map(k)) != null) {
+                                        added = true;
+                                        last.next = new Node(h, k, val, null);
+                                        if (count >= TREE_THRESHOLD)
+                                            replaceWithTreeBin(tab, i, k);
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    } finally {
+                        if (!f.casHash(fh | LOCKED, fh)) {
+                            f.hash = fh;
+                            synchronized (f) { f.notifyAll(); };
+                        }
+                    }
+                    if (count != 0) {
+                        if (!added)
+                            return val;
+                        if (tab.length <= 64)
+                            count = 2;
+                        break;
+                    }
+                }
+            }
+        }
+        if (val == null)
+            throw new NullPointerException();
+        counter.add(1L);
+        if (count > 1)
+            checkForResize();
+        return val;
+    }
+
+    /** Implementation for compute */
+    @SuppressWarnings("unchecked")
+    private final Object internalCompute(K k,
+                                         RemappingFunction<? super K, V> mf) {
+        int h = spread(k.hashCode());
+        Object val = null;
+        boolean added = false;
+        int count = 0;
+        for (Node[] tab = table;;) {
+            Node f; int i, fh; Object fk;
+            if (tab == null)
+                tab = initTable();
+            else if ((f = tabAt(tab, i = (tab.length - 1) & h)) == null) {
+                Node node = new Node(fh = h | LOCKED, k, null, null);
+                if (casTabAt(tab, i, null, node)) {
+                    try {
+                        count = 1;
+                        if ((val = mf.remap(k, null)) != null) {
+                            node.val = val;
+                            added = true;
+                        }
+                    } finally {
+                        if (!added)
+                            setTabAt(tab, i, null);
+                        if (!node.casHash(fh, h)) {
+                            node.hash = h;
+                            synchronized (node) { node.notifyAll(); };
+                        }
+                    }
+                }
+                if (count != 0)
+                    break;
+            }
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            count = 1;
+                            TreeNode p = t.getTreeNode(h, k, t.root);
+                            Object pv = (p == null) ? null : p.val;
+                            if ((val = mf.remap(k, (V)pv)) != null) {
+                                if (p != null)
+                                    p.val = val;
+                                else {
+                                    count = 2;
+                                    added = true;
+                                    t.putTreeNode(h, k, val);
+                                }
+                            }
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (count != 0)
+                        break;
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & LOCKED) != 0) {
+                checkForResize();
+                f.tryAwaitLock(tab, i);
+            }
+            else if (f.casHash(fh, fh | LOCKED)) {
+                try {
+                    if (tabAt(tab, i) == f) {
+                        count = 1;
+                        for (Node e = f;; ++count) {
+                            Object ek, ev;
+                            if ((e.hash & HASH_BITS) == h &&
+                                (ev = e.val) != null &&
+                                ((ek = e.key) == k || k.equals(ek))) {
+                                val = mf.remap(k, (V)ev);
+                                if (val != null)
+                                    e.val = val;
+                                break;
+                            }
+                            Node last = e;
+                            if ((e = e.next) == null) {
+                                if ((val = mf.remap(k, null)) != null) {
+                                    last.next = new Node(h, k, val, null);
+                                    added = true;
+                                    if (count >= TREE_THRESHOLD)
+                                        replaceWithTreeBin(tab, i, k);
+                                }
+                                break;
+                            }
+                        }
+                    }
+                } finally {
+                    if (!f.casHash(fh | LOCKED, fh)) {
+                        f.hash = fh;
+                        synchronized (f) { f.notifyAll(); };
+                    }
+                }
+                if (count != 0) {
+                    if (tab.length <= 64)
+                        count = 2;
+                    break;
+                }
+            }
+        }
+        if (val == null)
+            throw new NullPointerException();
+        if (added) {
+            counter.add(1L);
+            if (count > 1)
+                checkForResize();
+        }
+        return val;
+    }
+
+    /** Implementation for putAll */
+    private final void internalPutAll(Map<?, ?> m) {
+        tryPresize(m.size());
+        long delta = 0L;     // number of uncommitted additions
+        boolean npe = false; // to throw exception on exit for nulls
+        try {                // to clean up counts on other exceptions
+            for (Map.Entry<?, ?> entry : m.entrySet()) {
+                Object k, v;
+                if (entry == null || (k = entry.getKey()) == null ||
+                    (v = entry.getValue()) == null) {
+                    npe = true;
+                    break;
+                }
+                int h = spread(k.hashCode());
+                for (Node[] tab = table;;) {
+                    int i; Node f; int fh; Object fk;
+                    if (tab == null)
+                        tab = initTable();
+                    else if ((f = tabAt(tab, i = (tab.length - 1) & h)) == null){
+                        if (casTabAt(tab, i, null, new Node(h, k, v, null))) {
+                            ++delta;
+                            break;
+                        }
+                    }
+                    else if ((fh = f.hash) == MOVED) {
+                        if ((fk = f.key) instanceof TreeBin) {
+                            TreeBin t = (TreeBin)fk;
+                            boolean validated = false;
+                            t.acquire(0);
+                            try {
+                                if (tabAt(tab, i) == f) {
+                                    validated = true;
+                                    TreeNode p = t.getTreeNode(h, k, t.root);
+                                    if (p != null)
+                                        p.val = v;
+                                    else {
+                                        t.putTreeNode(h, k, v);
+                                        ++delta;
+                                    }
+                                }
+                            } finally {
+                                t.release(0);
+                            }
+                            if (validated)
+                                break;
+                        }
+                        else
+                            tab = (Node[])fk;
+                    }
+                    else if ((fh & LOCKED) != 0) {
+                        counter.add(delta);
+                        delta = 0L;
+                        checkForResize();
+                        f.tryAwaitLock(tab, i);
+                    }
+                    else if (f.casHash(fh, fh | LOCKED)) {
+                        int count = 0;
+                        try {
+                            if (tabAt(tab, i) == f) {
+                                count = 1;
+                                for (Node e = f;; ++count) {
+                                    Object ek, ev;
+                                    if ((e.hash & HASH_BITS) == h &&
+                                        (ev = e.val) != null &&
+                                        ((ek = e.key) == k || k.equals(ek))) {
+                                        e.val = v;
+                                        break;
+                                    }
+                                    Node last = e;
+                                    if ((e = e.next) == null) {
+                                        ++delta;
+                                        last.next = new Node(h, k, v, null);
+                                        if (count >= TREE_THRESHOLD)
+                                            replaceWithTreeBin(tab, i, k);
+                                        break;
+                                    }
+                                }
+                            }
+                        } finally {
+                            if (!f.casHash(fh | LOCKED, fh)) {
+                                f.hash = fh;
+                                synchronized (f) { f.notifyAll(); };
+                            }
+                        }
+                        if (count != 0) {
+                            if (count > 1) {
+                                counter.add(delta);
+                                delta = 0L;
+                                checkForResize();
+                            }
+                            break;
+                        }
+                    }
+                }
+            }
+        } finally {
+            if (delta != 0)
+                counter.add(delta);
+        }
+        if (npe)
+            throw new NullPointerException();
+    }
+
+    /* ---------------- Table Initialization and Resizing -------------- */
+
+    /**
+     * Returns a power of two table size for the given desired capacity.
+     * See Hackers Delight, sec 3.2
+     */
+    private static final int tableSizeFor(int c) {
+        int n = c - 1;
+        n |= n >>> 1;
+        n |= n >>> 2;
+        n |= n >>> 4;
+        n |= n >>> 8;
+        n |= n >>> 16;
+        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
+    }
+
+    /**
+     * Initializes table, using the size recorded in sizeCtl.
+     */
+    private final Node[] initTable() {
+        Node[] tab; int sc;
+        while ((tab = table) == null) {
+            if ((sc = sizeCtl) < 0)
+                Thread.yield(); // lost initialization race; just spin
+            else if (UNSAFE.compareAndSwapInt(this, sizeCtlOffset, sc, -1)) {
+                try {
+                    if ((tab = table) == null) {
+                        int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
+                        tab = table = new Node[n];
+                        sc = n - (n >>> 2);
+                    }
+                } finally {
+                    sizeCtl = sc;
+                }
+                break;
+            }
+        }
+        return tab;
+    }
+
+    /**
+     * If table is too small and not already resizing, creates next
+     * table and transfers bins.  Rechecks occupancy after a transfer
+     * to see if another resize is already needed because resizings
+     * are lagging additions.
+     */
+    private final void checkForResize() {
+        Node[] tab; int n, sc;
+        while ((tab = table) != null &&
+               (n = tab.length) < MAXIMUM_CAPACITY &&
+               (sc = sizeCtl) >= 0 && counter.sum() >= (long)sc &&
+               UNSAFE.compareAndSwapInt(this, sizeCtlOffset, sc, -1)) {
+            try {
+                if (tab == table) {
+                    table = rebuild(tab);
+                    sc = (n << 1) - (n >>> 1);
+                }
+            } finally {
+                sizeCtl = sc;
+            }
+        }
+    }
+
+    /**
+     * Tries to presize table to accommodate the given number of elements.
+     *
+     * @param size number of elements (doesn't need to be perfectly accurate)
+     */
+    private final void tryPresize(int size) {
+        int c = (size >= (MAXIMUM_CAPACITY >>> 1)) ? MAXIMUM_CAPACITY :
+            tableSizeFor(size + (size >>> 1) + 1);
+        int sc;
+        while ((sc = sizeCtl) >= 0) {
+            Node[] tab = table; int n;
+            if (tab == null || (n = tab.length) == 0) {
+                n = (sc > c) ? sc : c;
+                if (UNSAFE.compareAndSwapInt(this, sizeCtlOffset, sc, -1)) {
+                    try {
+                        if (table == tab) {
+                            table = new Node[n];
+                            sc = n - (n >>> 2);
+                        }
+                    } finally {
+                        sizeCtl = sc;
+                    }
+                }
+            }
+            else if (c <= sc || n >= MAXIMUM_CAPACITY)
+                break;
+            else if (UNSAFE.compareAndSwapInt(this, sizeCtlOffset, sc, -1)) {
+                try {
+                    if (table == tab) {
+                        table = rebuild(tab);
+                        sc = (n << 1) - (n >>> 1);
+                    }
+                } finally {
+                    sizeCtl = sc;
+                }
+            }
+        }
+    }
+
+    /*
+     * Moves and/or copies the nodes in each bin to new table. See
+     * above for explanation.
+     *
+     * @return the new table
+     */
+    private static final Node[] rebuild(Node[] tab) {
+        int n = tab.length;
+        Node[] nextTab = new Node[n << 1];
+        Node fwd = new Node(MOVED, nextTab, null, null);
+        int[] buffer = null;       // holds bins to revisit; null until needed
+        Node rev = null;           // reverse forwarder; null until needed
+        int nbuffered = 0;         // the number of bins in buffer list
+        int bufferIndex = 0;       // buffer index of current buffered bin
+        int bin = n - 1;           // current non-buffered bin or -1 if none
+
+        for (int i = bin;;) {      // start upwards sweep
+            int fh; Node f;
+            if ((f = tabAt(tab, i)) == null) {
+                if (bin >= 0) {    // no lock needed (or available)
+                    if (!casTabAt(tab, i, f, fwd))
+                        continue;
+                }
+                else {             // transiently use a locked forwarding node
+                    Node g = new Node(MOVED|LOCKED, nextTab, null, null);
+                    if (!casTabAt(tab, i, f, g))
+                        continue;
+                    setTabAt(nextTab, i, null);
+                    setTabAt(nextTab, i + n, null);
+                    setTabAt(tab, i, fwd);
+                    if (!g.casHash(MOVED|LOCKED, MOVED)) {
+                        g.hash = MOVED;
+                        synchronized (g) { g.notifyAll(); }
+                    }
+                }
+            }
+            else if ((fh = f.hash) == MOVED) {
+                Object fk = f.key;
+                if (fk instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    boolean validated = false;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            validated = true;
+                            splitTreeBin(nextTab, i, t);
+                            setTabAt(tab, i, fwd);
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                    if (!validated)
+                        continue;
+                }
+            }
+            else if ((fh & LOCKED) == 0 && f.casHash(fh, fh|LOCKED)) {
+                boolean validated = false;
+                try {              // split to lo and hi lists; copying as needed
+                    if (tabAt(tab, i) == f) {
+                        validated = true;
+                        splitBin(nextTab, i, f);
+                        setTabAt(tab, i, fwd);
+                    }
+                } finally {
+                    if (!f.casHash(fh | LOCKED, fh)) {
+                        f.hash = fh;
+                        synchronized (f) { f.notifyAll(); };
+                    }
+                }
+                if (!validated)
+                    continue;
+            }
+            else {
+                if (buffer == null) // initialize buffer for revisits
+                    buffer = new int[TRANSFER_BUFFER_SIZE];
+                if (bin < 0 && bufferIndex > 0) {
+                    int j = buffer[--bufferIndex];
+                    buffer[bufferIndex] = i;
+                    i = j;         // swap with another bin
+                    continue;
+                }
+                if (bin < 0 || nbuffered >= TRANSFER_BUFFER_SIZE) {
+                    f.tryAwaitLock(tab, i);
+                    continue;      // no other options -- block
+                }
+                if (rev == null)   // initialize reverse-forwarder
+                    rev = new Node(MOVED, tab, null, null);
+                if (tabAt(tab, i) != f || (f.hash & LOCKED) == 0)
+                    continue;      // recheck before adding to list
+                buffer[nbuffered++] = i;
+                setTabAt(nextTab, i, rev);     // install place-holders
+                setTabAt(nextTab, i + n, rev);
+            }
+
+            if (bin > 0)
+                i = --bin;
+            else if (buffer != null && nbuffered > 0) {
+                bin = -1;
+                i = buffer[bufferIndex = --nbuffered];
+            }
+            else
+                return nextTab;
+        }
+    }
+
+    /**
+     * Split a normal bin with list headed by e into lo and hi parts;
+     * install in given table
+     */
+    private static void splitBin(Node[] nextTab, int i, Node e) {
+        int bit = nextTab.length >>> 1; // bit to split on
+        int runBit = e.hash & bit;
+        Node lastRun = e, lo = null, hi = null;
+        for (Node p = e.next; p != null; p = p.next) {
+            int b = p.hash & bit;
+            if (b != runBit) {
+                runBit = b;
+                lastRun = p;
+            }
+        }
+        if (runBit == 0)
+            lo = lastRun;
+        else
+            hi = lastRun;
+        for (Node p = e; p != lastRun; p = p.next) {
+            int ph = p.hash & HASH_BITS;
+            Object pk = p.key, pv = p.val;
+            if ((ph & bit) == 0)
+                lo = new Node(ph, pk, pv, lo);
+            else
+                hi = new Node(ph, pk, pv, hi);
+        }
+        setTabAt(nextTab, i, lo);
+        setTabAt(nextTab, i + bit, hi);
+    }
+
+    /**
+     * Split a tree bin into lo and hi parts; install in given table
+     */
+    private static void splitTreeBin(Node[] nextTab, int i, TreeBin t) {
+        int bit = nextTab.length >>> 1;
+        TreeBin lt = new TreeBin();
+        TreeBin ht = new TreeBin();
+        int lc = 0, hc = 0;
+        for (Node e = t.first; e != null; e = e.next) {
+            int h = e.hash & HASH_BITS;
+            Object k = e.key, v = e.val;
+            if ((h & bit) == 0) {
+                ++lc;
+                lt.putTreeNode(h, k, v);
+            }
+            else {
+                ++hc;
+                ht.putTreeNode(h, k, v);
+            }
+        }
+        Node ln, hn; // throw away trees if too small
+        if (lc <= (TREE_THRESHOLD >>> 1)) {
+            ln = null;
+            for (Node p = lt.first; p != null; p = p.next)
+                ln = new Node(p.hash, p.key, p.val, ln);
+        }
+        else
+            ln = new Node(MOVED, lt, null, null);
+        setTabAt(nextTab, i, ln);
+        if (hc <= (TREE_THRESHOLD >>> 1)) {
+            hn = null;
+            for (Node p = ht.first; p != null; p = p.next)
+                hn = new Node(p.hash, p.key, p.val, hn);
+        }
+        else
+            hn = new Node(MOVED, ht, null, null);
+        setTabAt(nextTab, i + bit, hn);
+    }
+
+    /**
+     * Implementation for clear. Steps through each bin, removing all
+     * nodes.
+     */
+    private final void internalClear() {
+        long delta = 0L; // negative number of deletions
+        int i = 0;
+        Node[] tab = table;
+        while (tab != null && i < tab.length) {
+            int fh; Object fk;
+            Node f = tabAt(tab, i);
+            if (f == null)
+                ++i;
+            else if ((fh = f.hash) == MOVED) {
+                if ((fk = f.key) instanceof TreeBin) {
+                    TreeBin t = (TreeBin)fk;
+                    t.acquire(0);
+                    try {
+                        if (tabAt(tab, i) == f) {
+                            for (Node p = t.first; p != null; p = p.next) {
+                                p.val = null;
+                                --delta;
+                            }
+                            t.first = null;
+                            t.root = null;
+                            ++i;
+                        }
+                    } finally {
+                        t.release(0);
+                    }
+                }
+                else
+                    tab = (Node[])fk;
+            }
+            else if ((fh & LOCKED) != 0) {
+                counter.add(delta); // opportunistically update count
+                delta = 0L;
+                f.tryAwaitLock(tab, i);
+            }
+            else if (f.casHash(fh, fh | LOCKED)) {
+                try {
+                    if (tabAt(tab, i) == f) {
+                        for (Node e = f; e != null; e = e.next) {
+                            e.val = null;
+                            --delta;
+                        }
+                        setTabAt(tab, i, null);
+                        ++i;
+                    }
+                } finally {
+                    if (!f.casHash(fh | LOCKED, fh)) {
+                        f.hash = fh;
+                        synchronized (f) { f.notifyAll(); };
+                    }
+                }
+            }
+        }
+        if (delta != 0)
+            counter.add(delta);
+    }
+
+    /* ----------------Table Traversal -------------- */
+
+    /**
+     * Encapsulates traversal for methods such as containsValue; also
+     * serves as a base class for other iterators.
+     *
+     * At each step, the iterator snapshots the key ("nextKey") and
+     * value ("nextVal") of a valid node (i.e., one that, at point of
+     * snapshot, has a non-null user value). Because val fields can
+     * change (including to null, indicating deletion), field nextVal
+     * might not be accurate at point of use, but still maintains the
+     * weak consistency property of holding a value that was once
+     * valid.
+     *
+     * Internal traversals directly access these fields, as in:
+     * {@code while (it.next != null) { process(it.nextKey); it.advance(); }}
+     *
+     * Exported iterators (subclasses of ViewIterator) extract key,
+     * value, or key-value pairs as return values of Iterator.next(),
+     * and encapsulate the it.next check as hasNext();
+     *
+     * The iterator visits once each still-valid node that was
+     * reachable upon iterator construction. It might miss some that
+     * were added to a bin after the bin was visited, which is OK wrt
+     * consistency guarantees. Maintaining this property in the face
+     * of possible ongoing resizes requires a fair amount of
+     * bookkeeping state that is difficult to optimize away amidst
+     * volatile accesses.  Even so, traversal maintains reasonable
+     * throughput.
+     *
+     * Normally, iteration proceeds bin-by-bin traversing lists.
+     * However, if the table has been resized, then all future steps
+     * must traverse both the bin at the current index as well as at
+     * (index + baseSize); and so on for further resizings. To
+     * paranoically cope with potential sharing by users of iterators
+     * across threads, iteration terminates if a bounds checks fails
+     * for a table read.
+     *
+     * The range-based constructor enables creation of parallel
+     * range-splitting traversals. (Not yet implemented.)
+     */
+    static class InternalIterator {
+        Node next;           // the next entry to use
+        Node last;           // the last entry used
+        Object nextKey;      // cached key field of next
+        Object nextVal;      // cached val field of next
+        Node[] tab;          // current table; updated if resized
+        int index;           // index of bin to use next
+        int baseIndex;       // current index of initial table
+        final int baseLimit; // index bound for initial table
+        final int baseSize;  // initial table size
+
+        /** Creates iterator for all entries in the table. */
+        InternalIterator(Node[] tab) {
+            this.tab = tab;
+            baseLimit = baseSize = (tab == null) ? 0 : tab.length;
+            index = baseIndex = 0;
+            next = null;
+            advance();
+        }
+
+        /** Creates iterator for the given range of the table */
+        InternalIterator(Node[] tab, int lo, int hi) {
+            this.tab = tab;
+            baseSize = (tab == null) ? 0 : tab.length;
+            baseLimit = (hi <= baseSize) ? hi : baseSize;
+            index = baseIndex = (lo >= 0) ? lo : 0;
+            next = null;
+            advance();
+        }
+
+        /** Advances next. See above for explanation. */
+        final void advance() {
+            Node e = last = next;
+            outer: do {
+                if (e != null)                  // advance past used/skipped node
+                    e = e.next;
+                while (e == null) {             // get to next non-null bin
+                    Node[] t; int b, i, n; Object ek; // checks must use locals
+                    if ((b = baseIndex) >= baseLimit || (i = index) < 0 ||
+                        (t = tab) == null || i >= (n = t.length))
+                        break outer;
+                    else if ((e = tabAt(t, i)) != null && e.hash == MOVED) {
+                        if ((ek = e.key) instanceof TreeBin)
+                            e = ((TreeBin)ek).first;
+                        else {
+                            tab = (Node[])ek;
+                            continue;           // restarts due to null val
+                        }
+                    }                           // visit upper slots if present
+                    index = (i += baseSize) < n ? i : (baseIndex = b + 1);
+                }
+                nextKey = e.key;
+            } while ((nextVal = e.val) == null);// skip deleted or special nodes
+            next = e;
+        }
+    }
+
+    /* ---------------- Public operations -------------- */
+
+    /**
+     * Creates a new, empty map with the default initial table size (16),
+     */
+    public ConcurrentHashMapV8() {
+        this.counter = new LongAdder();
+    }
+
+    /**
+     * Creates a new, empty map with an initial table size
+     * accommodating the specified number of elements without the need
+     * to dynamically resize.
+     *
+     * @param initialCapacity The implementation performs internal
+     * sizing to accommodate this many elements.
+     * @throws IllegalArgumentException if the initial capacity of
+     * elements is negative
+     */
+    public ConcurrentHashMapV8(int initialCapacity) {
+        if (initialCapacity < 0)
+            throw new IllegalArgumentException();
+        int cap = ((initialCapacity >= (MAXIMUM_CAPACITY >>> 1)) ?
+                   MAXIMUM_CAPACITY :
+                   tableSizeFor(initialCapacity + (initialCapacity >>> 1) + 1));
+        this.counter = new LongAdder();
+        this.sizeCtl = cap;
+    }
+
+    /**
+     * Creates a new map with the same mappings as the given map.
+     *
+     * @param m the map
+     */
+    public ConcurrentHashMapV8(Map<? extends K, ? extends V> m) {
+        this.counter = new LongAdder();
+        this.sizeCtl = DEFAULT_CAPACITY;
+        internalPutAll(m);
+    }
+
+    /**
+     * Creates a new, empty map with an initial table size based on
+     * the given number of elements ({@code initialCapacity}) and
+     * initial table density ({@code loadFactor}).
+     *
+     * @param initialCapacity the initial capacity. The implementation
+     * performs internal sizing to accommodate this many elements,
+     * given the specified load factor.
+     * @param loadFactor the load factor (table density) for
+     * establishing the initial table size
+     * @throws IllegalArgumentException if the initial capacity of
+     * elements is negative or the load factor is nonpositive
+     *
+     * @since 1.6
+     */
+    public ConcurrentHashMapV8(int initialCapacity, float loadFactor) {
+        this(initialCapacity, loadFactor, 1);
+    }
+
+    /**
+     * Creates a new, empty map with an initial table size based on
+     * the given number of elements ({@code initialCapacity}), table
+     * density ({@code loadFactor}), and number of concurrently
+     * updating threads ({@code concurrencyLevel}).
+     *
+     * @param initialCapacity the initial capacity. The implementation
+     * performs internal sizing to accommodate this many elements,
+     * given the specified load factor.
+     * @param loadFactor the load factor (table density) for
+     * establishing the initial table size
+     * @param concurrencyLevel the estimated number of concurrently
+     * updating threads. The implementation may use this value as
+     * a sizing hint.
+     * @throws IllegalArgumentException if the initial capacity is
+     * negative or the load factor or concurrencyLevel are
+     * nonpositive
+     */
+    public ConcurrentHashMapV8(int initialCapacity,
+                               float loadFactor, int concurrencyLevel) {
+        if (!(loadFactor > 0.0f) || initialCapacity < 0 || concurrencyLevel <= 0)
+            throw new IllegalArgumentException();
+        if (initialCapacity < concurrencyLevel)   // Use at least as many bins
+            initialCapacity = concurrencyLevel;   // as estimated threads
+        long size = (long)(1.0 + (long)initialCapacity / loadFactor);
+        int cap = ((size >= (long)MAXIMUM_CAPACITY) ?
+                   MAXIMUM_CAPACITY: tableSizeFor((int)size));
+        this.counter = new LongAdder();
+        this.sizeCtl = cap;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public boolean isEmpty() {
+        return counter.sum() <= 0L; // ignore transient negative values
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int size() {
+        long n = counter.sum();
+        return ((n < 0L) ? 0 :
+                (n > (long)Integer.MAX_VALUE) ? Integer.MAX_VALUE :
+                (int)n);
+    }
+
+    final long longSize() { // accurate version of size needed for views
+        long n = counter.sum();
+        return (n < 0L) ? 0L : n;
+    }
+
+    /**
+     * Returns the value to which the specified key is mapped,
+     * or {@code null} if this map contains no mapping for the key.
+     *
+     * <p>More formally, if this map contains a mapping from a key
+     * {@code k} to a value {@code v} such that {@code key.equals(k)},
+     * then this method returns {@code v}; otherwise it returns
+     * {@code null}.  (There can be at most one such mapping.)
+     *
+     * @throws NullPointerException if the specified key is null
+     */
+    @SuppressWarnings("unchecked")
+    public V get(Object key) {
+        if (key == null)
+            throw new NullPointerException();
+        return (V)internalGet(key);
+    }
+
+    /**
+     * Tests if the specified object is a key in this table.
+     *
+     * @param  key   possible key
+     * @return {@code true} if and only if the specified object
+     *         is a key in this table, as determined by the
+     *         {@code equals} method; {@code false} otherwise
+     * @throws NullPointerException if the specified key is null
+     */
+    public boolean containsKey(Object key) {
+        if (key == null)
+            throw new NullPointerException();
+        return internalGet(key) != null;
+    }
+
+    /**
+     * Returns {@code true} if this map maps one or more keys to the
+     * specified value. Note: This method may require a full traversal
+     * of the map, and is much slower than method {@code containsKey}.
+     *
+     * @param value value whose presence in this map is to be tested
+     * @return {@code true} if this map maps one or more keys to the
+     *         specified value
+     * @throws NullPointerException if the specified value is null
+     */
+    public boolean containsValue(Object value) {
+        if (value == null)
+            throw new NullPointerException();
+        Object v;
+        InternalIterator it = new InternalIterator(table);
+        while (it.next != null) {
+            if ((v = it.nextVal) == value || value.equals(v))
+                return true;
+            it.advance();
+        }
+        return false;
+    }
+
+    /**
+     * Legacy method testing if some key maps into the specified value
+     * in this table.  This method is identical in functionality to
+     * {@link #containsValue}, and exists solely to ensure
+     * full compatibility with class {@link java.util.Hashtable},
+     * which supported this method prior to introduction of the
+     * Java Collections framework.
+     *
+     * @param  value a value to search for
+     * @return {@code true} if and only if some key maps to the
+     *         {@code value} argument in this table as
+     *         determined by the {@code equals} method;
+     *         {@code false} otherwise
+     * @throws NullPointerException if the specified value is null
+     */
+    public boolean contains(Object value) {
+        return containsValue(value);
+    }
+
+    /**
+     * Maps the specified key to the specified value in this table.
+     * Neither the key nor the value can be null.
+     *
+     * <p> The value can be retrieved by calling the {@code get} method
+     * with a key that is equal to the original key.
+     *
+     * @param key key with which the specified value is to be associated
+     * @param value value to be associated with the specified key
+     * @return the previous value associated with {@code key}, or
+     *         {@code null} if there was no mapping for {@code key}
+     * @throws NullPointerException if the specified key or value is null
+     */
+    @SuppressWarnings("unchecked")
+    public V put(K key, V value) {
+        if (key == null || value == null)
+            throw new NullPointerException();
+        return (V)internalPut(key, value);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @return the previous value associated with the specified key,
+     *         or {@code null} if there was no mapping for the key
+     * @throws NullPointerException if the specified key or value is null
+     */
+    @SuppressWarnings("unchecked")
+    public V putIfAbsent(K key, V value) {
+        if (key == null || value == null)
+            throw new NullPointerException();
+        return (V)internalPutIfAbsent(key, value);
+    }
+
+    /**
+     * Copies all of the mappings from the specified map to this one.
+     * These mappings replace any mappings that this map had for any of the
+     * keys currently in the specified map.
+     *
+     * @param m mappings to be stored in this map
+     */
+    public void putAll(Map<? extends K, ? extends V> m) {
+        internalPutAll(m);
+    }
+
+    /**
+     * If the specified key is not already associated with a value,
+     * computes its value using the given mappingFunction and
+     * enters it into the map.  This is equivalent to
+     * <pre> {@code
+     * if (map.containsKey(key))
+     *   return map.get(key);
+     * value = mappingFunction.map(key);
+     * map.put(key, value);
+     * return value;}</pre>
+     *
+     * except that the action is performed atomically.  If the
+     * function returns {@code null} (in which case a {@code
+     * NullPointerException} is thrown), or the function itself throws
+     * an (unchecked) exception, the exception is rethrown to its
+     * caller, and no mapping is recorded.  Some attempted update
+     * operations on this map by other threads may be blocked while
+     * computation is in progress, so the computation should be short
+     * and simple, and must not attempt to update any other mappings
+     * of this Map. The most appropriate usage is to construct a new
+     * object serving as an initial mapped value, or memoized result,
+     * as in:
+     *
+     *  <pre> {@code
+     * map.computeIfAbsent(key, new MappingFunction<K, V>() {
+     *   public V map(K k) { return new Value(f(k)); }});}</pre>
+     *
+     * @param key key with which the specified value is to be associated
+     * @param mappingFunction the function to compute a value
+     * @return the current (existing or computed) value associated with
+     *         the specified key.
+     * @throws NullPointerException if the specified key, mappingFunction,
+     *         or computed value is null
+     * @throws IllegalStateException if the computation detectably
+     *         attempts a recursive update to this map that would
+     *         otherwise never complete
+     * @throws RuntimeException or Error if the mappingFunction does so,
+     *         in which case the mapping is left unestablished
+     */
+    @SuppressWarnings("unchecked")
+    public V computeIfAbsent(K key, MappingFunction<? super K, ? extends V> mappingFunction) {
+        if (key == null || mappingFunction == null)
+            throw new NullPointerException();
+        return (V)internalComputeIfAbsent(key, mappingFunction);
+    }
+
+    /**
+     * Computes and enters a new mapping value given a key and
+     * its current mapped value (or {@code null} if there is no current
+     * mapping). This is equivalent to
+     *  <pre> {@code
+     *  map.put(key, remappingFunction.remap(key, map.get(key));
+     * }</pre>
+     *
+     * except that the action is performed atomically.  If the
+     * function returns {@code null} (in which case a {@code
+     * NullPointerException} is thrown), or the function itself throws
+     * an (unchecked) exception, the exception is rethrown to its
+     * caller, and current mapping is left unchanged.  Some attempted
+     * update operations on this map by other threads may be blocked
+     * while computation is in progress, so the computation should be
+     * short and simple, and must not attempt to update any other
+     * mappings of this Map. For example, to either create or
+     * append new messages to a value mapping:
+     *
+     * <pre> {@code
+     * Map<Key, String> map = ...;
+     * final String msg = ...;
+     * map.compute(key, new RemappingFunction<Key, String>() {
+     *   public String remap(Key k, String v) {
+     *    return (v == null) ? msg : v + msg;});}}</pre>
+     *
+     * @param key key with which the specified value is to be associated
+     * @param remappingFunction the function to compute a value
+     * @return the new value associated with
+     *         the specified key.
+     * @throws NullPointerException if the specified key or remappingFunction
+     *         or computed value is null
+     * @throws IllegalStateException if the computation detectably
+     *         attempts a recursive update to this map that would
+     *         otherwise never complete
+     * @throws RuntimeException or Error if the remappingFunction does so,
+     *         in which case the mapping is unchanged
+     */
+    @SuppressWarnings("unchecked")
+    public V compute(K key, RemappingFunction<? super K, V> remappingFunction) {
+        if (key == null || remappingFunction == null)
+            throw new NullPointerException();
+        return (V)internalCompute(key, remappingFunction);
+    }
+
+    /**
+     * Removes the key (and its corresponding value) from this map.
+     * This method does nothing if the key is not in the map.
+     *
+     * @param  key the key that needs to be removed
+     * @return the previous value associated with {@code key}, or
+     *         {@code null} if there was no mapping for {@code key}
+     * @throws NullPointerException if the specified key is null
+     */
+    @SuppressWarnings("unchecked")
+    public V remove(Object key) {
+        if (key == null)
+            throw new NullPointerException();
+        return (V)internalReplace(key, null, null);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws NullPointerException if the specified key is null
+     */
+    public boolean remove(Object key, Object value) {
+        if (key == null)
+            throw new NullPointerException();
+        if (value == null)
+            return false;
+        return internalReplace(key, null, value) != null;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws NullPointerException if any of the arguments are null
+     */
+    public boolean replace(K key, V oldValue, V newValue) {
+        if (key == null || oldValue == null || newValue == null)
+            throw new NullPointerException();
+        return internalReplace(key, newValue, oldValue) != null;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @return the previous value associated with the specified key,
+     *         or {@code null} if there was no mapping for the key
+     * @throws NullPointerException if the specified key or value is null
+     */
+    @SuppressWarnings("unchecked")
+    public V replace(K key, V value) {
+        if (key == null || value == null)
+            throw new NullPointerException();
+        return (V)internalReplace(key, value, null);
+    }
+
+    /**
+     * Removes all of the mappings from this map.
+     */
+    public void clear() {
+        internalClear();
+    }
+
+    /**
+     * Returns a {@link Set} view of the keys contained in this map.
+     * The set is backed by the map, so changes to the map are
+     * reflected in the set, and vice-versa.  The set supports element
+     * removal, which removes the corresponding mapping from this map,
+     * via the {@code Iterator.remove}, {@code Set.remove},
+     * {@code removeAll}, {@code retainAll}, and {@code clear}
+     * operations.  It does not support the {@code add} or
+     * {@code addAll} operations.
+     *
+     * <p>The view's {@code iterator} is a "weakly consistent" iterator
+     * that will never throw {@link ConcurrentModificationException},
+     * and guarantees to traverse elements as they existed upon
+     * construction of the iterator, and may (but is not guaranteed to)
+     * reflect any modifications subsequent to construction.
+     */
+    public Set<K> keySet() {
+        KeySet<K,V> ks = keySet;
+        return (ks != null) ? ks : (keySet = new KeySet<K,V>(this));
+    }
+
+    /**
+     * Returns a {@link Collection} view of the values contained in this map.
+     * The collection is backed by the map, so changes to the map are
+     * reflected in the collection, and vice-versa.  The collection
+     * supports element removal, which removes the corresponding
+     * mapping from this map, via the {@code Iterator.remove},
+     * {@code Collection.remove}, {@code removeAll},
+     * {@code retainAll}, and {@code clear} operations.  It does not
+     * support the {@code add} or {@code addAll} operations.
+     *
+     * <p>The view's {@code iterator} is a "weakly consistent" iterator
+     * that will never throw {@link ConcurrentModificationException},
+     * and guarantees to traverse elements as they existed upon
+     * construction of the iterator, and may (but is not guaranteed to)
+     * reflect any modifications subsequent to construction.
+     */
+    public Collection<V> values() {
+        Values<K,V> vs = values;
+        return (vs != null) ? vs : (values = new Values<K,V>(this));
+    }
+
+    /**
+     * Returns a {@link Set} view of the mappings contained in this map.
+     * The set is backed by the map, so changes to the map are
+     * reflected in the set, and vice-versa.  The set supports element
+     * removal, which removes the corresponding mapping from the map,
+     * via the {@code Iterator.remove}, {@code Set.remove},
+     * {@code removeAll}, {@code retainAll}, and {@code clear}
+     * operations.  It does not support the {@code add} or
+     * {@code addAll} operations.
+     *
+     * <p>The view's {@code iterator} is a "weakly consistent" iterator
+     * that will never throw {@link ConcurrentModificationException},
+     * and guarantees to traverse elements as they existed upon
+     * construction of the iterator, and may (but is not guaranteed to)
+     * reflect any modifications subsequent to construction.
+     */
+    public Set<Map.Entry<K,V>> entrySet() {
+        EntrySet<K,V> es = entrySet;
+        return (es != null) ? es : (entrySet = new EntrySet<K,V>(this));
+    }
+
+    /**
+     * Returns an enumeration of the keys in this table.
+     *
+     * @return an enumeration of the keys in this table
+     * @see #keySet()
+     */
+    public Enumeration<K> keys() {
+        return new KeyIterator<K,V>(this);
+    }
+
+    /**
+     * Returns an enumeration of the values in this table.
+     *
+     * @return an enumeration of the values in this table
+     * @see #values()
+     */
+    public Enumeration<V> elements() {
+        return new ValueIterator<K,V>(this);
+    }
+
+    /**
+     * Returns the hash code value for this {@link Map}, i.e.,
+     * the sum of, for each key-value pair in the map,
+     * {@code key.hashCode() ^ value.hashCode()}.
+     *
+     * @return the hash code value for this map
+     */
+    public int hashCode() {
+        int h = 0;
+        InternalIterator it = new InternalIterator(table);
+        while (it.next != null) {
+            h += it.nextKey.hashCode() ^ it.nextVal.hashCode();
+            it.advance();
+        }
+        return h;
+    }
+
+    /**
+     * Returns a string representation of this map.  The string
+     * representation consists of a list of key-value mappings (in no
+     * particular order) enclosed in braces ("{@code {}}").  Adjacent
+     * mappings are separated by the characters {@code ", "} (comma
+     * and space).  Each key-value mapping is rendered as the key
+     * followed by an equals sign ("{@code =}") followed by the
+     * associated value.
+     *
+     * @return a string representation of this map
+     */
+    public String toString() {
+        InternalIterator it = new InternalIterator(table);
+        StringBuilder sb = new StringBuilder();
+        sb.append('{');
+        if (it.next != null) {
+            for (;;) {
+                Object k = it.nextKey, v = it.nextVal;
+                sb.append(k == this ? "(this Map)" : k);
+                sb.append('=');
+                sb.append(v == this ? "(this Map)" : v);
+                it.advance();
+                if (it.next == null)
+                    break;
+                sb.append(',').append(' ');
+            }
+        }
+        return sb.append('}').toString();
+    }
+
+    /**
+     * Compares the specified object with this map for equality.
+     * Returns {@code true} if the given object is a map with the same
+     * mappings as this map.  This operation may return misleading
+     * results if either map is concurrently modified during execution
+     * of this method.
+     *
+     * @param o object to be compared for equality with this map
+     * @return {@code true} if the specified object is equal to this map
+     */
+    public boolean equals(Object o) {
+        if (o != this) {
+            if (!(o instanceof Map))
+                return false;
+            Map<?,?> m = (Map<?,?>) o;
+            InternalIterator it = new InternalIterator(table);
+            while (it.next != null) {
+                Object val = it.nextVal;
+                Object v = m.get(it.nextKey);
+                if (v == null || (v != val && !v.equals(val)))
+                    return false;
+                it.advance();
+            }
+            for (Map.Entry<?,?> e : m.entrySet()) {
+                Object mk, mv, v;
+                if ((mk = e.getKey()) == null ||
+                    (mv = e.getValue()) == null ||
+                    (v = internalGet(mk)) == null ||
+                    (mv != v && !mv.equals(v)))
+                    return false;
+            }
+        }
+        return true;
+    }
+
+    /* ----------------Iterators -------------- */
+
+    /**
+     * Base class for key, value, and entry iterators.  Adds a map
+     * reference to InternalIterator to support Iterator.remove.
+     */
+    static abstract class ViewIterator<K,V> extends InternalIterator {
+        final ConcurrentHashMapV8<K, V> map;
+        ViewIterator(ConcurrentHashMapV8<K, V> map) {
+            super(map.table);
+            this.map = map;
+        }
+
+        public final void remove() {
+            if (last == null)
+                throw new IllegalStateException();
+            map.remove(last.key);
+            last = null;
+        }
+
+        public final boolean hasNext()         { return next != null; }
+        public final boolean hasMoreElements() { return next != null; }
+    }
+
+    static final class KeyIterator<K,V> extends ViewIterator<K,V>
+        implements Iterator<K>, Enumeration<K> {
+        KeyIterator(ConcurrentHashMapV8<K, V> map) { super(map); }
+
+        @SuppressWarnings("unchecked")
+        public final K next() {
+            if (next == null)
+                throw new NoSuchElementException();
+            Object k = nextKey;
+            advance();
+            return (K)k;
+        }
+
+        public final K nextElement() { return next(); }
+    }
+
+    static final class ValueIterator<K,V> extends ViewIterator<K,V>
+        implements Iterator<V>, Enumeration<V> {
+        ValueIterator(ConcurrentHashMapV8<K, V> map) { super(map); }
+
+        @SuppressWarnings("unchecked")
+        public final V next() {
+            if (next == null)
+                throw new NoSuchElementException();
+            Object v = nextVal;
+            advance();
+            return (V)v;
+        }
+
+        public final V nextElement() { return next(); }
+    }
+
+    static final class EntryIterator<K,V> extends ViewIterator<K,V>
+        implements Iterator<Map.Entry<K,V>> {
+        EntryIterator(ConcurrentHashMapV8<K, V> map) { super(map); }
+
+        @SuppressWarnings("unchecked")
+        public final Map.Entry<K,V> next() {
+            if (next == null)
+                throw new NoSuchElementException();
+            Object k = nextKey;
+            Object v = nextVal;
+            advance();
+            return new WriteThroughEntry<K,V>((K)k, (V)v, map);
+        }
+    }
+
+    static final class SnapshotEntryIterator<K,V> extends ViewIterator<K,V>
+        implements Iterator<Map.Entry<K,V>> {
+        SnapshotEntryIterator(ConcurrentHashMapV8<K, V> map) { super(map); }
+
+        @SuppressWarnings("unchecked")
+        public final Map.Entry<K,V> next() {
+            if (next == null)
+                throw new NoSuchElementException();
+            Object k = nextKey;
+            Object v = nextVal;
+            advance();
+            return new SnapshotEntry<K,V>((K)k, (V)v);
+        }
+    }
+
+    /**
+     * Base of writeThrough and Snapshot entry classes
+     */
+    static abstract class MapEntry<K,V> implements Map.Entry<K, V> {
+        final K key; // non-null
+        V val;       // non-null
+        MapEntry(K key, V val)        { this.key = key; this.val = val; }
+        public final K getKey()       { return key; }
+        public final V getValue()     { return val; }
+        public final int hashCode()   { return key.hashCode() ^ val.hashCode(); }
+        public final String toString(){ return key + "=" + val; }
+
+        public final boolean equals(Object o) {
+            Object k, v; Map.Entry<?,?> e;
+            return ((o instanceof Map.Entry) &&
+                    (k = (e = (Map.Entry<?,?>)o).getKey()) != null &&
+                    (v = e.getValue()) != null &&
+                    (k == key || k.equals(key)) &&
+                    (v == val || v.equals(val)));
+        }
+
+        public abstract V setValue(V value);
+    }
+
+    /**
+     * Entry used by EntryIterator.next(), that relays setValue
+     * changes to the underlying map.
+     */
+    static final class WriteThroughEntry<K,V> extends MapEntry<K,V>
+        implements Map.Entry<K, V> {
+        final ConcurrentHashMapV8<K, V> map;
+        WriteThroughEntry(K key, V val, ConcurrentHashMapV8<K, V> map) {
+            super(key, val);
+            this.map = map;
+        }
+
+        /**
+         * Sets our entry's value and writes through to the map. The
+         * value to return is somewhat arbitrary here. Since a
+         * WriteThroughEntry does not necessarily track asynchronous
+         * changes, the most recent "previous" value could be
+         * different from what we return (or could even have been
+         * removed in which case the put will re-establish). We do not
+         * and cannot guarantee more.
+         */
+        public final V setValue(V value) {
+            if (value == null) throw new NullPointerException();
+            V v = val;
+            val = value;
+            map.put(key, value);
+            return v;
+        }
+    }
+
+    /**
+     * Internal version of entry, that doesn't write though changes
+     */
+    static final class SnapshotEntry<K,V> extends MapEntry<K,V>
+        implements Map.Entry<K, V> {
+        SnapshotEntry(K key, V val) { super(key, val); }
+        public final V setValue(V value) { // only locally update
+            if (value == null) throw new NullPointerException();
+            V v = val;
+            val = value;
+            return v;
+        }
+    }
+
+    /* ----------------Views -------------- */
+
+    /**
+     * Base class for views. This is done mainly to allow adding
+     * customized parallel traversals (not yet implemented.)
+     */
+    static abstract class MapView<K, V> {
+        final ConcurrentHashMapV8<K, V> map;
+        MapView(ConcurrentHashMapV8<K, V> map)  { this.map = map; }
+        public final int size()                 { return map.size(); }
+        public final boolean isEmpty()          { return map.isEmpty(); }
+        public final void clear()               { map.clear(); }
+
+        // implementations below rely on concrete classes supplying these
+        abstract Iterator<?> iter();
+        abstract public boolean contains(Object o);
+        abstract public boolean remove(Object o);
+
+        private static final String oomeMsg = "Required array size too large";
+
+        public final Object[] toArray() {
+            long sz = map.longSize();
+            if (sz > (long)(MAX_ARRAY_SIZE))
+                throw new OutOfMemoryError(oomeMsg);
+            int n = (int)sz;
+            Object[] r = new Object[n];
+            int i = 0;
+            Iterator<?> it = iter();
+            while (it.hasNext()) {
+                if (i == n) {
+                    if (n >= MAX_ARRAY_SIZE)
+                        throw new OutOfMemoryError(oomeMsg);
+                    if (n >= MAX_ARRAY_SIZE - (MAX_ARRAY_SIZE >>> 1) - 1)
+                        n = MAX_ARRAY_SIZE;
+                    else
+                        n += (n >>> 1) + 1;
+                    r = Arrays.copyOf(r, n);
+                }
+                r[i++] = it.next();
+            }
+            return (i == n) ? r : Arrays.copyOf(r, i);
+        }
+
+        @SuppressWarnings("unchecked")
+        public final <T> T[] toArray(T[] a) {
+            long sz = map.longSize();
+            if (sz > (long)(MAX_ARRAY_SIZE))
+                throw new OutOfMemoryError(oomeMsg);
+            int m = (int)sz;
+            T[] r = (a.length >= m) ? a :
+                (T[])java.lang.reflect.Array
+                .newInstance(a.getClass().getComponentType(), m);
+            int n = r.length;
+            int i = 0;
+            Iterator<?> it = iter();
+            while (it.hasNext()) {
+                if (i == n) {
+                    if (n >= MAX_ARRAY_SIZE)
+                        throw new OutOfMemoryError(oomeMsg);
+                    if (n >= MAX_ARRAY_SIZE - (MAX_ARRAY_SIZE >>> 1) - 1)
+                        n = MAX_ARRAY_SIZE;
+                    else
+                        n += (n >>> 1) + 1;
+                    r = Arrays.copyOf(r, n);
+                }
+                r[i++] = (T)it.next();
+            }
+            if (a == r && i < n) {
+                r[i] = null; // null-terminate
+                return r;
+            }
+            return (i == n) ? r : Arrays.copyOf(r, i);
+        }
+
+        public final int hashCode() {
+            int h = 0;
+            for (Iterator<?> it = iter(); it.hasNext();)
+                h += it.next().hashCode();
+            return h;
+        }
+
+        public final String toString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append('[');
+            Iterator<?> it = iter();
+            if (it.hasNext()) {
+                for (;;) {
+                    Object e = it.next();
+                    sb.append(e == this ? "(this Collection)" : e);
+                    if (!it.hasNext())
+                        break;
+                    sb.append(',').append(' ');
+                }
+            }
+            return sb.append(']').toString();
+        }
+
+        public final boolean containsAll(Collection<?> c) {
+            if (c != this) {
+                for (Iterator<?> it = c.iterator(); it.hasNext();) {
+                    Object e = it.next();
+                    if (e == null || !contains(e))
+                        return false;
+                }
+            }
+            return true;
+        }
+
+        public final boolean removeAll(Collection<?> c) {
+            boolean modified = false;
+            for (Iterator<?> it = iter(); it.hasNext();) {
+                if (c.contains(it.next())) {
+                    it.remove();
+                    modified = true;
+                }
+            }
+            return modified;
+        }
+
+        public final boolean retainAll(Collection<?> c) {
+            boolean modified = false;
+            for (Iterator<?> it = iter(); it.hasNext();) {
+                if (!c.contains(it.next())) {
+                    it.remove();
+                    modified = true;
+                }
+            }
+            return modified;
+        }
+
+    }
+
+    static final class KeySet<K,V> extends MapView<K,V> implements Set<K> {
+        KeySet(ConcurrentHashMapV8<K, V> map)   { super(map); }
+        public final boolean contains(Object o) { return map.containsKey(o); }
+        public final boolean remove(Object o)   { return map.remove(o) != null; }
+
+        public final Iterator<K> iterator() {
+            return new KeyIterator<K,V>(map);
+        }
+        final Iterator<?> iter() {
+            return new KeyIterator<K,V>(map);
+        }
+        public final boolean add(K e) {
+            throw new UnsupportedOperationException();
+        }
+        public final boolean addAll(Collection<? extends K> c) {
+            throw new UnsupportedOperationException();
+        }
+        public boolean equals(Object o) {
+            Set<?> c;
+            return ((o instanceof Set) &&
+                    ((c = (Set<?>)o) == this ||
+                     (containsAll(c) && c.containsAll(this))));
+        }
+    }
+
+    static final class Values<K,V> extends MapView<K,V>
+        implements Collection<V> {
+        Values(ConcurrentHashMapV8<K, V> map)   { super(map); }
+        public final boolean contains(Object o) { return map.containsValue(o); }
+
+        public final boolean remove(Object o) {
+            if (o != null) {
+                Iterator<V> it = new ValueIterator<K,V>(map);
+                while (it.hasNext()) {
+                    if (o.equals(it.next())) {
+                        it.remove();
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+        public final Iterator<V> iterator() {
+            return new ValueIterator<K,V>(map);
+        }
+        final Iterator<?> iter() {
+            return new ValueIterator<K,V>(map);
+        }
+        public final boolean add(V e) {
+            throw new UnsupportedOperationException();
+        }
+        public final boolean addAll(Collection<? extends V> c) {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    static final class EntrySet<K,V> extends MapView<K,V>
+        implements Set<Map.Entry<K,V>> {
+        EntrySet(ConcurrentHashMapV8<K, V> map) { super(map); }
+
+        public final boolean contains(Object o) {
+            Object k, v, r; Map.Entry<?,?> e;
+            return ((o instanceof Map.Entry) &&
+                    (k = (e = (Map.Entry<?,?>)o).getKey()) != null &&
+                    (r = map.get(k)) != null &&
+                    (v = e.getValue()) != null &&
+                    (v == r || v.equals(r)));
+        }
+
+        public final boolean remove(Object o) {
+            Object k, v; Map.Entry<?,?> e;
+            return ((o instanceof Map.Entry) &&
+                    (k = (e = (Map.Entry<?,?>)o).getKey()) != null &&
+                    (v = e.getValue()) != null &&
+                    map.remove(k, v));
+        }
+
+        public final Iterator<Map.Entry<K,V>> iterator() {
+            return new EntryIterator<K,V>(map);
+        }
+        final Iterator<?> iter() {
+            return new SnapshotEntryIterator<K,V>(map);
+        }
+        public final boolean add(Entry<K,V> e) {
+            throw new UnsupportedOperationException();
+        }
+        public final boolean addAll(Collection<? extends Entry<K,V>> c) {
+            throw new UnsupportedOperationException();
+        }
+        public boolean equals(Object o) {
+            Set<?> c;
+            return ((o instanceof Set) &&
+                    ((c = (Set<?>)o) == this ||
+                     (containsAll(c) && c.containsAll(this))));
+        }
+    }
+
+    /* ---------------- Serialization Support -------------- */
+
+    /**
+     * Stripped-down version of helper class used in previous version,
+     * declared for the sake of serialization compatibility
+     */
+    static class Segment<K,V> implements Serializable {
+        private static final long serialVersionUID = 2249069246763182397L;
+        final float loadFactor;
+        Segment(float lf) { this.loadFactor = lf; }
+    }
+
+    /**
+     * Saves the state of the {@code ConcurrentHashMapV8} instance to a
+     * stream (i.e., serializes it).
+     * @param s the stream
+     * @serialData
+     * the key (Object) and value (Object)
+     * for each key-value mapping, followed by a null pair.
+     * The key-value mappings are emitted in no particular order.
+     */
+    @SuppressWarnings("unchecked")
+    private void writeObject(java.io.ObjectOutputStream s)
+            throws java.io.IOException {
+        if (segments == null) { // for serialization compatibility
+            segments = (Segment<K,V>[])
+                new Segment<?,?>[DEFAULT_CONCURRENCY_LEVEL];
+            for (int i = 0; i < segments.length; ++i)
+                segments[i] = new Segment<K,V>(LOAD_FACTOR);
+        }
+        s.defaultWriteObject();
+        InternalIterator it = new InternalIterator(table);
+        while (it.next != null) {
+            s.writeObject(it.nextKey);
+            s.writeObject(it.nextVal);
+            it.advance();
+        }
+        s.writeObject(null);
+        s.writeObject(null);
+        segments = null; // throw away
+    }
+
+    /**
+     * Reconstitutes the instance from a stream (that is, deserializes it).
+     * @param s the stream
+     */
+    @SuppressWarnings("unchecked")
+    private void readObject(java.io.ObjectInputStream s)
+            throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        this.segments = null; // unneeded
+        // initialize transient final field
+        UNSAFE.putObjectVolatile(this, counterOffset, new LongAdder());
+
+        // Create all nodes, then place in table once size is known
+        long size = 0L;
+        Node p = null;
+        for (;;) {
+            K k = (K) s.readObject();
+            V v = (V) s.readObject();
+            if (k != null && v != null) {
+                int h = spread(k.hashCode());
+                p = new Node(h, k, v, p);
+                ++size;
+            }
+            else
+                break;
+        }
+        if (p != null) {
+            boolean init = false;
+            int n;
+            if (size >= (long)(MAXIMUM_CAPACITY >>> 1))
+                n = MAXIMUM_CAPACITY;
+            else {
+                int sz = (int)size;
+                n = tableSizeFor(sz + (sz >>> 1) + 1);
+            }
+            int sc = sizeCtl;
+            boolean collide = false;
+            if (n > sc &&
+                UNSAFE.compareAndSwapInt(this, sizeCtlOffset, sc, -1)) {
+                try {
+                    if (table == null) {
+                        init = true;
+                        Node[] tab = new Node[n];
+                        int mask = n - 1;
+                        while (p != null) {
+                            int j = p.hash & mask;
+                            Node next = p.next;
+                            Node q = p.next = tabAt(tab, j);
+                            setTabAt(tab, j, p);
+                            if (!collide && q != null && q.hash == p.hash)
+                                collide = true;
+                            p = next;
+                        }
+                        table = tab;
+                        counter.add(size);
+                        sc = n - (n >>> 2);
+                    }
+                } finally {
+                    sizeCtl = sc;
+                }
+                if (collide) { // rescan and convert to TreeBins
+                    Node[] tab = table;
+                    for (int i = 0; i < tab.length; ++i) {
+                        int c = 0;
+                        for (Node e = tabAt(tab, i); e != null; e = e.next) {
+                            if (++c > TREE_THRESHOLD &&
+                                (e.key instanceof Comparable)) {
+                                replaceWithTreeBin(tab, i, e.key);
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            if (!init) { // Can only happen if unsafely published.
+                while (p != null) {
+                    internalPut(p.key, p.val);
+                    p = p.next;
+                }
+            }
+
+        }
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe UNSAFE;
+    private static final long counterOffset;
+    private static final long sizeCtlOffset;
+    private static final long ABASE;
+    private static final int ASHIFT;
+
+    static {
+        int ss;
+        try {
+            UNSAFE = getUnsafe();
+            Class<?> k = ConcurrentHashMapV8.class;
+            counterOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("counter"));
+            sizeCtlOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("sizeCtl"));
+            Class<?> sc = Node[].class;
+            ABASE = UNSAFE.arrayBaseOffset(sc);
+            ss = UNSAFE.arrayIndexScale(sc);
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+        if ((ss & (ss-1)) != 0)
+            throw new Error("data type scale not a power of two");
+        ASHIFT = 31 - Integer.numberOfLeadingZeros(ss);
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166e/DoubleAdder.java b/src/main/java/jsr166e/DoubleAdder.java
new file mode 100644
index 00000000000..947c3deffdb
--- /dev/null
+++ b/src/main/java/jsr166e/DoubleAdder.java
@@ -0,0 +1,201 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+/**
+ * One or more variables that together maintain an initially zero
+ * {@code double} sum.  When updates (method {@link #add}) are
+ * contended across threads, the set of variables may grow dynamically
+ * to reduce contention.  Method {@link #sum} (or, equivalently {@link
+ * #doubleValue}) returns the current total combined across the
+ * variables maintaining the sum.
+ *
+ * <p>This class extends {@link Number}, but does <em>not</em> define
+ * methods such as {@code hashCode} and {@code compareTo} because
+ * instances are expected to be mutated, and so are not useful as
+ * collection keys.
+ *
+ * <p><em>jsr166e note: This class is targeted to be placed in
+ * java.util.concurrent.atomic<em>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class DoubleAdder extends Striped64 implements Serializable {
+    private static final long serialVersionUID = 7249069246863182397L;
+
+    /**
+     * Update function. Note that we must use "long" for underlying
+     * representations, because there is no compareAndSet for double,
+     * due to the fact that the bitwise equals used in any CAS
+     * implementation is not the same as double-precision equals.
+     * However, we use CAS only to detect and alleviate contention,
+     * for which bitwise equals works best anyway. In principle, the
+     * long/double conversions used here should be essentially free on
+     * most platforms since they just re-interpret bits.
+     *
+     * Similar conversions are used in other methods.
+     */
+    final long fn(long v, long x) {
+        return Double.doubleToRawLongBits
+            (Double.longBitsToDouble(v) +
+             Double.longBitsToDouble(x));
+    }
+
+    /**
+     * Creates a new adder with initial sum of zero.
+     */
+    public DoubleAdder() {
+    }
+
+    /**
+     * Adds the given value.
+     *
+     * @param x the value to add
+     */
+    public void add(double x) {
+        Cell[] as; long b, v; HashCode hc; Cell a; int n;
+        if ((as = cells) != null ||
+            !casBase(b = base,
+                     Double.doubleToRawLongBits
+                     (Double.longBitsToDouble(b) + x))) {
+            boolean uncontended = true;
+            int h = (hc = threadHashCode.get()).code;
+            if (as == null || (n = as.length) < 1 ||
+                (a = as[(n - 1) & h]) == null ||
+                !(uncontended = a.cas(v = a.value,
+                                      Double.doubleToRawLongBits
+                                      (Double.longBitsToDouble(v) + x))))
+                retryUpdate(Double.doubleToRawLongBits(x), hc, uncontended);
+        }
+    }
+
+    /**
+     * Returns the current sum.  The returned value is <em>NOT</em> an
+     * atomic snapshot: Invocation in the absence of concurrent
+     * updates returns an accurate result, but concurrent updates that
+     * occur while the sum is being calculated might not be
+     * incorporated.  Also, because double-precision arithmetic is not
+     * strictly associative, the returned result need not be identical
+     * to the value that would be obtained in a sequential series of
+     * updates to a single variable.
+     *
+     * @return the sum
+     */
+    public double sum() {
+        Cell[] as = cells;
+        double sum = Double.longBitsToDouble(base);
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null)
+                    sum += Double.longBitsToDouble(a.value);
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * Resets variables maintaining the sum to zero.  This method may
+     * be a useful alternative to creating a new adder, but is only
+     * effective if there are no concurrent updates.  Because this
+     * method is intrinsically racy, it should only be used when it is
+     * known that no threads are concurrently updating.
+     */
+    public void reset() {
+        internalReset(0L);
+    }
+
+    /**
+     * Equivalent in effect to {@link #sum} followed by {@link
+     * #reset}. This method may apply for example during quiescent
+     * points between multithreaded computations.  If there are
+     * updates concurrent with this method, the returned value is
+     * <em>not</em> guaranteed to be the final value occurring before
+     * the reset.
+     *
+     * @return the sum
+     */
+    public double sumThenReset() {
+        Cell[] as = cells;
+        double sum = Double.longBitsToDouble(base);
+        base = 0L;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null) {
+                    long v = a.value;
+                    a.value = 0L;
+                    sum += Double.longBitsToDouble(v);
+                }
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * Returns the String representation of the {@link #sum}.
+     * @return the String representation of the {@link #sum}
+     */
+    public String toString() {
+        return Double.toString(sum());
+    }
+
+    /**
+     * Equivalent to {@link #sum}.
+     *
+     * @return the sum
+     */
+    public double doubleValue() {
+        return sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as a {@code long} after a
+     * narrowing primitive conversion.
+     */
+    public long longValue() {
+        return (long)sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as an {@code int} after a
+     * narrowing primitive conversion.
+     */
+    public int intValue() {
+        return (int)sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as a {@code float}
+     * after a narrowing primitive conversion.
+     */
+    public float floatValue() {
+        return (float)sum();
+    }
+
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        s.writeDouble(sum());
+    }
+
+    private void readObject(ObjectInputStream s)
+        throws IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        busy = 0;
+        cells = null;
+        base = Double.doubleToRawLongBits(s.readDouble());
+    }
+
+}
diff --git a/src/main/java/jsr166e/DoubleMaxUpdater.java b/src/main/java/jsr166e/DoubleMaxUpdater.java
new file mode 100644
index 00000000000..cb0ce884ab2
--- /dev/null
+++ b/src/main/java/jsr166e/DoubleMaxUpdater.java
@@ -0,0 +1,196 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+/**
+ * One or more variables that together maintain a running {@code double}
+ * maximum with initial value {@code Double.NEGATIVE_INFINITY}.  When
+ * updates (method {@link #update}) are contended across threads, the
+ * set of variables may grow dynamically to reduce contention.  Method
+ * {@link #max} (or, equivalently, {@link #doubleValue}) returns the
+ * current maximum across the variables maintaining updates.
+ *
+ * <p>This class extends {@link Number}, but does <em>not</em> define
+ * methods such as {@code hashCode} and {@code compareTo} because
+ * instances are expected to be mutated, and so are not useful as
+ * collection keys.
+ *
+ * <p><em>jsr166e note: This class is targeted to be placed in
+ * java.util.concurrent.atomic<em>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class DoubleMaxUpdater extends Striped64 implements Serializable {
+    private static final long serialVersionUID = 7249069246863182397L;
+    /**
+     * Long representation of negative infinity. See class Double
+     * internal documentation for explanation.
+     */
+    private static final long MIN_AS_LONG = 0xfff0000000000000L;
+
+    /**
+     * Update function. See class DoubleAdder for rationale
+     * for using conversions from/to long.
+     */
+    final long fn(long v, long x) {
+        return Double.longBitsToDouble(v) > Double.longBitsToDouble(x) ? v : x;
+    }
+
+    /**
+     * Creates a new instance with initial value of {@code
+     * Double.NEGATIVE_INFINITY}.
+     */
+    public DoubleMaxUpdater() {
+        base = MIN_AS_LONG;
+    }
+
+    /**
+     * Updates the maximum to be at least the given value.
+     *
+     * @param x the value to update
+     */
+    public void update(double x) {
+        long lx = Double.doubleToRawLongBits(x);
+        Cell[] as; long b, v; HashCode hc; Cell a; int n;
+        if ((as = cells) != null ||
+            (Double.longBitsToDouble(b = base) < x && !casBase(b, lx))) {
+            boolean uncontended = true;
+            int h = (hc = threadHashCode.get()).code;
+            if (as == null || (n = as.length) < 1 ||
+                (a = as[(n - 1) & h]) == null ||
+                (Double.longBitsToDouble(v = a.value) < x &&
+                 !(uncontended = a.cas(v, lx))))
+                retryUpdate(lx, hc, uncontended);
+        }
+    }
+
+    /**
+     * Returns the current maximum.  The returned value is
+     * <em>NOT</em> an atomic snapshot: Invocation in the absence of
+     * concurrent updates returns an accurate result, but concurrent
+     * updates that occur while the value is being calculated might
+     * not be incorporated.
+     *
+     * @return the maximum
+     */
+    public double max() {
+        Cell[] as = cells;
+        double max = Double.longBitsToDouble(base);
+        if (as != null) {
+            int n = as.length;
+            double v;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null && (v = Double.longBitsToDouble(a.value)) > max)
+                    max = v;
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Resets variables maintaining updates to {@code
+     * Double.NEGATIVE_INFINITY}.  This method may be a useful
+     * alternative to creating a new updater, but is only effective if
+     * there are no concurrent updates.  Because this method is
+     * intrinsically racy, it should only be used when it is known
+     * that no threads are concurrently updating.
+     */
+    public void reset() {
+        internalReset(MIN_AS_LONG);
+    }
+
+    /**
+     * Equivalent in effect to {@link #max} followed by {@link
+     * #reset}. This method may apply for example during quiescent
+     * points between multithreaded computations.  If there are
+     * updates concurrent with this method, the returned value is
+     * <em>not</em> guaranteed to be the final value occurring before
+     * the reset.
+     *
+     * @return the maximum
+     */
+    public double maxThenReset() {
+        Cell[] as = cells;
+        double max = Double.longBitsToDouble(base);
+        base = MIN_AS_LONG;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null) {
+                    double v = Double.longBitsToDouble(a.value);
+                    a.value = MIN_AS_LONG;
+                    if (v > max)
+                        max = v;
+                }
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Returns the String representation of the {@link #max}.
+     * @return the String representation of the {@link #max}
+     */
+    public String toString() {
+        return Double.toString(max());
+    }
+
+    /**
+     * Equivalent to {@link #max}.
+     *
+     * @return the max
+     */
+    public double doubleValue() {
+        return max();
+    }
+
+    /**
+     * Returns the {@link #max} as a {@code long} after a
+     * narrowing primitive conversion.
+     */
+    public long longValue() {
+        return (long)max();
+    }
+
+    /**
+     * Returns the {@link #max} as an {@code int} after a
+     * narrowing primitive conversion.
+     */
+    public int intValue() {
+        return (int)max();
+    }
+
+    /**
+     * Returns the {@link #max} as a {@code float}
+     * after a narrowing primitive conversion.
+     */
+    public float floatValue() {
+        return (float)max();
+    }
+
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        s.writeDouble(max());
+    }
+
+    private void readObject(ObjectInputStream s)
+        throws IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        busy = 0;
+        cells = null;
+        base = Double.doubleToRawLongBits(s.readDouble());
+    }
+
+}
diff --git a/src/main/java/jsr166e/LongAdder.java b/src/main/java/jsr166e/LongAdder.java
new file mode 100644
index 00000000000..7878cc5327f
--- /dev/null
+++ b/src/main/java/jsr166e/LongAdder.java
@@ -0,0 +1,202 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.util.concurrent.atomic.AtomicLong;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+/**
+ * One or more variables that together maintain an initially zero
+ * {@code long} sum.  When updates (method {@link #add}) are contended
+ * across threads, the set of variables may grow dynamically to reduce
+ * contention. Method {@link #sum} (or, equivalently, {@link
+ * #longValue}) returns the current total combined across the
+ * variables maintaining the sum.
+ *
+ * <p> This class is usually preferable to {@link AtomicLong} when
+ * multiple threads update a common sum that is used for purposes such
+ * as collecting statistics, not for fine-grained synchronization
+ * control.  Under low update contention, the two classes have similar
+ * characteristics. But under high contention, expected throughput of
+ * this class is significantly higher, at the expense of higher space
+ * consumption.
+ *
+ * <p>This class extends {@link Number}, but does <em>not</em> define
+ * methods such as {@code hashCode} and {@code compareTo} because
+ * instances are expected to be mutated, and so are not useful as
+ * collection keys.
+ *
+ * <p><em>jsr166e note: This class is targeted to be placed in
+ * java.util.concurrent.atomic<em>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class LongAdder extends Striped64 implements Serializable {
+    private static final long serialVersionUID = 7249069246863182397L;
+
+    /**
+     * Version of plus for use in retryUpdate
+     */
+    final long fn(long v, long x) { return v + x; }
+
+    /**
+     * Creates a new adder with initial sum of zero.
+     */
+    public LongAdder() {
+    }
+
+    /**
+     * Adds the given value.
+     *
+     * @param x the value to add
+     */
+    public void add(long x) {
+        Cell[] as; long b, v; HashCode hc; Cell a; int n;
+        if ((as = cells) != null || !casBase(b = base, b + x)) {
+            boolean uncontended = true;
+            int h = (hc = threadHashCode.get()).code;
+            if (as == null || (n = as.length) < 1 ||
+                (a = as[(n - 1) & h]) == null ||
+                !(uncontended = a.cas(v = a.value, v + x)))
+                retryUpdate(x, hc, uncontended);
+        }
+    }
+
+    /**
+     * Equivalent to {@code add(1)}.
+     */
+    public void increment() {
+        add(1L);
+    }
+
+    /**
+     * Equivalent to {@code add(-1)}.
+     */
+    public void decrement() {
+        add(-1L);
+    }
+
+    /**
+     * Returns the current sum.  The returned value is <em>NOT</em> an
+     * atomic snapshot: Invocation in the absence of concurrent
+     * updates returns an accurate result, but concurrent updates that
+     * occur while the sum is being calculated might not be
+     * incorporated.
+     *
+     * @return the sum
+     */
+    public long sum() {
+        long sum = base;
+        Cell[] as = cells;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null)
+                    sum += a.value;
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * Resets variables maintaining the sum to zero.  This method may
+     * be a useful alternative to creating a new adder, but is only
+     * effective if there are no concurrent updates.  Because this
+     * method is intrinsically racy, it should only be used when it is
+     * known that no threads are concurrently updating.
+     */
+    public void reset() {
+        internalReset(0L);
+    }
+
+    /**
+     * Equivalent in effect to {@link #sum} followed by {@link
+     * #reset}. This method may apply for example during quiescent
+     * points between multithreaded computations.  If there are
+     * updates concurrent with this method, the returned value is
+     * <em>not</em> guaranteed to be the final value occurring before
+     * the reset.
+     *
+     * @return the sum
+     */
+    public long sumThenReset() {
+        long sum = base;
+        Cell[] as = cells;
+        base = 0L;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null) {
+                    sum += a.value;
+                    a.value = 0L;
+                }
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * Returns the String representation of the {@link #sum}.
+     * @return the String representation of the {@link #sum}
+     */
+    public String toString() {
+        return Long.toString(sum());
+    }
+
+    /**
+     * Equivalent to {@link #sum}.
+     *
+     * @return the sum
+     */
+    public long longValue() {
+        return sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as an {@code int} after a narrowing
+     * primitive conversion.
+     */
+    public int intValue() {
+        return (int)sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as a {@code float}
+     * after a widening primitive conversion.
+     */
+    public float floatValue() {
+        return (float)sum();
+    }
+
+    /**
+     * Returns the {@link #sum} as a {@code double} after a widening
+     * primitive conversion.
+     */
+    public double doubleValue() {
+        return (double)sum();
+    }
+
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        s.writeLong(sum());
+    }
+
+    private void readObject(ObjectInputStream s)
+        throws IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        busy = 0;
+        cells = null;
+        base = s.readLong();
+    }
+
+}
diff --git a/src/main/java/jsr166e/LongAdderTable.java b/src/main/java/jsr166e/LongAdderTable.java
new file mode 100644
index 00000000000..a1af77c2a06
--- /dev/null
+++ b/src/main/java/jsr166e/LongAdderTable.java
@@ -0,0 +1,189 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import jsr166e.LongAdder;
+import java.util.Map;
+import java.util.Set;
+import java.io.Serializable;
+
+/**
+ * A keyed table of adders, that may be useful in computing frequency
+ * counts and histograms, or may be used as a form of multiset.  A
+ * {@link LongAdder} is associated with each key. Keys are added to
+ * the table implicitly upon any attempt to update, or may be added
+ * explicitly using method {@link #install}.
+ *
+ * <p><em>jsr166e note: This class is targeted to be placed in
+ * java.util.concurrent.atomic<em>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class LongAdderTable<K> implements Serializable {
+    /** Relies on default serialization */
+    private static final long serialVersionUID = 7249369246863182397L;
+
+    /** The underlying map */
+    private final ConcurrentHashMapV8<K, LongAdder> map;
+
+    static final class CreateAdder
+        implements ConcurrentHashMapV8.MappingFunction<Object, LongAdder> {
+        public LongAdder map(Object unused) { return new LongAdder(); }
+    }
+
+    private static final CreateAdder createAdder = new CreateAdder();
+
+    /**
+     * Creates a new empty table.
+     */
+    public LongAdderTable() {
+        map = new ConcurrentHashMapV8<K, LongAdder>();
+    }
+
+    /**
+     * If the given key does not already exist in the table, inserts
+     * the key with initial sum of zero; in either case returning the
+     * adder associated with this key.
+     *
+     * @param key the key
+     * @return the adder associated with the key
+     */
+    public LongAdder install(K key) {
+        return map.computeIfAbsent(key, createAdder);
+    }
+
+    /**
+     * Adds the given value to the sum associated with the given
+     * key.  If the key does not already exist in the table, it is
+     * inserted.
+     *
+     * @param key the key
+     * @param x the value to add
+     */
+    public void add(K key, long x) {
+        map.computeIfAbsent(key, createAdder).add(x);
+    }
+
+    /**
+     * Increments the sum associated with the given key.  If the key
+     * does not already exist in the table, it is inserted.
+     *
+     * @param key the key
+     */
+    public void increment(K key) { add(key, 1L); }
+
+    /**
+     * Decrements the sum associated with the given key.  If the key
+     * does not already exist in the table, it is inserted.
+     *
+     * @param key the key
+     */
+    public void decrement(K key) { add(key, -1L); }
+
+    /**
+     * Returns the sum associated with the given key, or zero if the
+     * key does not currently exist in the table.
+     *
+     * @param key the key
+     * @return the sum associated with the key, or zero if the key is
+     * not in the table
+     */
+    public long sum(K key) {
+        LongAdder a = map.get(key);
+        return a == null ? 0L : a.sum();
+    }
+
+    /**
+     * Resets the sum associated with the given key to zero if the key
+     * exists in the table.  This method does <em>NOT</em> add or
+     * remove the key from the table (see {@link #remove}).
+     *
+     * @param key the key
+     */
+    public void reset(K key) {
+        LongAdder a = map.get(key);
+        if (a != null)
+            a.reset();
+    }
+
+    /**
+     * Resets the sum associated with the given key to zero if the key
+     * exists in the table.  This method does <em>NOT</em> add or
+     * remove the key from the table (see {@link #remove}).
+     *
+     * @param key the key
+     * @return the previous sum, or zero if the key is not
+     * in the table
+     */
+    public long sumThenReset(K key) {
+        LongAdder a = map.get(key);
+        return a == null ? 0L : a.sumThenReset();
+    }
+
+    /**
+     * Returns the sum totalled across all keys.
+     *
+     * @return the sum totalled across all keys
+     */
+    public long sumAll() {
+        long sum = 0L;
+        for (LongAdder a : map.values())
+            sum += a.sum();
+        return sum;
+    }
+
+    /**
+     * Resets the sum associated with each key to zero.
+     */
+    public void resetAll() {
+        for (LongAdder a : map.values())
+            a.reset();
+    }
+
+    /**
+     * Totals, then resets, the sums associated with all keys.
+     *
+     * @return the sum totalled across all keys
+     */
+    public long sumThenResetAll() {
+        long sum = 0L;
+        for (LongAdder a : map.values())
+            sum += a.sumThenReset();
+        return sum;
+    }
+
+    /**
+     * Removes the given key from the table.
+     *
+     * @param key the key
+     */
+    public void remove(K key) { map.remove(key); }
+
+    /**
+     * Removes all keys from the table.
+     */
+    public void removeAll() { map.clear(); }
+
+    /**
+     * Returns the current set of keys.
+     *
+     * @return the current set of keys
+     */
+    public Set<K> keySet() {
+        return map.keySet();
+    }
+
+    /**
+     * Returns the current set of key-value mappings.
+     *
+     * @return the current set of key-value mappings
+     */
+    public Set<Map.Entry<K,LongAdder>> entrySet() {
+        return map.entrySet();
+    }
+
+}
diff --git a/src/main/java/jsr166e/LongMaxUpdater.java b/src/main/java/jsr166e/LongMaxUpdater.java
new file mode 100644
index 00000000000..d29562ba9ff
--- /dev/null
+++ b/src/main/java/jsr166e/LongMaxUpdater.java
@@ -0,0 +1,186 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+/**
+ * One or more variables that together maintain a running {@code long}
+ * maximum with initial value {@code Long.MIN_VALUE}.  When updates
+ * (method {@link #update}) are contended across threads, the set of
+ * variables may grow dynamically to reduce contention.  Method {@link
+ * #max} (or, equivalently, {@link #longValue}) returns the current
+ * maximum across the variables maintaining updates.
+ *
+ * <p>This class extends {@link Number}, but does <em>not</em> define
+ * methods such as {@code hashCode} and {@code compareTo} because
+ * instances are expected to be mutated, and so are not useful as
+ * collection keys.
+ *
+ * <p><em>jsr166e note: This class is targeted to be placed in
+ * java.util.concurrent.atomic<em>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class LongMaxUpdater extends Striped64 implements Serializable {
+    private static final long serialVersionUID = 7249069246863182397L;
+
+    /**
+     * Version of max for use in retryUpdate
+     */
+    final long fn(long v, long x) { return v > x ? v : x; }
+
+    /**
+     * Creates a new instance with initial maximum of {@code
+     * Long.MIN_VALUE}.
+     */
+    public LongMaxUpdater() {
+        base = Long.MIN_VALUE;
+    }
+
+    /**
+     * Updates the maximum to be at least the given value.
+     *
+     * @param x the value to update
+     */
+    public void update(long x) {
+        Cell[] as; long b, v; HashCode hc; Cell a; int n;
+        if ((as = cells) != null ||
+            (b = base) < x && !casBase(b, x)) {
+            boolean uncontended = true;
+            int h = (hc = threadHashCode.get()).code;
+            if (as == null || (n = as.length) < 1 ||
+                (a = as[(n - 1) & h]) == null ||
+                ((v = a.value) < x && !(uncontended = a.cas(v, x))))
+                retryUpdate(x, hc, uncontended);
+        }
+    }
+
+    /**
+     * Returns the current maximum.  The returned value is
+     * <em>NOT</em> an atomic snapshot: Invocation in the absence of
+     * concurrent updates returns an accurate result, but concurrent
+     * updates that occur while the value is being calculated might
+     * not be incorporated.
+     *
+     * @return the maximum
+     */
+    public long max() {
+        Cell[] as = cells;
+        long max = base;
+        if (as != null) {
+            int n = as.length;
+            long v;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null && (v = a.value) > max)
+                    max = v;
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Resets variables maintaining updates to {@code Long.MIN_VALUE}.
+     * This method may be a useful alternative to creating a new
+     * updater, but is only effective if there are no concurrent
+     * updates.  Because this method is intrinsically racy, it should
+     * only be used when it is known that no threads are concurrently
+     * updating.
+     */
+    public void reset() {
+        internalReset(Long.MIN_VALUE);
+    }
+
+    /**
+     * Equivalent in effect to {@link #max} followed by {@link
+     * #reset}. This method may apply for example during quiescent
+     * points between multithreaded computations.  If there are
+     * updates concurrent with this method, the returned value is
+     * <em>not</em> guaranteed to be the final value occurring before
+     * the reset.
+     *
+     * @return the maximum
+     */
+    public long maxThenReset() {
+        Cell[] as = cells;
+        long max = base;
+        base = Long.MIN_VALUE;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null) {
+                    long v = a.value;
+                    a.value = Long.MIN_VALUE;
+                    if (v > max)
+                        max = v;
+                }
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Returns the String representation of the {@link #max}.
+     * @return the String representation of the {@link #max}
+     */
+    public String toString() {
+        return Long.toString(max());
+    }
+
+    /**
+     * Equivalent to {@link #max}.
+     *
+     * @return the maximum
+     */
+    public long longValue() {
+        return max();
+    }
+
+    /**
+     * Returns the {@link #max} as an {@code int} after a narrowing
+     * primitive conversion.
+     */
+    public int intValue() {
+        return (int)max();
+    }
+
+    /**
+     * Returns the {@link #max} as a {@code float}
+     * after a widening primitive conversion.
+     */
+    public float floatValue() {
+        return (float)max();
+    }
+
+    /**
+     * Returns the {@link #max} as a {@code double} after a widening
+     * primitive conversion.
+     */
+    public double doubleValue() {
+        return (double)max();
+    }
+
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        s.writeLong(max());
+    }
+
+    private void readObject(ObjectInputStream s)
+        throws IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        busy = 0;
+        cells = null;
+        base = s.readLong();
+    }
+
+}
diff --git a/src/main/java/jsr166e/SequenceLock.java b/src/main/java/jsr166e/SequenceLock.java
new file mode 100644
index 00000000000..a86d733370d
--- /dev/null
+++ b/src/main/java/jsr166e/SequenceLock.java
@@ -0,0 +1,639 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.AbstractQueuedLongSynchronizer;
+import java.util.Collection;
+import java.io.ObjectOutputStream;
+import java.io.ObjectInputStream;
+import java.io.IOException;
+
+/**
+ * A reentrant mutual exclusion {@link Lock} in which each lock
+ * acquisition or release advances a sequence number.  When the
+ * sequence number (accessible using {@link #getSequence()}) is odd,
+ * the lock is held. When it is even (i.e., ({@code lock.getSequence()
+ * & 1L) == 0L}), the lock is released. Method {@link
+ * #awaitAvailability} can be used to await availability of the lock,
+ * returning its current sequence number. Sequence numbers (as well as
+ * reentrant hold counts) are of type {@code long} to ensure that they
+ * will not wrap around until hundreds of years of use under current
+ * processor rates.  A SequenceLock can be created with a specified
+ * number of spins. Attempts to acquire the lock in method {@link
+ * #lock} will retry at least the given number of times before
+ * blocking. If not specified, a default, possibly platform-specific,
+ * value is used.
+ *
+ * <p>Except for the lack of support for specified fairness policies,
+ * or {@link Condition} objects, a SequenceLock can be used in the
+ * same way as {@link ReentrantLock}. It provides similar status and
+ * monitoring methods, such as {@link #isHeldByCurrentThread}.
+ * SequenceLocks may be preferable in contexts in which multiple
+ * threads invoke short read-only methods much more frequently than
+ * fully locked methods.
+ *
+ * <p> Methods {@code awaitAvailability} and {@code getSequence} can
+ * be used together to define (partially) optimistic read-only methods
+ * that are usually more efficient than ReadWriteLocks when they
+ * apply.  These methods should in general be structured as loops that
+ * await lock availability, then read {@code volatile} fields into
+ * local variables (and may further read other values derived from
+ * these, for example the {@code length} of a {@code volatile} array),
+ * and retry if the sequence number changed while doing so.
+ * Alternatively, because {@code awaitAvailability} accommodates
+ * reentrancy, a method can retry a bounded number of times before
+ * switching to locking mode.  While conceptually straightforward,
+ * expressing these ideas can be verbose. For example:
+ *
+ *  <pre> {@code
+ * class Point {
+ *   private volatile double x, y;
+ *   private final SequenceLock sl = new SequenceLock();
+ *
+ *   // an exclusively locked method
+ *   void move(double deltaX, double deltaY) {
+ *     sl.lock();
+ *     try {
+ *       x += deltaX;
+ *       y += deltaY;
+ *     } finally {
+ *       sl.unlock();
+ *     }
+ *   }
+ *
+ *   // A read-only method
+ *   double distanceFromOriginV1() {
+ *     double currentX, currentY;
+ *     long seq;
+ *     do {
+ *       seq = sl.awaitAvailability();
+ *       currentX = x;
+ *       currentY = y;
+ *     } while (sl.getSequence() != seq); // retry if sequence changed
+ *     return Math.sqrt(currentX * currentX + currentY * currentY);
+ *   }
+ *
+ *   // Uses bounded retries before locking
+ *   double distanceFromOriginV2() {
+ *     double currentX, currentY;
+ *     long seq;
+ *     int retries = RETRIES_BEFORE_LOCKING; // for example 8
+ *     try {
+ *       do {
+ *         if (--retries < 0)
+ *           sl.lock();
+ *         seq = sl.awaitAvailability();
+ *         currentX = x;
+ *         currentY = y;
+ *       } while (sl.getSequence() != seq);
+ *     } finally {
+ *       if (retries < 0)
+ *         sl.unlock();
+ *     }
+ *     return Math.sqrt(currentX * currentX + currentY * currentY);
+ *   }
+ * }}</pre>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public class SequenceLock implements Lock, java.io.Serializable {
+    private static final long serialVersionUID = 7373984872572414699L;
+
+    static final class Sync extends AbstractQueuedLongSynchronizer {
+        static final long serialVersionUID = 2540673546047039555L;
+
+        /**
+         * The number of times to spin in lock() and awaitAvailability().
+         */
+        final int spins;
+
+        /**
+         * The number of reentrant holds on this lock. Uses a long for
+         * compatibility with other AbstractQueuedLongSynchronizer
+         * operations. Accessed only by lock holder.
+         */
+        long holds;
+
+        Sync(int spins) { this.spins = spins; }
+
+        // overrides of AQLS methods
+
+        public final boolean isHeldExclusively() {
+            return (getState() & 1L) != 0L &&
+                getExclusiveOwnerThread() == Thread.currentThread();
+        }
+
+        public final boolean tryAcquire(long acquires) {
+            Thread current = Thread.currentThread();
+            long c = getState();
+            if ((c & 1L) == 0L) {
+                if (compareAndSetState(c, c + 1L)) {
+                    holds = acquires;
+                    setExclusiveOwnerThread(current);
+                    return true;
+                }
+            }
+            else if (current == getExclusiveOwnerThread()) {
+                holds += acquires;
+                return true;
+            }
+            return false;
+        }
+
+        public final boolean tryRelease(long releases) {
+            if (Thread.currentThread() != getExclusiveOwnerThread())
+                throw new IllegalMonitorStateException();
+            if ((holds -= releases) == 0L) {
+                setExclusiveOwnerThread(null);
+                setState(getState() + 1L);
+                return true;
+            }
+            return false;
+        }
+
+        public final long tryAcquireShared(long unused) {
+            return (((getState() & 1L) == 0L) ? 1L :
+                    (getExclusiveOwnerThread() == Thread.currentThread()) ? 0L:
+                    -1L);
+        }
+
+        public final boolean tryReleaseShared(long unused) {
+            return (getState() & 1L) == 0L;
+        }
+
+        public final Condition newCondition() {
+            throw new UnsupportedOperationException();
+        }
+
+        // Other methods in support of SequenceLock
+
+        final long getSequence() {
+            return getState();
+        }
+
+        final void lock() {
+            int k = spins;
+            while (!tryAcquire(1L)) {
+                if (k == 0) {
+                    acquire(1L);
+                    break;
+                }
+                --k;
+            }
+        }
+
+        final long awaitAvailability() {
+            long s;
+            while (((s = getState()) & 1L) != 0L &&
+                   getExclusiveOwnerThread() != Thread.currentThread()) {
+                acquireShared(1L);
+                releaseShared(1L);
+            }
+            return s;
+        }
+
+        final long tryAwaitAvailability(long nanos)
+            throws InterruptedException, TimeoutException {
+            Thread current = Thread.currentThread();
+            for (;;) {
+                long s = getState();
+                if ((s & 1L) == 0L || getExclusiveOwnerThread() == current) {
+                    releaseShared(1L);
+                    return s;
+                }
+                if (!tryAcquireSharedNanos(1L, nanos))
+                    throw new TimeoutException();
+                // since tryAcquireSharedNanos doesn't return seq
+                // retry with minimal wait time.
+                nanos = 1L;
+            }
+        }
+
+        final boolean isLocked() {
+            return (getState() & 1L) != 0L;
+        }
+
+        final Thread getOwner() {
+            return (getState() & 1L) == 0L ? null : getExclusiveOwnerThread();
+        }
+
+        final long getHoldCount() {
+            return isHeldExclusively() ? holds : 0;
+        }
+
+        private void readObject(ObjectInputStream s)
+            throws IOException, ClassNotFoundException {
+            s.defaultReadObject();
+            holds = 0L;
+            setState(0L); // reset to unlocked state
+        }
+    }
+
+    private final Sync sync;
+
+    /**
+     * The default spin value for constructor. Future versions of this
+     * class might choose platform-specific values.  Currently, except
+     * on uniprocessors, it is set to a small value that overcomes near
+     * misses between releases and acquires.
+     */
+    static final int DEFAULT_SPINS =
+        Runtime.getRuntime().availableProcessors() > 1 ? 64 : 0;
+
+    /**
+     * Creates an instance of {@code SequenceLock} with the default
+     * number of retry attempts to acquire the lock before blocking.
+     */
+    public SequenceLock() { sync = new Sync(DEFAULT_SPINS); }
+
+    /**
+     * Creates an instance of {@code SequenceLock} that will retry
+     * attempts to acquire the lock at least the given number times
+     * before blocking.
+     */
+    public SequenceLock(int spins) { sync = new Sync(spins); }
+
+    /**
+     * Returns the current sequence number of this lock.  The sequence
+     * number is advanced upon each acquire or release action. When
+     * this value is odd, the lock is held; when even, it is released.
+     *
+     * @return the current sequence number
+     */
+    public long getSequence() { return sync.getSequence(); }
+
+    /**
+     * Returns the current sequence number when the lock is, or
+     * becomes, available. A lock is available if it is either
+     * released, or is held by the current thread.  If the lock is not
+     * available, the current thread becomes disabled for thread
+     * scheduling purposes and lies dormant until the lock has been
+     * released by some other thread.
+     *
+     * @return the current sequence number
+     */
+    public long awaitAvailability() { return sync.awaitAvailability(); }
+
+    /**
+     * Returns the current sequence number if the lock is, or
+     * becomes, available within the specified waiting time.
+     *
+     * <p>If the lock is not available, the current thread becomes
+     * disabled for thread scheduling purposes and lies dormant until
+     * one of three things happens:
+     *
+     * <ul>
+     *
+     * <li>The lock becomes available, in which case the current
+     * sequence number is returned.
+     *
+     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
+     * the current thread, in which case this method throws
+     * {@link InterruptedException}.
+     *
+     * <li>The specified waiting time elapses, in which case
+     * this method throws {@link TimeoutException}.
+     *
+     * </ul>
+     *
+     * @param timeout the time to wait for availability
+     * @param unit the time unit of the timeout argument
+     * @return the current sequence number if the lock is available
+     *         upon return from this method
+     * @throws InterruptedException if the current thread is interrupted
+     * @throws TimeoutException if the lock was not available within
+     * the specified waiting time
+     * @throws NullPointerException if the time unit is null
+     */
+    public long tryAwaitAvailability(long timeout, TimeUnit unit)
+        throws InterruptedException, TimeoutException {
+        return sync.tryAwaitAvailability(unit.toNanos(timeout));
+    }
+
+    /**
+     * Acquires the lock.
+     *
+     * <p>If the current thread already holds this lock then the hold count
+     * is incremented by one and the method returns immediately without
+     * incrementing the sequence number.
+     *
+     * <p>If this lock not held by another thread, this method
+     * increments the sequence number (which thus becomes an odd
+     * number), sets the lock hold count to one, and returns
+     * immediately.
+     *
+     * <p>If the lock is held by another thread then the current
+     * thread may retry acquiring this lock, depending on the {@code
+     * spin} count established in constructor.  If the lock is still
+     * not acquired, the current thread becomes disabled for thread
+     * scheduling purposes and lies dormant until enabled by
+     * some other thread releasing the lock.
+     */
+    public void lock() { sync.lock(); }
+
+    /**
+     * Acquires the lock unless the current thread is
+     * {@linkplain Thread#interrupt interrupted}.
+     *
+     * <p>If the current thread already holds this lock then the hold count
+     * is incremented by one and the method returns immediately without
+     * incrementing the sequence number.
+     *
+     * <p>If this lock not held by another thread, this method
+     * increments the sequence number (which thus becomes an odd
+     * number), sets the lock hold count to one, and returns
+     * immediately.
+     *
+     * <p>If the lock is held by another thread then the current
+     * thread may retry acquiring this lock, depending on the {@code
+     * spin} count established in constructor.  If the lock is still
+     * not acquired, the current thread becomes disabled for thread
+     * scheduling purposes and lies dormant until one of two things
+     * happens:
+     *
+     * <ul>
+     *
+     * <li>The lock is acquired by the current thread; or
+     *
+     * <li>Some other thread {@linkplain Thread#interrupt interrupts} the
+     * current thread.
+     *
+     * </ul>
+     *
+     * <p>If the lock is acquired by the current thread then the lock hold
+     * count is set to one and the sequence number is incremented.
+     *
+     * <p>If the current thread:
+     *
+     * <ul>
+     *
+     * <li>has its interrupted status set on entry to this method; or
+     *
+     * <li>is {@linkplain Thread#interrupt interrupted} while acquiring
+     * the lock,
+     *
+     * </ul>
+     *
+     * then {@link InterruptedException} is thrown and the current thread's
+     * interrupted status is cleared.
+     *
+     * <p>In this implementation, as this method is an explicit
+     * interruption point, preference is given to responding to the
+     * interrupt over normal or reentrant acquisition of the lock.
+     *
+     * @throws InterruptedException if the current thread is interrupted
+     */
+    public void lockInterruptibly() throws InterruptedException {
+        sync.acquireInterruptibly(1L);
+    }
+
+    /**
+     * Acquires the lock only if it is not held by another thread at the time
+     * of invocation.
+     *
+     * <p>If the current thread already holds this lock then the hold
+     * count is incremented by one and the method returns {@code true}
+     * without incrementing the sequence number.
+     *
+     * <p>If this lock not held by another thread, this method
+     * increments the sequence number (which thus becomes an odd
+     * number), sets the lock hold count to one, and returns {@code
+     * true}.
+     *
+     * <p>If the lock is held by another thread then this method
+     * returns {@code false}.
+     *
+     * @return {@code true} if the lock was free and was acquired by the
+     *         current thread, or the lock was already held by the current
+     *         thread; and {@code false} otherwise
+     */
+    public boolean tryLock() { return sync.tryAcquire(1L); }
+
+    /**
+     * Acquires the lock if it is not held by another thread within the given
+     * waiting time and the current thread has not been
+     * {@linkplain Thread#interrupt interrupted}.
+     *
+     * <p>If the current thread already holds this lock then the hold count
+     * is incremented by one and the method returns immediately without
+     * incrementing the sequence number.
+     *
+     * <p>If this lock not held by another thread, this method
+     * increments the sequence number (which thus becomes an odd
+     * number), sets the lock hold count to one, and returns
+     * immediately.
+     *
+     * <p>If the lock is held by another thread then the current
+     * thread may retry acquiring this lock, depending on the {@code
+     * spin} count established in constructor.  If the lock is still
+     * not acquired, the current thread becomes disabled for thread
+     * scheduling purposes and lies dormant until one of three things
+     * happens:
+     *
+     * <ul>
+     *
+     * <li>The lock is acquired by the current thread; or
+     *
+     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
+     * the current thread; or
+     *
+     * <li>The specified waiting time elapses
+     *
+     * </ul>
+     *
+     * <p>If the lock is acquired then the value {@code true} is returned and
+     * the lock hold count is set to one.
+     *
+     * <p>If the current thread:
+     *
+     * <ul>
+     *
+     * <li>has its interrupted status set on entry to this method; or
+     *
+     * <li>is {@linkplain Thread#interrupt interrupted} while
+     * acquiring the lock,
+     *
+     * </ul>
+     * then {@link InterruptedException} is thrown and the current thread's
+     * interrupted status is cleared.
+     *
+     * <p>If the specified waiting time elapses then the value {@code false}
+     * is returned.  If the time is less than or equal to zero, the method
+     * will not wait at all.
+     *
+     * <p>In this implementation, as this method is an explicit
+     * interruption point, preference is given to responding to the
+     * interrupt over normal or reentrant acquisition of the lock, and
+     * over reporting the elapse of the waiting time.
+     *
+     * @param timeout the time to wait for the lock
+     * @param unit the time unit of the timeout argument
+     * @return {@code true} if the lock was free and was acquired by the
+     *         current thread, or the lock was already held by the current
+     *         thread; and {@code false} if the waiting time elapsed before
+     *         the lock could be acquired
+     * @throws InterruptedException if the current thread is interrupted
+     * @throws NullPointerException if the time unit is null
+     *
+     */
+    public boolean tryLock(long timeout, TimeUnit unit)
+        throws InterruptedException {
+        return sync.tryAcquireNanos(1L, unit.toNanos(timeout));
+    }
+
+    /**
+     * Attempts to release this lock.
+     *
+     * <p>If the current thread is the holder of this lock then the
+     * hold count is decremented.  If the hold count is now zero then
+     * the sequence number is incremented (thus becoming an even
+     * number) and the lock is released.  If the current thread is not
+     * the holder of this lock then {@link
+     * IllegalMonitorStateException} is thrown.
+     *
+     * @throws IllegalMonitorStateException if the current thread does not
+     *         hold this lock
+     */
+    public void unlock() { sync.release(1); }
+
+    /**
+     * Throws UnsupportedOperationException. SequenceLocks
+     * do not support Condition objects.
+     *
+     * @throws UnsupportedOperationException
+     */
+    public Condition newCondition() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Queries the number of holds on this lock by the current thread.
+     *
+     * <p>A thread has a hold on a lock for each lock action that is not
+     * matched by an unlock action.
+     *
+     * <p>The hold count information is typically only used for testing and
+     * debugging purposes.
+     *
+     * @return the number of holds on this lock by the current thread,
+     *         or zero if this lock is not held by the current thread
+     */
+    public long getHoldCount() { return sync.getHoldCount(); }
+
+    /**
+     * Queries if this lock is held by the current thread.
+     *
+     * @return {@code true} if current thread holds this lock and
+     *         {@code false} otherwise
+     */
+    public boolean isHeldByCurrentThread() { return sync.isHeldExclusively(); }
+
+    /**
+     * Queries if this lock is held by any thread. This method is
+     * designed for use in monitoring of the system state,
+     * not for synchronization control.
+     *
+     * @return {@code true} if any thread holds this lock and
+     *         {@code false} otherwise
+     */
+    public boolean isLocked() { return sync.isLocked(); }
+
+    /**
+     * Returns the thread that currently owns this lock, or
+     * {@code null} if not owned. When this method is called by a
+     * thread that is not the owner, the return value reflects a
+     * best-effort approximation of current lock status. For example,
+     * the owner may be momentarily {@code null} even if there are
+     * threads trying to acquire the lock but have not yet done so.
+     * This method is designed to facilitate construction of
+     * subclasses that provide more extensive lock monitoring
+     * facilities.
+     *
+     * @return the owner, or {@code null} if not owned
+     */
+    protected Thread getOwner() { return sync.getOwner(); }
+
+    /**
+     * Queries whether any threads are waiting to acquire this lock. Note that
+     * because cancellations may occur at any time, a {@code true}
+     * return does not guarantee that any other thread will ever
+     * acquire this lock.  This method is designed primarily for use in
+     * monitoring of the system state.
+     *
+     * @return {@code true} if there may be other threads waiting to
+     *         acquire the lock
+     */
+    public final boolean hasQueuedThreads() {
+        return sync.hasQueuedThreads();
+    }
+
+    /**
+     * Queries whether the given thread is waiting to acquire this
+     * lock. Note that because cancellations may occur at any time, a
+     * {@code true} return does not guarantee that this thread
+     * will ever acquire this lock.  This method is designed primarily for use
+     * in monitoring of the system state.
+     *
+     * @param thread the thread
+     * @return {@code true} if the given thread is queued waiting for this lock
+     * @throws NullPointerException if the thread is null
+     */
+    public final boolean hasQueuedThread(Thread thread) {
+        return sync.isQueued(thread);
+    }
+
+    /**
+     * Returns an estimate of the number of threads waiting to
+     * acquire this lock.  The value is only an estimate because the number of
+     * threads may change dynamically while this method traverses
+     * internal data structures.  This method is designed for use in
+     * monitoring of the system state, not for synchronization
+     * control.
+     *
+     * @return the estimated number of threads waiting for this lock
+     */
+    public final int getQueueLength() {
+        return sync.getQueueLength();
+    }
+
+    /**
+     * Returns a collection containing threads that may be waiting to
+     * acquire this lock.  Because the actual set of threads may change
+     * dynamically while constructing this result, the returned
+     * collection is only a best-effort estimate.  The elements of the
+     * returned collection are in no particular order.  This method is
+     * designed to facilitate construction of subclasses that provide
+     * more extensive monitoring facilities.
+     *
+     * @return the collection of threads
+     */
+    protected Collection<Thread> getQueuedThreads() {
+        return sync.getQueuedThreads();
+    }
+
+    /**
+     * Returns a string identifying this lock, as well as its lock state.
+     * The state, in brackets, includes either the String {@code "Unlocked"}
+     * or the String {@code "Locked by"} followed by the
+     * {@linkplain Thread#getName name} of the owning thread.
+     *
+     * @return a string identifying this lock, as well as its lock state
+     */
+    public String toString() {
+        Thread o = sync.getOwner();
+        return super.toString() + ((o == null) ?
+                                   "[Unlocked]" :
+                                   "[Locked by thread " + o.getName() + "]");
+    }
+
+}
+
diff --git a/src/main/java/jsr166e/Striped64.java b/src/main/java/jsr166e/Striped64.java
new file mode 100644
index 00000000000..a3b8cc322ce
--- /dev/null
+++ b/src/main/java/jsr166e/Striped64.java
@@ -0,0 +1,340 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e;
+import java.util.Random;
+
+/**
+ * A package-local class holding common representation and mechanics
+ * for classes supporting dynamic striping on 64bit values. The class
+ * extends Number so that concrete subclasses must publicly do so.
+ */
+abstract class Striped64 extends Number {
+    /*
+     * This class maintains a lazily-initialized table of atomically
+     * updated variables, plus an extra "base" field. The table size
+     * is a power of two. Indexing uses masked per-thread hash codes.
+     * Nearly all declarations in this class are package-private,
+     * accessed directly by subclasses.
+     *
+     * Table entries are of class Cell; a variant of AtomicLong padded
+     * to reduce cache contention on most processors. Padding is
+     * overkill for most Atomics because they are usually irregularly
+     * scattered in memory and thus don't interfere much with each
+     * other. But Atomic objects residing in arrays will tend to be
+     * placed adjacent to each other, and so will most often share
+     * cache lines (with a huge negative performance impact) without
+     * this precaution.
+     *
+     * In part because Cells are relatively large, we avoid creating
+     * them until they are needed.  When there is no contention, all
+     * updates are made to the base field.  Upon first contention (a
+     * failed CAS on base update), the table is initialized to size 2.
+     * The table size is doubled upon further contention until
+     * reaching the nearest power of two greater than or equal to the
+     * number of CPUS. Table slots remain empty (null) until they are
+     * needed.
+     *
+     * A single spinlock ("busy") is used for initializing and
+     * resizing the table, as well as populating slots with new Cells.
+     * There is no need for a blocking lock: When the lock is not
+     * available, threads try other slots (or the base).  During these
+     * retries, there is increased contention and reduced locality,
+     * which is still better than alternatives.
+     *
+     * Per-thread hash codes are initialized to random values.
+     * Contention and/or table collisions are indicated by failed
+     * CASes when performing an update operation (see method
+     * retryUpdate). Upon a collision, if the table size is less than
+     * the capacity, it is doubled in size unless some other thread
+     * holds the lock. If a hashed slot is empty, and lock is
+     * available, a new Cell is created. Otherwise, if the slot
+     * exists, a CAS is tried.  Retries proceed by "double hashing",
+     * using a secondary hash (Marsaglia XorShift) to try to find a
+     * free slot.
+     *
+     * The table size is capped because, when there are more threads
+     * than CPUs, supposing that each thread were bound to a CPU,
+     * there would exist a perfect hash function mapping threads to
+     * slots that eliminates collisions. When we reach capacity, we
+     * search for this mapping by randomly varying the hash codes of
+     * colliding threads.  Because search is random, and collisions
+     * only become known via CAS failures, convergence can be slow,
+     * and because threads are typically not bound to CPUS forever,
+     * may not occur at all. However, despite these limitations,
+     * observed contention rates are typically low in these cases.
+     *
+     * It is possible for a Cell to become unused when threads that
+     * once hashed to it terminate, as well as in the case where
+     * doubling the table causes no thread to hash to it under
+     * expanded mask.  We do not try to detect or remove such cells,
+     * under the assumption that for long-running instances, observed
+     * contention levels will recur, so the cells will eventually be
+     * needed again; and for short-lived ones, it does not matter.
+     */
+
+    /**
+     * Padded variant of AtomicLong supporting only raw accesses plus CAS.
+     * The value field is placed between pads, hoping that the JVM doesn't
+     * reorder them.
+     *
+     * JVM intrinsics note: It would be possible to use a release-only
+     * form of CAS here, if it were provided.
+     */
+    static final class Cell {
+        volatile long p0, p1, p2, p3, p4, p5, p6;
+        volatile long value;
+        volatile long q0, q1, q2, q3, q4, q5, q6;
+        Cell(long x) { value = x; }
+
+        final boolean cas(long cmp, long val) {
+            return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val);
+        }
+
+        // Unsafe mechanics
+        private static final sun.misc.Unsafe UNSAFE;
+        private static final long valueOffset;
+        static {
+            try {
+                UNSAFE = getUnsafe();
+                Class<?> ak = Cell.class;
+                valueOffset = UNSAFE.objectFieldOffset
+                    (ak.getDeclaredField("value"));
+            } catch (Exception e) {
+                throw new Error(e);
+            }
+        }
+
+    }
+
+    /**
+     * Holder for the thread-local hash code. The code is initially
+     * random, but may be set to a different value upon collisions.
+     */
+    static final class HashCode {
+        static final Random rng = new Random();
+        int code;
+        HashCode() {
+            int h = rng.nextInt(); // Avoid zero to allow xorShift rehash
+            code = (h == 0) ? 1 : h;
+        }
+    }
+
+    /**
+     * The corresponding ThreadLocal class
+     */
+    static final class ThreadHashCode extends ThreadLocal<HashCode> {
+        public HashCode initialValue() { return new HashCode(); }
+    }
+
+    /**
+     * Static per-thread hash codes. Shared across all instances to
+     * reduce ThreadLocal pollution and because adjustments due to
+     * collisions in one table are likely to be appropriate for
+     * others.
+     */
+    static final ThreadHashCode threadHashCode = new ThreadHashCode();
+
+    /** Number of CPUS, to place bound on table size */
+    static final int NCPU = Runtime.getRuntime().availableProcessors();
+
+    /**
+     * Table of cells. When non-null, size is a power of 2.
+     */
+    transient volatile Cell[] cells;
+
+    /**
+     * Base value, used mainly when there is no contention, but also as
+     * a fallback during table initialization races. Updated via CAS.
+     */
+    transient volatile long base;
+
+    /**
+     * Spinlock (locked via CAS) used when resizing and/or creating Cells.
+     */
+    transient volatile int busy;
+
+    /**
+     * Package-private default constructor
+     */
+    Striped64() {
+    }
+
+    /**
+     * CASes the base field.
+     */
+    final boolean casBase(long cmp, long val) {
+        return UNSAFE.compareAndSwapLong(this, baseOffset, cmp, val);
+    }
+
+    /**
+     * CASes the busy field from 0 to 1 to acquire lock.
+     */
+    final boolean casBusy() {
+        return UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1);
+    }
+
+    /**
+     * Computes the function of current and new value. Subclasses
+     * should open-code this update function for most uses, but the
+     * virtualized form is needed within retryUpdate.
+     *
+     * @param currentValue the current value (of either base or a cell)
+     * @param newValue the argument from a user update call
+     * @return result of the update function
+     */
+    abstract long fn(long currentValue, long newValue);
+
+    /**
+     * Handles cases of updates involving initialization, resizing,
+     * creating new Cells, and/or contention. See above for
+     * explanation. This method suffers the usual non-modularity
+     * problems of optimistic retry code, relying on rechecked sets of
+     * reads.
+     *
+     * @param x the value
+     * @param hc the hash code holder
+     * @param wasUncontended false if CAS failed before call
+     */
+    final void retryUpdate(long x, HashCode hc, boolean wasUncontended) {
+        int h = hc.code;
+        boolean collide = false;                // True if last slot nonempty
+        for (;;) {
+            Cell[] as; Cell a; int n; long v;
+            if ((as = cells) != null && (n = as.length) > 0) {
+                if ((a = as[(n - 1) & h]) == null) {
+                    if (busy == 0) {            // Try to attach new Cell
+                        Cell r = new Cell(x);   // Optimistically create
+                        if (busy == 0 && casBusy()) {
+                            boolean created = false;
+                            try {               // Recheck under lock
+                                Cell[] rs; int m, j;
+                                if ((rs = cells) != null &&
+                                    (m = rs.length) > 0 &&
+                                    rs[j = (m - 1) & h] == null) {
+                                    rs[j] = r;
+                                    created = true;
+                                }
+                            } finally {
+                                busy = 0;
+                            }
+                            if (created)
+                                break;
+                            continue;           // Slot is now non-empty
+                        }
+                    }
+                    collide = false;
+                }
+                else if (!wasUncontended)       // CAS already known to fail
+                    wasUncontended = true;      // Continue after rehash
+                else if (a.cas(v = a.value, fn(v, x)))
+                    break;
+                else if (n >= NCPU || cells != as)
+                    collide = false;            // At max size or stale
+                else if (!collide)
+                    collide = true;
+                else if (busy == 0 && casBusy()) {
+                    try {
+                        if (cells == as) {      // Expand table unless stale
+                            Cell[] rs = new Cell[n << 1];
+                            for (int i = 0; i < n; ++i)
+                                rs[i] = as[i];
+                            cells = rs;
+                        }
+                    } finally {
+                        busy = 0;
+                    }
+                    collide = false;
+                    continue;                   // Retry with expanded table
+                }
+                h ^= h << 13;                   // Rehash
+                h ^= h >>> 17;
+                h ^= h << 5;
+            }
+            else if (busy == 0 && cells == as && casBusy()) {
+                boolean init = false;
+                try {                           // Initialize table
+                    if (cells == as) {
+                        Cell[] rs = new Cell[2];
+                        rs[h & 1] = new Cell(x);
+                        cells = rs;
+                        init = true;
+                    }
+                } finally {
+                    busy = 0;
+                }
+                if (init)
+                    break;
+            }
+            else if (casBase(v = base, fn(v, x)))
+                break;                          // Fall back on using base
+        }
+        hc.code = h;                            // Record index for next time
+    }
+
+
+    /**
+     * Sets base and all cells to the given value.
+     */
+    final void internalReset(long initialValue) {
+        Cell[] as = cells;
+        base = initialValue;
+        if (as != null) {
+            int n = as.length;
+            for (int i = 0; i < n; ++i) {
+                Cell a = as[i];
+                if (a != null)
+                    a.value = initialValue;
+            }
+        }
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe UNSAFE;
+    private static final long baseOffset;
+    private static final long busyOffset;
+    static {
+        try {
+            UNSAFE = getUnsafe();
+            Class<?> sk = Striped64.class;
+            baseOffset = UNSAFE.objectFieldOffset
+                (sk.getDeclaredField("base"));
+            busyOffset = UNSAFE.objectFieldOffset
+                (sk.getDeclaredField("busy"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166e/extra/AtomicDouble.java b/src/main/java/jsr166e/extra/AtomicDouble.java
new file mode 100644
index 00000000000..29588cfe558
--- /dev/null
+++ b/src/main/java/jsr166e/extra/AtomicDouble.java
@@ -0,0 +1,273 @@
+/*
+ * Written by Doug Lea and Martin Buchholz with assistance from
+ * members of JCP JSR-166 Expert Group and released to the public
+ * domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e.extra;
+
+import static java.lang.Double.doubleToRawLongBits;
+import static java.lang.Double.longBitsToDouble;
+
+/**
+ * A {@code double} value that may be updated atomically.  See the
+ * {@link java.util.concurrent.atomic} package specification for
+ * description of the properties of atomic variables.  An {@code
+ * AtomicDouble} is used in applications such as atomic accumulation,
+ * and cannot be used as a replacement for a {@link Double}.  However,
+ * this class does extend {@code Number} to allow uniform access by
+ * tools and utilities that deal with numerically-based classes.
+ *
+ * <p><a name="bitEquals">This class compares primitive {@code double}
+ * values in methods such as {@link #compareAndSet} by comparing their
+ * bitwise representation using {@link Double#doubleToRawLongBits},
+ * which differs from both the primitive double {@code ==} operator
+ * and from {@link Double#equals}, as if implemented by:
+ *  <pre> {@code
+ * static boolean bitEquals(double x, double y) {
+ *   long xBits = Double.doubleToRawLongBits(x);
+ *   long yBits = Double.doubleToRawLongBits(y);
+ *   return xBits == yBits;
+ * }}</pre>
+ *
+ * @see jsr166e.DoubleAdder
+ * @see jsr166e.DoubleMaxUpdater
+ *
+ * @author Doug Lea
+ * @author Martin Buchholz
+ */
+public class AtomicDouble extends Number implements java.io.Serializable {
+    private static final long serialVersionUID = -8405198993435143622L;
+
+    private transient volatile long value;
+
+    /**
+     * Creates a new {@code AtomicDouble} with the given initial value.
+     *
+     * @param initialValue the initial value
+     */
+    public AtomicDouble(double initialValue) {
+        value = doubleToRawLongBits(initialValue);
+    }
+
+    /**
+     * Creates a new {@code AtomicDouble} with initial value {@code 0.0}.
+     */
+    public AtomicDouble() {
+        // assert doubleToRawLongBits(0.0) == 0L;
+    }
+
+    /**
+     * Gets the current value.
+     *
+     * @return the current value
+     */
+    public final double get() {
+        return longBitsToDouble(value);
+    }
+
+    /**
+     * Sets to the given value.
+     *
+     * @param newValue the new value
+     */
+    public final void set(double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        value = next;
+    }
+
+    /**
+     * Eventually sets to the given value.
+     *
+     * @param newValue the new value
+     */
+    public final void lazySet(double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        unsafe.putOrderedLong(this, valueOffset, next);
+    }
+
+    /**
+     * Atomically sets to the given value and returns the old value.
+     *
+     * @param newValue the new value
+     * @return the previous value
+     */
+    public final double getAndSet(double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        while (true) {
+            long current = value;
+            if (unsafe.compareAndSwapLong(this, valueOffset, current, next))
+                return longBitsToDouble(current);
+        }
+    }
+
+    /**
+     * Atomically sets the value to the given updated value
+     * if the current value is <a href="#bitEquals">bitwise equal</a>
+     * to the expected value.
+     *
+     * @param expect the expected value
+     * @param update the new value
+     * @return {@code true} if successful. False return indicates that
+     * the actual value was not bitwise equal to the expected value.
+     */
+    public final boolean compareAndSet(double expect, double update) {
+        return unsafe.compareAndSwapLong(this, valueOffset,
+                                         doubleToRawLongBits(expect),
+                                         doubleToRawLongBits(update));
+    }
+
+    /**
+     * Atomically sets the value to the given updated value
+     * if the current value is <a href="#bitEquals">bitwise equal</a>
+     * to the expected value.
+     *
+     * <p>May <a
+     * href="http://download.oracle.com/javase/7/docs/api/java/util/concurrent/atomic/package-summary.html#Spurious">
+     * fail spuriously</a>
+     * and does not provide ordering guarantees, so is only rarely an
+     * appropriate alternative to {@code compareAndSet}.
+     *
+     * @param expect the expected value
+     * @param update the new value
+     * @return {@code true} if successful
+     */
+    public final boolean weakCompareAndSet(double expect, double update) {
+        return compareAndSet(expect, update);
+    }
+
+    /**
+     * Atomically adds the given value to the current value.
+     *
+     * @param delta the value to add
+     * @return the previous value
+     */
+    public final double getAndAdd(double delta) {
+        while (true) {
+            long current = value;
+            double currentVal = longBitsToDouble(current);
+            double nextVal = currentVal + delta;
+            long next = doubleToRawLongBits(nextVal);
+            if (unsafe.compareAndSwapLong(this, valueOffset, current, next))
+                return currentVal;
+        }
+    }
+
+    /**
+     * Atomically adds the given value to the current value.
+     *
+     * @param delta the value to add
+     * @return the updated value
+     */
+    public final double addAndGet(double delta) {
+        while (true) {
+            long current = value;
+            double currentVal = longBitsToDouble(current);
+            double nextVal = currentVal + delta;
+            long next = doubleToRawLongBits(nextVal);
+            if (unsafe.compareAndSwapLong(this, valueOffset, current, next))
+                return nextVal;
+        }
+    }
+
+    /**
+     * Returns the String representation of the current value.
+     * @return the String representation of the current value
+     */
+    public String toString() {
+        return Double.toString(get());
+    }
+
+    /**
+     * Returns the value of this {@code AtomicDouble} as an {@code int}
+     * after a narrowing primitive conversion.
+     */
+    public int intValue() {
+        return (int) get();
+    }
+
+    /**
+     * Returns the value of this {@code AtomicDouble} as a {@code long}
+     * after a narrowing primitive conversion.
+     */
+    public long longValue() {
+        return (long) get();
+    }
+
+    /**
+     * Returns the value of this {@code AtomicDouble} as a {@code float}
+     * after a narrowing primitive conversion.
+     */
+    public float floatValue() {
+        return (float) get();
+    }
+
+    /**
+     * Returns the value of this {@code AtomicDouble} as a {@code double}.
+     */
+    public double doubleValue() {
+        return get();
+    }
+
+    /**
+     * Saves the state to a stream (that is, serializes it).
+     *
+     * @serialData The current value is emitted (a {@code double}).
+     */
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+
+        s.writeDouble(get());
+    }
+
+    /**
+     * Reconstitutes the instance from a stream (that is, deserializes it).
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+
+        set(s.readDouble());
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe unsafe = getUnsafe();
+    private static final long valueOffset;
+
+    static {
+        try {
+            valueOffset = unsafe.objectFieldOffset
+                (AtomicDouble.class.getDeclaredField("value"));
+        } catch (Exception ex) { throw new Error(ex); }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+}
diff --git a/src/main/java/jsr166e/extra/AtomicDoubleArray.java b/src/main/java/jsr166e/extra/AtomicDoubleArray.java
new file mode 100644
index 00000000000..1f3c8ac941a
--- /dev/null
+++ b/src/main/java/jsr166e/extra/AtomicDoubleArray.java
@@ -0,0 +1,320 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e.extra;
+
+import static java.lang.Double.doubleToRawLongBits;
+import static java.lang.Double.longBitsToDouble;
+
+/**
+ * A {@code double} array in which elements may be updated atomically.
+ * See the {@link java.util.concurrent.atomic} package specification
+ * for description of the properties of atomic variables.
+ *
+ * <p><a name="bitEquals">This class compares primitive {@code double}
+ * values in methods such as {@link #compareAndSet} by comparing their
+ * bitwise representation using {@link Double#doubleToRawLongBits},
+ * which differs from both the primitive double {@code ==} operator
+ * and from {@link Double#equals}, as if implemented by:
+ *  <pre> {@code
+ * static boolean bitEquals(double x, double y) {
+ *   long xBits = Double.doubleToRawLongBits(x);
+ *   long yBits = Double.doubleToRawLongBits(y);
+ *   return xBits == yBits;
+ * }}</pre>
+ *
+ * @author Doug Lea
+ * @author Martin Buchholz
+ */
+public class AtomicDoubleArray implements java.io.Serializable {
+    private static final long serialVersionUID = -2308431214976778248L;
+
+    private final transient long[] array;
+
+    private long checkedByteOffset(int i) {
+        if (i < 0 || i >= array.length)
+            throw new IndexOutOfBoundsException("index " + i);
+
+        return byteOffset(i);
+    }
+
+    private static long byteOffset(int i) {
+        return ((long) i << shift) + base;
+    }
+
+    /**
+     * Creates a new {@code AtomicDoubleArray} of the given length,
+     * with all elements initially zero.
+     *
+     * @param length the length of the array
+     */
+    public AtomicDoubleArray(int length) {
+        array = new long[length];
+    }
+
+    /**
+     * Creates a new {@code AtomicDoubleArray} with the same length
+     * as, and all elements copied from, the given array.
+     *
+     * @param array the array to copy elements from
+     * @throws NullPointerException if array is null
+     */
+    public AtomicDoubleArray(double[] array) {
+        // Visibility guaranteed by final field guarantees
+        final int len = array.length;
+        final long[] a = new long[len];
+        for (int i = 0; i < len; i++)
+            a[i] = doubleToRawLongBits(array[i]);
+        this.array = a;
+    }
+
+    /**
+     * Returns the length of the array.
+     *
+     * @return the length of the array
+     */
+    public final int length() {
+        return array.length;
+    }
+
+    /**
+     * Gets the current value at position {@code i}.
+     *
+     * @param i the index
+     * @return the current value
+     */
+    public final double get(int i) {
+        return longBitsToDouble(getRaw(checkedByteOffset(i)));
+    }
+
+    private long getRaw(long offset) {
+        return unsafe.getLongVolatile(array, offset);
+    }
+
+    /**
+     * Sets the element at position {@code i} to the given value.
+     *
+     * @param i the index
+     * @param newValue the new value
+     */
+    public final void set(int i, double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        unsafe.putLongVolatile(array, checkedByteOffset(i), next);
+    }
+
+    /**
+     * Eventually sets the element at position {@code i} to the given value.
+     *
+     * @param i the index
+     * @param newValue the new value
+     */
+    public final void lazySet(int i, double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        unsafe.putOrderedLong(array, checkedByteOffset(i), next);
+    }
+
+    /**
+     * Atomically sets the element at position {@code i} to the given value
+     * and returns the old value.
+     *
+     * @param i the index
+     * @param newValue the new value
+     * @return the previous value
+     */
+    public final double getAndSet(int i, double newValue) {
+        long next = doubleToRawLongBits(newValue);
+        long offset = checkedByteOffset(i);
+        while (true) {
+            long current = getRaw(offset);
+            if (compareAndSetRaw(offset, current, next))
+                return longBitsToDouble(current);
+        }
+    }
+
+    /**
+     * Atomically sets the element at position {@code i} to the given
+     * updated value
+     * if the current value is <a href="#bitEquals">bitwise equal</a>
+     * to the expected value.
+     *
+     * @param i the index
+     * @param expect the expected value
+     * @param update the new value
+     * @return true if successful. False return indicates that
+     * the actual value was not equal to the expected value.
+     */
+    public final boolean compareAndSet(int i, double expect, double update) {
+        return compareAndSetRaw(checkedByteOffset(i),
+                                doubleToRawLongBits(expect),
+                                doubleToRawLongBits(update));
+    }
+
+    private boolean compareAndSetRaw(long offset, long expect, long update) {
+        return unsafe.compareAndSwapLong(array, offset, expect, update);
+    }
+
+    /**
+     * Atomically sets the element at position {@code i} to the given
+     * updated value
+     * if the current value is <a href="#bitEquals">bitwise equal</a>
+     * to the expected value.
+     *
+     * <p>May <a
+     * href="http://download.oracle.com/javase/7/docs/api/java/util/concurrent/atomic/package-summary.html#Spurious">
+     * fail spuriously</a>
+     * and does not provide ordering guarantees, so is only rarely an
+     * appropriate alternative to {@code compareAndSet}.
+     *
+     * @param i the index
+     * @param expect the expected value
+     * @param update the new value
+     * @return true if successful
+     */
+    public final boolean weakCompareAndSet(int i, double expect, double update) {
+        return compareAndSet(i, expect, update);
+    }
+
+    /**
+     * Atomically adds the given value to the element at index {@code i}.
+     *
+     * @param i the index
+     * @param delta the value to add
+     * @return the previous value
+     */
+    public final double getAndAdd(int i, double delta) {
+        long offset = checkedByteOffset(i);
+        while (true) {
+            long current = getRaw(offset);
+            double currentVal = longBitsToDouble(current);
+            double nextVal = currentVal + delta;
+            long next = doubleToRawLongBits(nextVal);
+            if (compareAndSetRaw(offset, current, next))
+                return currentVal;
+        }
+    }
+
+    /**
+     * Atomically adds the given value to the element at index {@code i}.
+     *
+     * @param i the index
+     * @param delta the value to add
+     * @return the updated value
+     */
+    public double addAndGet(int i, double delta) {
+        long offset = checkedByteOffset(i);
+        while (true) {
+            long current = getRaw(offset);
+            double currentVal = longBitsToDouble(current);
+            double nextVal = currentVal + delta;
+            long next = doubleToRawLongBits(nextVal);
+            if (compareAndSetRaw(offset, current, next))
+                return nextVal;
+        }
+    }
+
+    /**
+     * Returns the String representation of the current values of array.
+     * @return the String representation of the current values of array
+     */
+    public String toString() {
+        int iMax = array.length - 1;
+        if (iMax == -1)
+            return "[]";
+
+        // Double.toString(Math.PI).length() == 17
+        StringBuilder b = new StringBuilder((17 + 2) * (iMax + 1));
+        b.append('[');
+        for (int i = 0;; i++) {
+            b.append(longBitsToDouble(getRaw(byteOffset(i))));
+            if (i == iMax)
+                return b.append(']').toString();
+            b.append(',').append(' ');
+        }
+    }
+
+    /**
+     * Saves the state to a stream (that is, serializes it).
+     *
+     * @serialData The length of the array is emitted (int), followed by all
+     *             of its elements (each a {@code double}) in the proper order.
+     */
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+
+        // Write out array length
+        int length = length();
+        s.writeInt(length);
+
+        // Write out all elements in the proper order.
+        for (int i = 0; i < length; i++)
+            s.writeDouble(get(i));
+    }
+
+    /**
+     * Reconstitutes the instance from a stream (that is, deserializes it).
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+
+        // Read in array length and allocate array
+        int length = s.readInt();
+        unsafe.putObjectVolatile(this, arrayOffset, new long[length]);
+
+        // Read in all elements in the proper order.
+        for (int i = 0; i < length; i++)
+            set(i, s.readDouble());
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe unsafe = getUnsafe();
+    private static final long arrayOffset;
+    private static final int base = unsafe.arrayBaseOffset(long[].class);
+    private static final int shift;
+
+    static {
+        try {
+            Class<?> k = AtomicDoubleArray.class;
+            arrayOffset = unsafe.objectFieldOffset
+                (k.getDeclaredField("array"));
+            int scale = unsafe.arrayIndexScale(long[].class);
+            if ((scale & (scale - 1)) != 0)
+                throw new Error("data type scale not a power of two");
+            shift = 31 - Integer.numberOfLeadingZeros(scale);
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+}
diff --git a/src/main/java/jsr166e/extra/ReadMostlyVector.java b/src/main/java/jsr166e/extra/ReadMostlyVector.java
new file mode 100644
index 00000000000..d72c4615ec0
--- /dev/null
+++ b/src/main/java/jsr166e/extra/ReadMostlyVector.java
@@ -0,0 +1,1636 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166e.extra;
+import jsr166e.*;
+import java.util.*;
+
+/**
+ * A class with the same methods and array-based characteristics as
+ * {@link java.util.Vector} but with reduced contention and improved
+ * throughput when invocations of read-only methods by multiple
+ * threads are most common.
+ *
+ * <p> The iterators returned by this class's {@link #iterator()
+ * iterator} and {@link #listIterator(int) listIterator} methods are
+ * best-effort in the presence of concurrent modifications, and do
+ * <em>NOT</em> throw {@link ConcurrentModificationException}.  An
+ * iterator's {@code next()} method returns consecutive elements as
+ * they appear in the underlying array upon each access. Alternatively,
+ * method {@link #snapshotIterator} may be used for deterministic
+ * traversals, at the expense of making a copy, and unavailability of
+ * method {@code Iterator.remove}.
+ *
+ * <p>Otherwise, this class supports all methods, under the same
+ * documented specifications, as {@code Vector}.  Consult {@link
+ * java.util.Vector} for detailed specifications.  Additionally, this
+ * class provides methods {@link #addIfAbsent} and {@link
+ * #addAllAbsent}.
+ *
+ * @author Doug Lea
+ */
+public class ReadMostlyVector<E>
+        implements List<E>, RandomAccess, Cloneable, java.io.Serializable {
+    private static final long serialVersionUID = 8673264195747942595L;
+
+    /*
+     * This class exists mainly as a vehicle to exercise various
+     * constructions using SequenceLocks. Read-only methods
+     * take one of a few forms:
+     *
+     * Short methods,including get(index), continually retry obtaining
+     * a snapshot of array, count, and element, using sequence number
+     * to validate.
+     *
+     * Methods that are potentially O(n) (or worse) try once in
+     * read-only mode, and then lock. When in read-only mode, they
+     * validate only at the end of an array scan unless the element is
+     * actually used (for example, as an argument of method equals).
+     *
+     * We rely on some invariants that are always true, even for field
+     * reads in read-only mode that have not yet been validated:
+     * - array != null
+     * - count >= 0
+     */
+
+    /**
+     * The maximum size of array to allocate.
+     * See CopyOnWriteArrayList for explanation.
+     */
+    private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+    // fields are non-private to simplify nested class access
+    volatile Object[] array;
+    final SequenceLock lock;
+    volatile int count;
+    final int capacityIncrement;
+
+    /**
+     * Creates an empty vector with the given initial capacity and
+     * capacity increment.
+     *
+     * @param initialCapacity the initial capacity of the underlying array
+     * @param capacityIncrement if non-zero, the number to
+     * add when resizing to accommodate additional elements.
+     * If zero, the array size is doubled when resized.
+     *
+     * @throws IllegalArgumentException if initial capacity is negative
+     */
+    public ReadMostlyVector(int initialCapacity, int capacityIncrement) {
+        super();
+        if (initialCapacity < 0)
+            throw new IllegalArgumentException("Illegal Capacity: "+
+                                               initialCapacity);
+        this.array = new Object[initialCapacity];
+        this.capacityIncrement = capacityIncrement;
+        this.lock = new SequenceLock();
+    }
+
+    /**
+     * Creates an empty vector with the given initial capacity.
+     *
+     * @param initialCapacity the initial capacity of the underlying array
+     *
+     * @throws IllegalArgumentException if initial capacity is negative
+     */
+    public ReadMostlyVector(int initialCapacity) {
+        this(initialCapacity, 0);
+    }
+
+    /**
+     * Creates an empty vector with an underlying array of size {@code 10}.
+     */
+    public ReadMostlyVector() {
+        this(10, 0);
+    }
+
+    /**
+     * Creates a vector containing the elements of the specified
+     * collection, in the order they are returned by the collection's
+     * iterator.
+     *
+     * @param c the collection of initially held elements
+     * @throws NullPointerException if the specified collection is null
+     */
+    public ReadMostlyVector(Collection<? extends E> c) {
+        Object[] elements = c.toArray();
+        // c.toArray might (incorrectly) not return Object[] (see 6260652)
+        if (elements.getClass() != Object[].class)
+            elements = Arrays.copyOf(elements, elements.length, Object[].class);
+        this.array = elements;
+        this.count = elements.length;
+        this.capacityIncrement = 0;
+        this.lock = new SequenceLock();
+    }
+
+    // internal constructor for clone
+    ReadMostlyVector(Object[] array, int count, int capacityIncrement) {
+        this.array = array;
+        this.count = count;
+        this.capacityIncrement = capacityIncrement;
+        this.lock = new SequenceLock();
+    }
+
+    // For explanation, see CopyOnWriteArrayList
+    final Object[] grow(int minCapacity) {
+        int oldCapacity = array.length;
+        int newCapacity = oldCapacity + ((capacityIncrement > 0) ?
+                                         capacityIncrement : oldCapacity);
+        if (newCapacity - minCapacity < 0)
+            newCapacity = minCapacity;
+        if (newCapacity - MAX_ARRAY_SIZE > 0)
+            newCapacity = hugeCapacity(minCapacity);
+        return array = Arrays.copyOf(array, newCapacity);
+    }
+
+    static int hugeCapacity(int minCapacity) {
+        if (minCapacity < 0) // overflow
+            throw new OutOfMemoryError();
+        return (minCapacity > MAX_ARRAY_SIZE) ?
+            Integer.MAX_VALUE :
+            MAX_ARRAY_SIZE;
+    }
+
+    /*
+     * Internal versions of most base functionality, wrapped
+     * in different ways from public methods from this class
+     * as well as sublist and iterator classes.
+     */
+
+    /**
+     * Version of indexOf that returns -1 if either not present or invalid.
+     *
+     * @throws ArrayIndexOutOfBoundsException if index is negative
+     */
+    final int validatedIndexOf(Object x, Object[] items, int index, int fence,
+                               long seq) {
+        for (int i = index; i < fence; ++i) {
+            Object e = items[i];
+            if (lock.getSequence() != seq)
+                break;
+            if ((x == null) ? e == null : x.equals(e))
+                return i;
+        }
+        return -1;
+    }
+
+    /**
+     * @throws ArrayIndexOutOfBoundsException if index is negative
+     */
+    final int rawIndexOf(Object x, int index, int fence) {
+        Object[] items = array;
+        for (int i = index; i < fence; ++i) {
+            Object e = items[i];
+            if ((x == null) ? e == null : x.equals(e))
+                return i;
+        }
+        return -1;
+    }
+
+    final int validatedLastIndexOf(Object x, Object[] items,
+                                   int index, int origin, long seq) {
+        for (int i = index; i >= origin; --i) {
+            Object e = items[i];
+            if (lock.getSequence() != seq)
+                break;
+            if ((x == null) ? e == null : x.equals(e))
+                return i;
+        }
+        return -1;
+    }
+
+    final int rawLastIndexOf(Object x, int index, int origin) {
+        Object[] items = array;
+        for (int i = index; i >= origin; --i) {
+            Object e = items[i];
+            if ((x == null) ? e == null : x.equals(e))
+                return i;
+        }
+        return -1;
+    }
+
+    final void rawAdd(E e) {
+        int n = count;
+        Object[] items = array;
+        if (n >= items.length)
+            items = grow(n + 1);
+        items[n] = e;
+        count = n + 1;
+    }
+
+    final void rawAddAt(int index, E e) {
+        int n = count;
+        Object[] items = array;
+        if (index > n)
+            throw new ArrayIndexOutOfBoundsException(index);
+        if (n >= items.length)
+            items = grow(n + 1);
+        if (index < n)
+            System.arraycopy(items, index, items, index + 1, n - index);
+        items[index] = e;
+        count = n + 1;
+    }
+
+    final boolean rawAddAllAt(int index, Object[] elements) {
+        int n = count;
+        Object[] items = array;
+        if (index < 0 || index > n)
+            throw new ArrayIndexOutOfBoundsException(index);
+        int len = elements.length;
+        if (len == 0)
+            return false;
+        int newCount = n + len;
+        if (newCount >= items.length)
+            items = grow(newCount);
+        int mv = n - index;
+        if (mv > 0)
+            System.arraycopy(items, index, items, index + len, mv);
+        System.arraycopy(elements, 0, items, index, len);
+        count = newCount;
+        return true;
+    }
+
+    final boolean rawRemoveAt(int index) {
+        int n = count - 1;
+        Object[] items = array;
+        if (index < 0 || index > n)
+            return false;
+        int mv = n - index;
+        if (mv > 0)
+            System.arraycopy(items, index + 1, items, index, mv);
+        items[n] = null;
+        count = n;
+        return true;
+    }
+
+    /**
+     * Internal version of removeAll for lists and sublists. In this
+     * and other similar methods below, the bound argument is, if
+     * non-negative, the purported upper bound of a list/sublist, or
+     * is left negative if the bound should be determined via count
+     * field under lock.
+     */
+    final boolean internalRemoveAll(Collection<?> c, int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        boolean removed = false;
+        lock.lock();
+        try {
+            int n = count;
+            int fence = bound < 0 || bound > n ? n : bound;
+            if (origin >= 0 && origin < fence) {
+                for (Object x : c) {
+                    while (rawRemoveAt(rawIndexOf(x, origin, fence)))
+                        removed = true;
+                }
+            }
+        } finally {
+            lock.unlock();
+        }
+        return removed;
+    }
+
+    final boolean internalRetainAll(Collection<?> c, int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        boolean removed = false;
+        if (c != this) {
+            lock.lock();
+            try {
+                Object[] items = array;
+                int i = origin;
+                int n = count;
+                int fence = bound < 0 || bound > n ? n : bound;
+                while (i >= 0 && i < fence) {
+                    if (c.contains(items[i]))
+                        ++i;
+                    else {
+                        --fence;
+                        int mv = --n - i;
+                        if (mv > 0)
+                            System.arraycopy(items, i + 1, items, i, mv);
+                    }
+                }
+                if (count != n) {
+                    count = n;
+                    removed = true;
+                }
+            } finally {
+                lock.unlock();
+            }
+        }
+        return removed;
+    }
+
+    final void internalClear(int origin, int bound) {
+        int n = count;
+        int fence = bound < 0 || bound > n ? n : bound;
+        if (origin >= 0 && origin < fence) {
+            Object[] items = array;
+            int removed = fence - origin;
+            int newCount = n - removed;
+            int mv = n - (origin + removed);
+            if (mv > 0)
+                System.arraycopy(items, origin + removed, items, origin, mv);
+            for (int i = n; i < newCount; ++i)
+                items[i] = null;
+            count = newCount;
+        }
+    }
+
+    final boolean internalContainsAll(Collection<?> c, int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        boolean contained;
+        boolean locked = false;
+        try {
+            for (;;) {
+                long seq = lock.awaitAvailability();
+                int n = count;
+                Object[] items = array;
+                int len = items.length;
+                if (n > len)
+                    continue;
+                int fence = bound < 0 || bound > n ? n : bound;
+                if (origin < 0)
+                    contained = false;
+                else {
+                    contained = true;
+                    for (Object e : c) {
+                        int idx = (locked ?
+                                   rawIndexOf(e, origin, fence) :
+                                   validatedIndexOf(e, items, origin,
+                                                    fence, seq));
+                        if (idx < 0) {
+                            contained = false;
+                            break;
+                        }
+                    }
+                }
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return contained;
+    }
+
+    final boolean internalEquals(List<?> list, int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        boolean locked = false;
+        boolean equal;
+        try {
+            for (;;) {
+                long seq = lock.awaitAvailability();
+                Object[] items = array;
+                int n = count;
+                if (n > items.length || origin < 0)
+                    equal = false;
+                else {
+                    equal = true;
+                    int fence = bound < 0 || bound > n ? n : bound;
+                    Iterator<?> it = list.iterator();
+                    for (int i = origin; i < fence; ++i) {
+                        Object x = items[i];
+                        Object y;
+                        if ((!locked && lock.getSequence() != seq) ||
+                            !it.hasNext() ||
+                            (y = it.next()) == null ?
+                            x != null : !y.equals(x)) {
+                            equal = false;
+                            break;
+                        }
+                    }
+                    if (equal && it.hasNext())
+                        equal = false;
+                }
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return equal;
+    }
+
+    final int internalHashCode(int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        int hash;
+        boolean locked = false;
+        try {
+            for (;;) {
+                hash = 1;
+                long seq = lock.awaitAvailability();
+                Object[] items = array;
+                int n = count;
+                int len = items.length;
+                if (n > len)
+                    continue;
+                int fence = bound < 0 || bound > n ? n : bound;
+                if (origin >= 0) {
+                    for (int i = origin; i < fence; ++i) {
+                        Object e = items[i];
+                        hash = 31*hash + (e == null ? 0 : e.hashCode());
+                    }
+                }
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return hash;
+    }
+
+    final String internalToString(int origin, int bound) {
+        final SequenceLock lock = this.lock;
+        String ret;
+        boolean locked = false;
+        try {
+            outer:for (;;) {
+                long seq = lock.awaitAvailability();
+                Object[] items = array;
+                int n = count;
+                int len = items.length;
+                if (n > len)
+                    continue;
+                int fence = bound < 0 || bound > n ? n : bound;
+                if (origin < 0 || origin == fence)
+                    ret = "[]";
+                else {
+                    StringBuilder sb = new StringBuilder();
+                    sb.append('[');
+                    for (int i = origin;;) {
+                        Object e = items[i];
+                        if (e == this)
+                            sb.append("(this Collection)");
+                        else if (!locked && lock.getSequence() != seq)
+                            continue outer;
+                        else
+                            sb.append(e.toString());
+                        if (++i < fence)
+                            sb.append(',').append(' ');
+                        else {
+                            ret = sb.append(']').toString();
+                            break;
+                        }
+                    }
+                }
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return ret;
+    }
+
+    final Object[] internalToArray(int origin, int bound) {
+        Object[] result;
+        final SequenceLock lock = this.lock;
+        boolean locked = false;
+        try {
+            for (;;) {
+                result = null;
+                long seq = lock.awaitAvailability();
+                Object[] items = array;
+                int n = count;
+                int len = items.length;
+                if (n > len)
+                    continue;
+                int fence = bound < 0 || bound > n ? n : bound;
+                if (origin >= 0)
+                    result = Arrays.copyOfRange(items, origin, fence,
+                                                Object[].class);
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return result;
+    }
+
+    @SuppressWarnings("unchecked")
+    final <T> T[] internalToArray(T[] a, int origin, int bound) {
+        int alen = a.length;
+        T[] result;
+        final SequenceLock lock = this.lock;
+        boolean locked = false;
+        try {
+            for (;;) {
+                long seq = lock.awaitAvailability();
+                Object[] items = array;
+                int n = count;
+                int len = items.length;
+                if (n > len)
+                    continue;
+                int fence = bound < 0 || bound > n ? n : bound;
+                int rlen = fence - origin;
+                if (rlen < 0)
+                    rlen = 0;
+                if (origin < 0 || alen >= rlen) {
+                    if (rlen > 0)
+                        System.arraycopy(items, 0, a, origin, rlen);
+                    if (alen > rlen)
+                        a[rlen] = null;
+                    result = a;
+                }
+                else
+                    result = (T[]) Arrays.copyOfRange(items, origin,
+                                                      fence, a.getClass());
+                if (lock.getSequence() == seq)
+                    break;
+                lock.lock();
+                locked = true;
+            }
+        } finally {
+            if (locked)
+                lock.unlock();
+        }
+        return result;
+    }
+
+    // public List methods
+
+    public boolean add(E e) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            rawAdd(e);
+        } finally {
+            lock.unlock();
+        }
+        return true;
+    }
+
+    public void add(int index, E element) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            rawAddAt(index, element);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public boolean addAll(Collection<? extends E> c) {
+        Object[] elements = c.toArray();
+        int len = elements.length;
+        if (len == 0)
+            return false;
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            Object[] items = array;
+            int n = count;
+            int newCount = n + len;
+            if (newCount >= items.length)
+                items = grow(newCount);
+            System.arraycopy(elements, 0, items, n, len);
+            count = newCount;
+        } finally {
+            lock.unlock();
+        }
+        return true;
+    }
+
+    public boolean addAll(int index, Collection<? extends E> c) {
+        final SequenceLock lock = this.lock;
+        boolean ret;
+        Object[] elements = c.toArray();
+        lock.lock();
+        try {
+            ret = rawAddAllAt(index, elements);
+        } finally {
+            lock.unlock();
+        }
+        return ret;
+    }
+
+    public void clear() {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            int n = count;
+            Object[] items = array;
+            for (int i = 0; i < n; i++)
+                items[i] = null;
+            count = 0;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public boolean contains(Object o) {
+        return indexOf(o, 0) >= 0;
+    }
+
+    public boolean containsAll(Collection<?> c) {
+        return internalContainsAll(c, 0, -1);
+    }
+
+    public boolean equals(Object o) {
+        if (o == this)
+            return true;
+        if (!(o instanceof List))
+            return false;
+        return internalEquals((List<?>)o, 0, -1);
+    }
+
+    public E get(int index) {
+        final SequenceLock lock = this.lock;
+        for (;;) {
+            long seq = lock.awaitAvailability();
+            int n = count;
+            Object[] items = array;
+            @SuppressWarnings("unchecked")
+            E e = (index < items.length) ? (E) items[index] : null;
+            if (lock.getSequence() == seq) {
+                if (index >= n)
+                    throw new ArrayIndexOutOfBoundsException(index);
+                return e;
+            }
+        }
+    }
+
+    public int hashCode() {
+        return internalHashCode(0, -1);
+    }
+
+    public int indexOf(Object o) {
+        return indexOf(o, 0);
+    }
+
+    public boolean isEmpty() {
+        return count == 0;
+    }
+
+    public Iterator<E> iterator() {
+        return new Itr<E>(this, 0);
+    }
+
+    public int lastIndexOf(Object o) {
+        final SequenceLock lock = this.lock;
+        for (;;) {
+            long seq = lock.awaitAvailability();
+            Object[] items = array;
+            int n = count;
+            if (n <= items.length) {
+                for (int i = n - 1; i >= 0; --i) {
+                    Object e = items[i];
+                    if (lock.getSequence() != seq) {
+                        lock.lock();
+                        try {
+                            return rawLastIndexOf(o, count - 1, 0);
+                        } finally {
+                            lock.unlock();
+                        }
+                    }
+                    else if ((o == null) ? e == null : o.equals(e))
+                        return i;
+                }
+                return -1;
+            }
+        }
+    }
+
+    public ListIterator<E> listIterator() {
+        return new Itr<E>(this, 0);
+    }
+
+    public ListIterator<E> listIterator(int index) {
+        return new Itr<E>(this, index);
+    }
+
+    public E remove(int index) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            if (index < 0 || index >= count)
+                throw new ArrayIndexOutOfBoundsException(index);
+            @SuppressWarnings("unchecked")
+            E oldValue = (E) array[index];
+            rawRemoveAt(index);
+            return oldValue;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public boolean remove(Object o) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            return rawRemoveAt(rawIndexOf(o, 0, count));
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public boolean removeAll(Collection<?> c) {
+        return internalRemoveAll(c, 0, -1);
+    }
+
+    public boolean retainAll(Collection<?> c) {
+        return internalRetainAll(c, 0, -1);
+    }
+
+    public E set(int index, E element) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            Object[] items = array;
+            if (index < 0 || index >= count)
+                throw new ArrayIndexOutOfBoundsException(index);
+            @SuppressWarnings("unchecked")
+            E oldValue = (E) items[index];
+            items[index] = element;
+            return oldValue;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public int size() {
+        return count;
+    }
+
+    public List<E> subList(int fromIndex, int toIndex) {
+        int c = size();
+        int ssize = toIndex - fromIndex;
+        if (fromIndex < 0 || toIndex > c || ssize < 0)
+            throw new IndexOutOfBoundsException();
+        return new ReadMostlyVectorSublist<E>(this, fromIndex, ssize);
+    }
+
+    public Object[] toArray() {
+        return internalToArray(0, -1);
+    }
+
+    public <T> T[] toArray(T[] a) {
+        return internalToArray(a, 0, -1);
+    }
+
+    public String toString() {
+        return internalToString(0, -1);
+    }
+
+    // ReadMostlyVector-only methods
+
+    /**
+     * Appends the element, if not present.
+     *
+     * @param e element to be added to this list, if absent
+     * @return {@code true} if the element was added
+     */
+    public boolean addIfAbsent(E e) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            if (rawIndexOf(e, 0, count) < 0) {
+                rawAdd(e);
+                return true;
+            }
+            else
+                return false;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * Appends all of the elements in the specified collection that
+     * are not already contained in this list, to the end of
+     * this list, in the order that they are returned by the
+     * specified collection's iterator.
+     *
+     * @param c collection containing elements to be added to this list
+     * @return the number of elements added
+     * @throws NullPointerException if the specified collection is null
+     * @see #addIfAbsent(Object)
+     */
+    public int addAllAbsent(Collection<? extends E> c) {
+        int added = 0;
+        Object[] cs = c.toArray();
+        int clen = cs.length;
+        if (clen != 0) {
+            lock.lock();
+            try {
+                for (int i = 0; i < clen; ++i) {
+                    @SuppressWarnings("unchecked")
+                    E e = (E) cs[i];
+                    if (rawIndexOf(e, 0, count) < 0) {
+                        rawAdd(e);
+                        ++added;
+                    }
+                }
+            } finally {
+                lock.unlock();
+            }
+        }
+        return added;
+    }
+
+    /**
+     * Returns an iterator operating over a snapshot copy of the
+     * elements of this collection created upon construction of the
+     * iterator. The iterator does <em>NOT</em> support the
+     * {@code remove} method.
+     *
+     * @return an iterator over the elements in this list in proper sequence
+     */
+    public Iterator<E> snapshotIterator() {
+        return new SnapshotIterator<E>(this);
+    }
+
+    static final class SnapshotIterator<E> implements Iterator<E> {
+        private final Object[] items;
+        private int cursor;
+        SnapshotIterator(ReadMostlyVector<E> v) { items = v.toArray(); }
+        public boolean hasNext() { return cursor < items.length; }
+        @SuppressWarnings("unchecked")
+        public E next() {
+            if (cursor < items.length)
+                return (E) items[cursor++];
+            throw new NoSuchElementException();
+        }
+        public void remove() { throw new UnsupportedOperationException() ; }
+    }
+
+    // Vector-only methods
+
+    /** See {@link Vector#firstElement} */
+    public E firstElement() {
+        final SequenceLock lock = this.lock;
+        for (;;) {
+            long seq = lock.awaitAvailability();
+            Object[] items = array;
+            int n = count;
+            @SuppressWarnings("unchecked")
+            E e = (items.length > 0) ? (E) items[0] : null;
+            if (lock.getSequence() == seq) {
+                if (n <= 0)
+                    throw new NoSuchElementException();
+                return e;
+            }
+        }
+    }
+
+    /** See {@link Vector#lastElement} */
+    public E lastElement() {
+        final SequenceLock lock = this.lock;
+        for (;;) {
+            long seq = lock.awaitAvailability();
+            Object[] items = array;
+            int n = count;
+            @SuppressWarnings("unchecked")
+            E e = (n > 0 && items.length >= n) ? (E) items[n - 1] : null;
+            if (lock.getSequence() == seq) {
+                if (n <= 0)
+                    throw new NoSuchElementException();
+                return e;
+            }
+        }
+    }
+
+    /** See {@link Vector#indexOf(Object, int)} */
+    public int indexOf(Object o, int index) {
+        final SequenceLock lock = this.lock;
+        long seq = lock.awaitAvailability();
+        Object[] items = array;
+        int n = count;
+        int idx = -1;
+        if (n <= items.length)
+            idx = validatedIndexOf(o, items, index, n, seq);
+        if (lock.getSequence() != seq) {
+            lock.lock();
+            try {
+                idx = rawIndexOf(o, index, count);
+            } finally {
+                lock.unlock();
+            }
+        }
+        // Above code will throw AIOOBE when index < 0
+        return idx;
+    }
+
+    /** See {@link Vector#lastIndexOf(Object, int)} */
+    public int lastIndexOf(Object o, int index) {
+        final SequenceLock lock = this.lock;
+        long seq = lock.awaitAvailability();
+        Object[] items = array;
+        int n = count;
+        int idx = -1;
+        if (index < Math.min(n, items.length))
+            idx = validatedLastIndexOf(o, items, index, 0, seq);
+        if (lock.getSequence() != seq) {
+            lock.lock();
+            try {
+                n = count;
+                if (index < n)
+                    idx = rawLastIndexOf(o, index, 0);
+            } finally {
+                lock.unlock();
+            }
+        }
+        if (index >= n)
+            throw new IndexOutOfBoundsException(index + " >= " + n);
+        return idx;
+    }
+
+    /** See {@link Vector#setSize} */
+    public void setSize(int newSize) {
+        if (newSize < 0)
+            throw new ArrayIndexOutOfBoundsException(newSize);
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            int n = count;
+            if (newSize > n)
+                grow(newSize);
+            else {
+                Object[] items = array;
+                for (int i = newSize ; i < n ; i++)
+                    items[i] = null;
+            }
+            count = newSize;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /** See {@link Vector#copyInto} */
+    public void copyInto(Object[] anArray) {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            System.arraycopy(array, 0, anArray, 0, count);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /** See {@link Vector#trimToSize} */
+    public void trimToSize() {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            Object[] items = array;
+            int n = count;
+            if (n < items.length)
+                array = Arrays.copyOf(items, n);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /** See {@link Vector#ensureCapacity} */
+    public void ensureCapacity(int minCapacity) {
+        if (minCapacity > 0) {
+            final SequenceLock lock = this.lock;
+            lock.lock();
+            try {
+                if (minCapacity - array.length > 0)
+                    grow(minCapacity);
+            } finally {
+                lock.unlock();
+            }
+        }
+    }
+
+    /** See {@link Vector#elements} */
+    public Enumeration<E> elements() {
+        return new Itr<E>(this, 0);
+    }
+
+    /** See {@link Vector#capacity} */
+    public int capacity() {
+        return array.length;
+    }
+
+    /** See {@link Vector#elementAt} */
+    public E elementAt(int index) {
+        return get(index);
+    }
+
+    /** See {@link Vector#setElementAt} */
+    public void setElementAt(E obj, int index) {
+        set(index, obj);
+    }
+
+    /** See {@link Vector#removeElementAt} */
+    public void removeElementAt(int index) {
+        remove(index);
+    }
+
+    /** See {@link Vector#insertElementAt} */
+    public void insertElementAt(E obj, int index) {
+        add(index, obj);
+    }
+
+    /** See {@link Vector#addElement} */
+    public void addElement(E obj) {
+        add(obj);
+    }
+
+    /** See {@link Vector#removeElement} */
+    public boolean removeElement(Object obj) {
+        return remove(obj);
+    }
+
+    /** See {@link Vector#removeAllElements} */
+    public void removeAllElements() {
+        clear();
+    }
+
+    // other methods
+
+    public ReadMostlyVector<E> clone() {
+        final SequenceLock lock = this.lock;
+        Object[] a = null;
+        boolean retry = false;
+        long seq = lock.awaitAvailability();
+        Object[] items = array;
+        int n = count;
+        if (n <= items.length)
+            a = Arrays.copyOf(items, n);
+        else
+            retry = true;
+        if (retry || lock.getSequence() != seq) {
+            lock.lock();
+            try {
+                n = count;
+                a = Arrays.copyOf(array, n);
+            } finally {
+                lock.unlock();
+            }
+        }
+        return new ReadMostlyVector<E>(a, n, capacityIncrement);
+    }
+
+    private void writeObject(java.io.ObjectOutputStream s)
+            throws java.io.IOException {
+        final SequenceLock lock = this.lock;
+        lock.lock();
+        try {
+            s.defaultWriteObject();
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    static final class Itr<E> implements ListIterator<E>, Enumeration<E> {
+        final ReadMostlyVector<E> list;
+        final SequenceLock lock;
+        Object[] items;
+        E next, prev;
+        long seq;
+        int cursor;
+        int fence;
+        int lastRet;
+        boolean validNext, validPrev;
+
+        Itr(ReadMostlyVector<E> list, int index) {
+            this.list = list;
+            this.lock = list.lock;
+            this.cursor = index;
+            this.lastRet = -1;
+            refresh();
+            if (index < 0 || index > fence)
+                throw new ArrayIndexOutOfBoundsException(index);
+        }
+
+        private void refresh() {
+            validNext = validPrev = false;
+            do {
+                seq = lock.awaitAvailability();
+                items = list.array;
+            } while ((fence = list.count) > items.length ||
+                     lock.getSequence() != seq);
+        }
+
+        @SuppressWarnings("unchecked")
+        public boolean hasNext() {
+            boolean valid;
+            int i = cursor;
+            for (;;) {
+                if (i >= fence || i < 0 || i >= items.length) {
+                    valid = false;
+                    break;
+                }
+                next = (E) items[i];
+                if (lock.getSequence() == seq) {
+                    valid = true;
+                    break;
+                }
+                refresh();
+            }
+            return validNext = valid;
+        }
+
+        @SuppressWarnings("unchecked")
+        public boolean hasPrevious() {
+            boolean valid;
+            int i = cursor - 1;
+            for (;;) {
+                if (i >= fence || i < 0 || i >= items.length) {
+                    valid = false;
+                    break;
+                }
+                prev = (E) items[i];
+                if (lock.getSequence() == seq) {
+                    valid = true;
+                    break;
+                }
+                refresh();
+            }
+            return validPrev = valid;
+        }
+
+        public E next() {
+            if (validNext || hasNext()) {
+                validNext = false;
+                lastRet = cursor++;
+                return next;
+            }
+            throw new NoSuchElementException();
+        }
+
+        public E previous() {
+            if (validPrev || hasPrevious()) {
+                validPrev = false;
+                lastRet = cursor--;
+                return prev;
+            }
+            throw new NoSuchElementException();
+        }
+
+        public void remove() {
+            int i = lastRet;
+            if (i < 0)
+                throw new IllegalStateException();
+            lock.lock();
+            try {
+                if (i < list.count)
+                    list.remove(i);
+            } finally {
+                lock.unlock();
+            }
+            cursor = i;
+            lastRet = -1;
+            refresh();
+        }
+
+        public void set(E e) {
+            int i = lastRet;
+            if (i < 0)
+                throw new IllegalStateException();
+            lock.lock();
+            try {
+                if (i < list.count)
+                    list.set(i, e);
+            } finally {
+                lock.unlock();
+            }
+            refresh();
+        }
+
+        public void add(E e) {
+            int i = cursor;
+            if (i < 0)
+                throw new IllegalStateException();
+            lock.lock();
+            try {
+                if (i <= list.count)
+                    list.add(i, e);
+            } finally {
+                lock.unlock();
+            }
+            cursor = i + 1;
+            lastRet = -1;
+            refresh();
+        }
+
+        public boolean hasMoreElements() { return hasNext(); }
+        public E nextElement() { return next(); }
+        public int nextIndex() { return cursor; }
+        public int previousIndex() { return cursor - 1; }
+    }
+
+    static final class ReadMostlyVectorSublist<E>
+            implements List<E>, RandomAccess, java.io.Serializable {
+        static final long serialVersionUID = 3041673470172026059L;
+
+        final ReadMostlyVector<E> list;
+        final int offset;
+        volatile int size;
+
+        ReadMostlyVectorSublist(ReadMostlyVector<E> list,
+                                int offset, int size) {
+            this.list = list;
+            this.offset = offset;
+            this.size = size;
+        }
+
+        private void rangeCheck(int index) {
+            if (index < 0 || index >= size)
+                throw new ArrayIndexOutOfBoundsException(index);
+        }
+
+        public boolean add(E element) {
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                int c = size;
+                list.rawAddAt(c + offset, element);
+                size = c + 1;
+            } finally {
+                lock.unlock();
+            }
+            return true;
+        }
+
+        public void add(int index, E element) {
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                if (index < 0 || index > size)
+                    throw new ArrayIndexOutOfBoundsException(index);
+                list.rawAddAt(index + offset, element);
+                ++size;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean addAll(Collection<? extends E> c) {
+            Object[] elements = c.toArray();
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                int s = size;
+                int pc = list.count;
+                list.rawAddAllAt(offset + s, elements);
+                int added = list.count - pc;
+                size = s + added;
+                return added != 0;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean addAll(int index, Collection<? extends E> c) {
+            Object[] elements = c.toArray();
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                int s = size;
+                if (index < 0 || index > s)
+                    throw new ArrayIndexOutOfBoundsException(index);
+                int pc = list.count;
+                list.rawAddAllAt(index + offset, elements);
+                int added = list.count - pc;
+                size = s + added;
+                return added != 0;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public void clear() {
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                list.internalClear(offset, offset + size);
+                size = 0;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean contains(Object o) {
+            return indexOf(o) >= 0;
+        }
+
+        public boolean containsAll(Collection<?> c) {
+            return list.internalContainsAll(c, offset, offset + size);
+        }
+
+        public boolean equals(Object o) {
+            if (o == this)
+                return true;
+            if (!(o instanceof List))
+                return false;
+            return list.internalEquals((List<?>)(o), offset, offset + size);
+        }
+
+        public E get(int index) {
+            if (index < 0 || index >= size)
+                throw new ArrayIndexOutOfBoundsException(index);
+            return list.get(index + offset);
+        }
+
+        public int hashCode() {
+            return list.internalHashCode(offset, offset + size);
+        }
+
+        public int indexOf(Object o) {
+            final SequenceLock lock = list.lock;
+            long seq = lock.awaitAvailability();
+            Object[] items = list.array;
+            int c = list.count;
+            if (c <= items.length) {
+                int idx = list.validatedIndexOf(o, items, offset,
+                                                offset + size, seq);
+                if (lock.getSequence() == seq)
+                    return idx < 0 ? -1 : idx - offset;
+            }
+            lock.lock();
+            try {
+                int idx = list.rawIndexOf(o, offset, offset + size);
+                return idx < 0 ? -1 : idx - offset;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean isEmpty() {
+            return size == 0;
+        }
+
+        public Iterator<E> iterator() {
+            return new SubItr<E>(this, offset);
+        }
+
+        public int lastIndexOf(Object o) {
+            final SequenceLock lock = list.lock;
+            long seq = lock.awaitAvailability();
+            Object[] items = list.array;
+            int c = list.count;
+            if (c <= items.length) {
+                int idx = list.validatedLastIndexOf(o, items, offset+size-1,
+                                                    offset, seq);
+                if (lock.getSequence() == seq)
+                    return idx < 0 ? -1 : idx - offset;
+            }
+            lock.lock();
+            try {
+                int idx = list.rawLastIndexOf(o, offset + size - 1, offset);
+                return idx < 0 ? -1 : idx - offset;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public ListIterator<E> listIterator() {
+            return new SubItr<E>(this, offset);
+        }
+
+        public ListIterator<E> listIterator(int index) {
+            return new SubItr<E>(this, index + offset);
+        }
+
+        public E remove(int index) {
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                Object[] items = list.array;
+                int i = index + offset;
+                if (index < 0 || index >= size || i >= items.length)
+                    throw new ArrayIndexOutOfBoundsException(index);
+                @SuppressWarnings("unchecked")
+                E result = (E) items[i];
+                list.rawRemoveAt(i);
+                size--;
+                return result;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean remove(Object o) {
+            final SequenceLock lock = list.lock;
+            lock.lock();
+            try {
+                if (list.rawRemoveAt(list.rawIndexOf(o, offset,
+                                                     offset + size))) {
+                    --size;
+                    return true;
+                }
+                else
+                    return false;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        public boolean removeAll(Collection<?> c) {
+            return list.internalRemoveAll(c, offset, offset + size);
+        }
+
+        public boolean retainAll(Collection<?> c) {
+            return list.internalRetainAll(c, offset, offset + size);
+        }
+
+        public E set(int index, E element) {
+            if (index < 0 || index >= size)
+                throw new ArrayIndexOutOfBoundsException(index);
+            return list.set(index+offset, element);
+        }
+
+        public int size() {
+            return size;
+        }
+
+        public List<E> subList(int fromIndex, int toIndex) {
+            int c = size;
+            int ssize = toIndex - fromIndex;
+            if (fromIndex < 0 || toIndex > c || ssize < 0)
+                throw new IndexOutOfBoundsException();
+            return new ReadMostlyVectorSublist<E>(list, offset+fromIndex, ssize);
+        }
+
+        public Object[] toArray() {
+            return list.internalToArray(offset, offset + size);
+        }
+
+        public <T> T[] toArray(T[] a) {
+            return list.internalToArray(a, offset, offset + size);
+        }
+
+        public String toString() {
+            return list.internalToString(offset, offset + size);
+        }
+
+    }
+
+    static final class SubItr<E> implements ListIterator<E> {
+        final ReadMostlyVectorSublist<E> sublist;
+        final ReadMostlyVector<E> list;
+        final SequenceLock lock;
+        Object[] items;
+        E next, prev;
+        long seq;
+        int cursor;
+        int fence;
+        int lastRet;
+        boolean validNext, validPrev;
+
+        SubItr(ReadMostlyVectorSublist<E> sublist, int index) {
+            this.sublist = sublist;
+            this.list = sublist.list;
+            this.lock = list.lock;
+            this.cursor = index;
+            this.lastRet = -1;
+            refresh();
+            if (index < 0 || index > fence)
+                throw new ArrayIndexOutOfBoundsException(index);
+        }
+
+        private void refresh() {
+            validNext = validPrev = false;
+            do {
+                int n;
+                seq = lock.awaitAvailability();
+                items = list.array;
+                if ((n = list.count) > items.length)
+                    continue;
+                int b = sublist.offset + sublist.size;
+                fence = b < n ? b : n;
+            } while (lock.getSequence() != seq);
+        }
+
+        @SuppressWarnings("unchecked")
+        public boolean hasNext() {
+            boolean valid;
+            int i = cursor;
+            for (;;) {
+                if (i >= fence || i < 0 || i >= items.length) {
+                    valid = false;
+                    break;
+                }
+                next = (E) items[i];
+                if (lock.getSequence() == seq) {
+                    valid = true;
+                    break;
+                }
+                refresh();
+            }
+            return validNext = valid;
+        }
+
+        @SuppressWarnings("unchecked")
+        public boolean hasPrevious() {
+            boolean valid;
+            int i = cursor - 1;
+            for (;;) {
+                if (i >= fence || i < 0 || i >= items.length) {
+                    valid = false;
+                    break;
+                }
+                prev = (E) items[i];
+                if (lock.getSequence() == seq) {
+                    valid = true;
+                    break;
+                }
+                refresh();
+            }
+            return validPrev = valid;
+        }
+
+        public E next() {
+            if (validNext || hasNext()) {
+                validNext = false;
+                lastRet = cursor++;
+                return next;
+            }
+            throw new NoSuchElementException();
+        }
+
+        public E previous() {
+            if (validPrev || hasPrevious()) {
+                validPrev = false;
+                lastRet = cursor--;
+                return prev;
+            }
+            throw new NoSuchElementException();
+        }
+
+        public int nextIndex() {
+            return cursor - sublist.offset;
+        }
+
+        public int previousIndex() {
+            return cursor - 1 - sublist.offset;
+        }
+
+        public void remove() {
+            int i = lastRet;
+            if (i < 0)
+                throw new IllegalStateException();
+            cursor = i;
+            lastRet = -1;
+            lock.lock();
+            try {
+                if (i < list.count) {
+                    list.remove(i);
+                    --sublist.size;
+                }
+            } finally {
+                lock.unlock();
+            }
+            refresh();
+        }
+
+        public void set(E e) {
+            int i = lastRet;
+            if (i < 0)
+                throw new IllegalStateException();
+            lock.lock();
+            try {
+                if (i < list.count)
+                    list.set(i, e);
+            } finally {
+                lock.unlock();
+            }
+            refresh();
+        }
+
+        public void add(E e) {
+            int i = cursor;
+            if (i < 0)
+                throw new IllegalStateException();
+            cursor = i + 1;
+            lastRet = -1;
+            lock.lock();
+            try {
+                if (i <= list.count) {
+                    list.add(i, e);
+                    ++sublist.size;
+                }
+            } finally {
+                lock.unlock();
+            }
+            refresh();
+        }
+
+    }
+}
+
diff --git a/src/main/java/jsr166e/package-info.java b/src/main/java/jsr166e/package-info.java
new file mode 100644
index 00000000000..3cfe5dc53b6
--- /dev/null
+++ b/src/main/java/jsr166e/package-info.java
@@ -0,0 +1,9 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+
+// Built on 2012-06-09
+package jsr166e;
diff --git a/src/main/java/jsr166y/ConcurrentLinkedDeque.java b/src/main/java/jsr166y/ConcurrentLinkedDeque.java
new file mode 100644
index 00000000000..d45892844b6
--- /dev/null
+++ b/src/main/java/jsr166y/ConcurrentLinkedDeque.java
@@ -0,0 +1,1468 @@
+/*
+ * Written by Doug Lea and Martin Buchholz with assistance from members of
+ * JCP JSR-166 Expert Group and released to the public domain, as explained
+ * at http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+import java.util.AbstractCollection;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Queue;
+
+/**
+ * An unbounded concurrent {@linkplain Deque deque} based on linked nodes.
+ * Concurrent insertion, removal, and access operations execute safely
+ * across multiple threads.
+ * A {@code ConcurrentLinkedDeque} is an appropriate choice when
+ * many threads will share access to a common collection.
+ * Like most other concurrent collection implementations, this class
+ * does not permit the use of {@code null} elements.
+ *
+ * <p>Iterators are <i>weakly consistent</i>, returning elements
+ * reflecting the state of the deque at some point at or since the
+ * creation of the iterator.  They do <em>not</em> throw {@link
+ * java.util.ConcurrentModificationException
+ * ConcurrentModificationException}, and may proceed concurrently with
+ * other operations.
+ *
+ * <p>Beware that, unlike in most collections, the {@code size} method
+ * is <em>NOT</em> a constant-time operation. Because of the
+ * asynchronous nature of these deques, determining the current number
+ * of elements requires a traversal of the elements, and so may report
+ * inaccurate results if this collection is modified during traversal.
+ * Additionally, the bulk operations {@code addAll},
+ * {@code removeAll}, {@code retainAll}, {@code containsAll},
+ * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
+ * to be performed atomically. For example, an iterator operating
+ * concurrently with an {@code addAll} operation might view only some
+ * of the added elements.
+ *
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Deque} and {@link Iterator} interfaces.
+ *
+ * <p>Memory consistency effects: As with other concurrent collections,
+ * actions in a thread prior to placing an object into a
+ * {@code ConcurrentLinkedDeque}
+ * <a href="package-summary.html#MemoryVisibility"><i>happen-before</i></a>
+ * actions subsequent to the access or removal of that element from
+ * the {@code ConcurrentLinkedDeque} in another thread.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/../technotes/guides/collections/index.html">
+ * Java Collections Framework</a>.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ * @author Martin Buchholz
+ * @param <E> the type of elements held in this collection
+ */
+
+public class ConcurrentLinkedDeque<E>
+    extends AbstractCollection<E>
+    implements Deque<E>, java.io.Serializable {
+
+    /*
+     * This is an implementation of a concurrent lock-free deque
+     * supporting interior removes but not interior insertions, as
+     * required to support the entire Deque interface.
+     *
+     * We extend the techniques developed for ConcurrentLinkedQueue and
+     * LinkedTransferQueue (see the internal docs for those classes).
+     * Understanding the ConcurrentLinkedQueue implementation is a
+     * prerequisite for understanding the implementation of this class.
+     *
+     * The data structure is a symmetrical doubly-linked "GC-robust"
+     * linked list of nodes.  We minimize the number of volatile writes
+     * using two techniques: advancing multiple hops with a single CAS
+     * and mixing volatile and non-volatile writes of the same memory
+     * locations.
+     *
+     * A node contains the expected E ("item") and links to predecessor
+     * ("prev") and successor ("next") nodes:
+     *
+     * class Node<E> { volatile Node<E> prev, next; volatile E item; }
+     *
+     * A node p is considered "live" if it contains a non-null item
+     * (p.item != null).  When an item is CASed to null, the item is
+     * atomically logically deleted from the collection.
+     *
+     * At any time, there is precisely one "first" node with a null
+     * prev reference that terminates any chain of prev references
+     * starting at a live node.  Similarly there is precisely one
+     * "last" node terminating any chain of next references starting at
+     * a live node.  The "first" and "last" nodes may or may not be live.
+     * The "first" and "last" nodes are always mutually reachable.
+     *
+     * A new element is added atomically by CASing the null prev or
+     * next reference in the first or last node to a fresh node
+     * containing the element.  The element's node atomically becomes
+     * "live" at that point.
+     *
+     * A node is considered "active" if it is a live node, or the
+     * first or last node.  Active nodes cannot be unlinked.
+     *
+     * A "self-link" is a next or prev reference that is the same node:
+     *   p.prev == p  or  p.next == p
+     * Self-links are used in the node unlinking process.  Active nodes
+     * never have self-links.
+     *
+     * A node p is active if and only if:
+     *
+     * p.item != null ||
+     * (p.prev == null && p.next != p) ||
+     * (p.next == null && p.prev != p)
+     *
+     * The deque object has two node references, "head" and "tail".
+     * The head and tail are only approximations to the first and last
+     * nodes of the deque.  The first node can always be found by
+     * following prev pointers from head; likewise for tail.  However,
+     * it is permissible for head and tail to be referring to deleted
+     * nodes that have been unlinked and so may not be reachable from
+     * any live node.
+     *
+     * There are 3 stages of node deletion;
+     * "logical deletion", "unlinking", and "gc-unlinking".
+     *
+     * 1. "logical deletion" by CASing item to null atomically removes
+     * the element from the collection, and makes the containing node
+     * eligible for unlinking.
+     *
+     * 2. "unlinking" makes a deleted node unreachable from active
+     * nodes, and thus eventually reclaimable by GC.  Unlinked nodes
+     * may remain reachable indefinitely from an iterator.
+     *
+     * Physical node unlinking is merely an optimization (albeit a
+     * critical one), and so can be performed at our convenience.  At
+     * any time, the set of live nodes maintained by prev and next
+     * links are identical, that is, the live nodes found via next
+     * links from the first node is equal to the elements found via
+     * prev links from the last node.  However, this is not true for
+     * nodes that have already been logically deleted - such nodes may
+     * be reachable in one direction only.
+     *
+     * 3. "gc-unlinking" takes unlinking further by making active
+     * nodes unreachable from deleted nodes, making it easier for the
+     * GC to reclaim future deleted nodes.  This step makes the data
+     * structure "gc-robust", as first described in detail by Boehm
+     * (http://portal.acm.org/citation.cfm?doid=503272.503282).
+     *
+     * GC-unlinked nodes may remain reachable indefinitely from an
+     * iterator, but unlike unlinked nodes, are never reachable from
+     * head or tail.
+     *
+     * Making the data structure GC-robust will eliminate the risk of
+     * unbounded memory retention with conservative GCs and is likely
+     * to improve performance with generational GCs.
+     *
+     * When a node is dequeued at either end, e.g. via poll(), we would
+     * like to break any references from the node to active nodes.  We
+     * develop further the use of self-links that was very effective in
+     * other concurrent collection classes.  The idea is to replace
+     * prev and next pointers with special values that are interpreted
+     * to mean off-the-list-at-one-end.  These are approximations, but
+     * good enough to preserve the properties we want in our
+     * traversals, e.g. we guarantee that a traversal will never visit
+     * the same element twice, but we don't guarantee whether a
+     * traversal that runs out of elements will be able to see more
+     * elements later after enqueues at that end.  Doing gc-unlinking
+     * safely is particularly tricky, since any node can be in use
+     * indefinitely (for example by an iterator).  We must ensure that
+     * the nodes pointed at by head/tail never get gc-unlinked, since
+     * head/tail are needed to get "back on track" by other nodes that
+     * are gc-unlinked.  gc-unlinking accounts for much of the
+     * implementation complexity.
+     *
+     * Since neither unlinking nor gc-unlinking are necessary for
+     * correctness, there are many implementation choices regarding
+     * frequency (eagerness) of these operations.  Since volatile
+     * reads are likely to be much cheaper than CASes, saving CASes by
+     * unlinking multiple adjacent nodes at a time may be a win.
+     * gc-unlinking can be performed rarely and still be effective,
+     * since it is most important that long chains of deleted nodes
+     * are occasionally broken.
+     *
+     * The actual representation we use is that p.next == p means to
+     * goto the first node (which in turn is reached by following prev
+     * pointers from head), and p.next == null && p.prev == p means
+     * that the iteration is at an end and that p is a (static final)
+     * dummy node, NEXT_TERMINATOR, and not the last active node.
+     * Finishing the iteration when encountering such a TERMINATOR is
+     * good enough for read-only traversals, so such traversals can use
+     * p.next == null as the termination condition.  When we need to
+     * find the last (active) node, for enqueueing a new node, we need
+     * to check whether we have reached a TERMINATOR node; if so,
+     * restart traversal from tail.
+     *
+     * The implementation is completely directionally symmetrical,
+     * except that most public methods that iterate through the list
+     * follow next pointers ("forward" direction).
+     *
+     * We believe (without full proof) that all single-element deque
+     * operations (e.g., addFirst, peekLast, pollLast) are linearizable
+     * (see Herlihy and Shavit's book).  However, some combinations of
+     * operations are known not to be linearizable.  In particular,
+     * when an addFirst(A) is racing with pollFirst() removing B, it is
+     * possible for an observer iterating over the elements to observe
+     * A B C and subsequently observe A C, even though no interior
+     * removes are ever performed.  Nevertheless, iterators behave
+     * reasonably, providing the "weakly consistent" guarantees.
+     *
+     * Empirically, microbenchmarks suggest that this class adds about
+     * 40% overhead relative to ConcurrentLinkedQueue, which feels as
+     * good as we can hope for.
+     */
+
+    private static final long serialVersionUID = 876323262645176354L;
+
+    /**
+     * A node from which the first node on list (that is, the unique node p
+     * with p.prev == null && p.next != p) can be reached in O(1) time.
+     * Invariants:
+     * - the first node is always O(1) reachable from head via prev links
+     * - all live nodes are reachable from the first node via succ()
+     * - head != null
+     * - (tmp = head).next != tmp || tmp != head
+     * - head is never gc-unlinked (but may be unlinked)
+     * Non-invariants:
+     * - head.item may or may not be null
+     * - head may not be reachable from the first or last node, or from tail
+     */
+    private transient volatile Node<E> head;
+
+    /**
+     * A node from which the last node on list (that is, the unique node p
+     * with p.next == null && p.prev != p) can be reached in O(1) time.
+     * Invariants:
+     * - the last node is always O(1) reachable from tail via next links
+     * - all live nodes are reachable from the last node via pred()
+     * - tail != null
+     * - tail is never gc-unlinked (but may be unlinked)
+     * Non-invariants:
+     * - tail.item may or may not be null
+     * - tail may not be reachable from the first or last node, or from head
+     */
+    private transient volatile Node<E> tail;
+
+    private static final Node<Object> PREV_TERMINATOR, NEXT_TERMINATOR;
+
+    @SuppressWarnings("unchecked")
+    Node<E> prevTerminator() {
+        return (Node<E>) PREV_TERMINATOR;
+    }
+
+    @SuppressWarnings("unchecked")
+    Node<E> nextTerminator() {
+        return (Node<E>) NEXT_TERMINATOR;
+    }
+
+    static final class Node<E> {
+        volatile Node<E> prev;
+        volatile E item;
+        volatile Node<E> next;
+
+        Node() {  // default constructor for NEXT_TERMINATOR, PREV_TERMINATOR
+        }
+
+        /**
+         * Constructs a new node.  Uses relaxed write because item can
+         * only be seen after publication via casNext or casPrev.
+         */
+        Node(E item) {
+            UNSAFE.putObject(this, itemOffset, item);
+        }
+
+        boolean casItem(E cmp, E val) {
+            return UNSAFE.compareAndSwapObject(this, itemOffset, cmp, val);
+        }
+
+        void lazySetNext(Node<E> val) {
+            UNSAFE.putOrderedObject(this, nextOffset, val);
+        }
+
+        boolean casNext(Node<E> cmp, Node<E> val) {
+            return UNSAFE.compareAndSwapObject(this, nextOffset, cmp, val);
+        }
+
+        void lazySetPrev(Node<E> val) {
+            UNSAFE.putOrderedObject(this, prevOffset, val);
+        }
+
+        boolean casPrev(Node<E> cmp, Node<E> val) {
+            return UNSAFE.compareAndSwapObject(this, prevOffset, cmp, val);
+        }
+
+        // Unsafe mechanics
+
+        private static final sun.misc.Unsafe UNSAFE;
+        private static final long prevOffset;
+        private static final long itemOffset;
+        private static final long nextOffset;
+
+        static {
+            try {
+                UNSAFE = getUnsafe();
+                Class<?> k = Node.class;
+                prevOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("prev"));
+                itemOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("item"));
+                nextOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("next"));
+            } catch (Exception e) {
+                throw new Error(e);
+            }
+        }
+    }
+
+    /**
+     * Links e as first element.
+     */
+    private void linkFirst(E e) {
+        checkNotNull(e);
+        final Node<E> newNode = new Node<E>(e);
+
+        restartFromHead:
+        for (;;)
+            for (Node<E> h = head, p = h, q;;) {
+                if ((q = p.prev) != null &&
+                    (q = (p = q).prev) != null)
+                    // Check for head updates every other hop.
+                    // If p == q, we are sure to follow head instead.
+                    p = (h != (h = head)) ? h : q;
+                else if (p.next == p) // PREV_TERMINATOR
+                    continue restartFromHead;
+                else {
+                    // p is first node
+                    newNode.lazySetNext(p); // CAS piggyback
+                    if (p.casPrev(null, newNode)) {
+                        // Successful CAS is the linearization point
+                        // for e to become an element of this deque,
+                        // and for newNode to become "live".
+                        if (p != h) // hop two nodes at a time
+                            casHead(h, newNode);  // Failure is OK.
+                        return;
+                    }
+                    // Lost CAS race to another thread; re-read prev
+                }
+            }
+    }
+
+    /**
+     * Links e as last element.
+     */
+    private void linkLast(E e) {
+        checkNotNull(e);
+        final Node<E> newNode = new Node<E>(e);
+
+        restartFromTail:
+        for (;;)
+            for (Node<E> t = tail, p = t, q;;) {
+                if ((q = p.next) != null &&
+                    (q = (p = q).next) != null)
+                    // Check for tail updates every other hop.
+                    // If p == q, we are sure to follow tail instead.
+                    p = (t != (t = tail)) ? t : q;
+                else if (p.prev == p) // NEXT_TERMINATOR
+                    continue restartFromTail;
+                else {
+                    // p is last node
+                    newNode.lazySetPrev(p); // CAS piggyback
+                    if (p.casNext(null, newNode)) {
+                        // Successful CAS is the linearization point
+                        // for e to become an element of this deque,
+                        // and for newNode to become "live".
+                        if (p != t) // hop two nodes at a time
+                            casTail(t, newNode);  // Failure is OK.
+                        return;
+                    }
+                    // Lost CAS race to another thread; re-read next
+                }
+            }
+    }
+
+    private static final int HOPS = 2;
+
+    /**
+     * Unlinks non-null node x.
+     */
+    void unlink(Node<E> x) {
+        // assert x != null;
+        // assert x.item == null;
+        // assert x != PREV_TERMINATOR;
+        // assert x != NEXT_TERMINATOR;
+
+        final Node<E> prev = x.prev;
+        final Node<E> next = x.next;
+        if (prev == null) {
+            unlinkFirst(x, next);
+        } else if (next == null) {
+            unlinkLast(x, prev);
+        } else {
+            // Unlink interior node.
+            //
+            // This is the common case, since a series of polls at the
+            // same end will be "interior" removes, except perhaps for
+            // the first one, since end nodes cannot be unlinked.
+            //
+            // At any time, all active nodes are mutually reachable by
+            // following a sequence of either next or prev pointers.
+            //
+            // Our strategy is to find the unique active predecessor
+            // and successor of x.  Try to fix up their links so that
+            // they point to each other, leaving x unreachable from
+            // active nodes.  If successful, and if x has no live
+            // predecessor/successor, we additionally try to gc-unlink,
+            // leaving active nodes unreachable from x, by rechecking
+            // that the status of predecessor and successor are
+            // unchanged and ensuring that x is not reachable from
+            // tail/head, before setting x's prev/next links to their
+            // logical approximate replacements, self/TERMINATOR.
+            Node<E> activePred, activeSucc;
+            boolean isFirst, isLast;
+            int hops = 1;
+
+            // Find active predecessor
+            for (Node<E> p = prev; ; ++hops) {
+                if (p.item != null) {
+                    activePred = p;
+                    isFirst = false;
+                    break;
+                }
+                Node<E> q = p.prev;
+                if (q == null) {
+                    if (p.next == p)
+                        return;
+                    activePred = p;
+                    isFirst = true;
+                    break;
+                }
+                else if (p == q)
+                    return;
+                else
+                    p = q;
+            }
+
+            // Find active successor
+            for (Node<E> p = next; ; ++hops) {
+                if (p.item != null) {
+                    activeSucc = p;
+                    isLast = false;
+                    break;
+                }
+                Node<E> q = p.next;
+                if (q == null) {
+                    if (p.prev == p)
+                        return;
+                    activeSucc = p;
+                    isLast = true;
+                    break;
+                }
+                else if (p == q)
+                    return;
+                else
+                    p = q;
+            }
+
+            // TODO: better HOP heuristics
+            if (hops < HOPS
+                // always squeeze out interior deleted nodes
+                && (isFirst | isLast))
+                return;
+
+            // Squeeze out deleted nodes between activePred and
+            // activeSucc, including x.
+            skipDeletedSuccessors(activePred);
+            skipDeletedPredecessors(activeSucc);
+
+            // Try to gc-unlink, if possible
+            if ((isFirst | isLast) &&
+
+                // Recheck expected state of predecessor and successor
+                (activePred.next == activeSucc) &&
+                (activeSucc.prev == activePred) &&
+                (isFirst ? activePred.prev == null : activePred.item != null) &&
+                (isLast  ? activeSucc.next == null : activeSucc.item != null)) {
+
+                updateHead(); // Ensure x is not reachable from head
+                updateTail(); // Ensure x is not reachable from tail
+
+                // Finally, actually gc-unlink
+                x.lazySetPrev(isFirst ? prevTerminator() : x);
+                x.lazySetNext(isLast  ? nextTerminator() : x);
+            }
+        }
+    }
+
+    /**
+     * Unlinks non-null first node.
+     */
+    private void unlinkFirst(Node<E> first, Node<E> next) {
+        // assert first != null;
+        // assert next != null;
+        // assert first.item == null;
+        for (Node<E> o = null, p = next, q;;) {
+            if (p.item != null || (q = p.next) == null) {
+                if (o != null && p.prev != p && first.casNext(next, p)) {
+                    skipDeletedPredecessors(p);
+                    if (first.prev == null &&
+                        (p.next == null || p.item != null) &&
+                        p.prev == first) {
+
+                        updateHead(); // Ensure o is not reachable from head
+                        updateTail(); // Ensure o is not reachable from tail
+
+                        // Finally, actually gc-unlink
+                        o.lazySetNext(o);
+                        o.lazySetPrev(prevTerminator());
+                    }
+                }
+                return;
+            }
+            else if (p == q)
+                return;
+            else {
+                o = p;
+                p = q;
+            }
+        }
+    }
+
+    /**
+     * Unlinks non-null last node.
+     */
+    private void unlinkLast(Node<E> last, Node<E> prev) {
+        // assert last != null;
+        // assert prev != null;
+        // assert last.item == null;
+        for (Node<E> o = null, p = prev, q;;) {
+            if (p.item != null || (q = p.prev) == null) {
+                if (o != null && p.next != p && last.casPrev(prev, p)) {
+                    skipDeletedSuccessors(p);
+                    if (last.next == null &&
+                        (p.prev == null || p.item != null) &&
+                        p.next == last) {
+
+                        updateHead(); // Ensure o is not reachable from head
+                        updateTail(); // Ensure o is not reachable from tail
+
+                        // Finally, actually gc-unlink
+                        o.lazySetPrev(o);
+                        o.lazySetNext(nextTerminator());
+                    }
+                }
+                return;
+            }
+            else if (p == q)
+                return;
+            else {
+                o = p;
+                p = q;
+            }
+        }
+    }
+
+    /**
+     * Guarantees that any node which was unlinked before a call to
+     * this method will be unreachable from head after it returns.
+     * Does not guarantee to eliminate slack, only that head will
+     * point to a node that was active while this method was running.
+     */
+    private final void updateHead() {
+        // Either head already points to an active node, or we keep
+        // trying to cas it to the first node until it does.
+        Node<E> h, p, q;
+        restartFromHead:
+        while ((h = head).item == null && (p = h.prev) != null) {
+            for (;;) {
+                if ((q = p.prev) == null ||
+                    (q = (p = q).prev) == null) {
+                    // It is possible that p is PREV_TERMINATOR,
+                    // but if so, the CAS is guaranteed to fail.
+                    if (casHead(h, p))
+                        return;
+                    else
+                        continue restartFromHead;
+                }
+                else if (h != head)
+                    continue restartFromHead;
+                else
+                    p = q;
+            }
+        }
+    }
+
+    /**
+     * Guarantees that any node which was unlinked before a call to
+     * this method will be unreachable from tail after it returns.
+     * Does not guarantee to eliminate slack, only that tail will
+     * point to a node that was active while this method was running.
+     */
+    private final void updateTail() {
+        // Either tail already points to an active node, or we keep
+        // trying to cas it to the last node until it does.
+        Node<E> t, p, q;
+        restartFromTail:
+        while ((t = tail).item == null && (p = t.next) != null) {
+            for (;;) {
+                if ((q = p.next) == null ||
+                    (q = (p = q).next) == null) {
+                    // It is possible that p is NEXT_TERMINATOR,
+                    // but if so, the CAS is guaranteed to fail.
+                    if (casTail(t, p))
+                        return;
+                    else
+                        continue restartFromTail;
+                }
+                else if (t != tail)
+                    continue restartFromTail;
+                else
+                    p = q;
+            }
+        }
+    }
+
+    private void skipDeletedPredecessors(Node<E> x) {
+        whileActive:
+        do {
+            Node<E> prev = x.prev;
+            // assert prev != null;
+            // assert x != NEXT_TERMINATOR;
+            // assert x != PREV_TERMINATOR;
+            Node<E> p = prev;
+            findActive:
+            for (;;) {
+                if (p.item != null)
+                    break findActive;
+                Node<E> q = p.prev;
+                if (q == null) {
+                    if (p.next == p)
+                        continue whileActive;
+                    break findActive;
+                }
+                else if (p == q)
+                    continue whileActive;
+                else
+                    p = q;
+            }
+
+            // found active CAS target
+            if (prev == p || x.casPrev(prev, p))
+                return;
+
+        } while (x.item != null || x.next == null);
+    }
+
+    private void skipDeletedSuccessors(Node<E> x) {
+        whileActive:
+        do {
+            Node<E> next = x.next;
+            // assert next != null;
+            // assert x != NEXT_TERMINATOR;
+            // assert x != PREV_TERMINATOR;
+            Node<E> p = next;
+            findActive:
+            for (;;) {
+                if (p.item != null)
+                    break findActive;
+                Node<E> q = p.next;
+                if (q == null) {
+                    if (p.prev == p)
+                        continue whileActive;
+                    break findActive;
+                }
+                else if (p == q)
+                    continue whileActive;
+                else
+                    p = q;
+            }
+
+            // found active CAS target
+            if (next == p || x.casNext(next, p))
+                return;
+
+        } while (x.item != null || x.prev == null);
+    }
+
+    /**
+     * Returns the successor of p, or the first node if p.next has been
+     * linked to self, which will only be true if traversing with a
+     * stale pointer that is now off the list.
+     */
+    final Node<E> succ(Node<E> p) {
+        // TODO: should we skip deleted nodes here?
+        Node<E> q = p.next;
+        return (p == q) ? first() : q;
+    }
+
+    /**
+     * Returns the predecessor of p, or the last node if p.prev has been
+     * linked to self, which will only be true if traversing with a
+     * stale pointer that is now off the list.
+     */
+    final Node<E> pred(Node<E> p) {
+        Node<E> q = p.prev;
+        return (p == q) ? last() : q;
+    }
+
+    /**
+     * Returns the first node, the unique node p for which:
+     *     p.prev == null && p.next != p
+     * The returned node may or may not be logically deleted.
+     * Guarantees that head is set to the returned node.
+     */
+    Node<E> first() {
+        restartFromHead:
+        for (;;)
+            for (Node<E> h = head, p = h, q;;) {
+                if ((q = p.prev) != null &&
+                    (q = (p = q).prev) != null)
+                    // Check for head updates every other hop.
+                    // If p == q, we are sure to follow head instead.
+                    p = (h != (h = head)) ? h : q;
+                else if (p == h
+                         // It is possible that p is PREV_TERMINATOR,
+                         // but if so, the CAS is guaranteed to fail.
+                         || casHead(h, p))
+                    return p;
+                else
+                    continue restartFromHead;
+            }
+    }
+
+    /**
+     * Returns the last node, the unique node p for which:
+     *     p.next == null && p.prev != p
+     * The returned node may or may not be logically deleted.
+     * Guarantees that tail is set to the returned node.
+     */
+    Node<E> last() {
+        restartFromTail:
+        for (;;)
+            for (Node<E> t = tail, p = t, q;;) {
+                if ((q = p.next) != null &&
+                    (q = (p = q).next) != null)
+                    // Check for tail updates every other hop.
+                    // If p == q, we are sure to follow tail instead.
+                    p = (t != (t = tail)) ? t : q;
+                else if (p == t
+                         // It is possible that p is NEXT_TERMINATOR,
+                         // but if so, the CAS is guaranteed to fail.
+                         || casTail(t, p))
+                    return p;
+                else
+                    continue restartFromTail;
+            }
+    }
+
+    // Minor convenience utilities
+
+    /**
+     * Throws NullPointerException if argument is null.
+     *
+     * @param v the element
+     */
+    private static void checkNotNull(Object v) {
+        if (v == null)
+            throw new NullPointerException();
+    }
+
+    /**
+     * Returns element unless it is null, in which case throws
+     * NoSuchElementException.
+     *
+     * @param v the element
+     * @return the element
+     */
+    private E screenNullResult(E v) {
+        if (v == null)
+            throw new NoSuchElementException();
+        return v;
+    }
+
+    /**
+     * Creates an array list and fills it with elements of this list.
+     * Used by toArray.
+     *
+     * @return the arrayList
+     */
+    private ArrayList<E> toArrayList() {
+        ArrayList<E> list = new ArrayList<E>();
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null)
+                list.add(item);
+        }
+        return list;
+    }
+
+    /**
+     * Constructs an empty deque.
+     */
+    public ConcurrentLinkedDeque() {
+        head = tail = new Node<E>(null);
+    }
+
+    /**
+     * Constructs a deque initially containing the elements of
+     * the given collection, added in traversal order of the
+     * collection's iterator.
+     *
+     * @param c the collection of elements to initially contain
+     * @throws NullPointerException if the specified collection or any
+     *         of its elements are null
+     */
+    public ConcurrentLinkedDeque(Collection<? extends E> c) {
+        // Copy c into a private chain of Nodes
+        Node<E> h = null, t = null;
+        for (E e : c) {
+            checkNotNull(e);
+            Node<E> newNode = new Node<E>(e);
+            if (h == null)
+                h = t = newNode;
+            else {
+                t.lazySetNext(newNode);
+                newNode.lazySetPrev(t);
+                t = newNode;
+            }
+        }
+        initHeadTail(h, t);
+    }
+
+    /**
+     * Initializes head and tail, ensuring invariants hold.
+     */
+    private void initHeadTail(Node<E> h, Node<E> t) {
+        if (h == t) {
+            if (h == null)
+                h = t = new Node<E>(null);
+            else {
+                // Avoid edge case of a single Node with non-null item.
+                Node<E> newNode = new Node<E>(null);
+                t.lazySetNext(newNode);
+                newNode.lazySetPrev(t);
+                t = newNode;
+            }
+        }
+        head = h;
+        tail = t;
+    }
+
+    /**
+     * Inserts the specified element at the front of this deque.
+     * As the deque is unbounded, this method will never throw
+     * {@link IllegalStateException}.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public void addFirst(E e) {
+        linkFirst(e);
+    }
+
+    /**
+     * Inserts the specified element at the end of this deque.
+     * As the deque is unbounded, this method will never throw
+     * {@link IllegalStateException}.
+     *
+     * <p>This method is equivalent to {@link #add}.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public void addLast(E e) {
+        linkLast(e);
+    }
+
+    /**
+     * Inserts the specified element at the front of this deque.
+     * As the deque is unbounded, this method will never return {@code false}.
+     *
+     * @return {@code true} (as specified by {@link Deque#offerFirst})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean offerFirst(E e) {
+        linkFirst(e);
+        return true;
+    }
+
+    /**
+     * Inserts the specified element at the end of this deque.
+     * As the deque is unbounded, this method will never return {@code false}.
+     *
+     * <p>This method is equivalent to {@link #add}.
+     *
+     * @return {@code true} (as specified by {@link Deque#offerLast})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean offerLast(E e) {
+        linkLast(e);
+        return true;
+    }
+
+    public E peekFirst() {
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null)
+                return item;
+        }
+        return null;
+    }
+
+    public E peekLast() {
+        for (Node<E> p = last(); p != null; p = pred(p)) {
+            E item = p.item;
+            if (item != null)
+                return item;
+        }
+        return null;
+    }
+
+    /**
+     * @throws NoSuchElementException {@inheritDoc}
+     */
+    public E getFirst() {
+        return screenNullResult(peekFirst());
+    }
+
+    /**
+     * @throws NoSuchElementException {@inheritDoc}
+     */
+    public E getLast() {
+        return screenNullResult(peekLast());
+    }
+
+    public E pollFirst() {
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null && p.casItem(item, null)) {
+                unlink(p);
+                return item;
+            }
+        }
+        return null;
+    }
+
+    public E pollLast() {
+        for (Node<E> p = last(); p != null; p = pred(p)) {
+            E item = p.item;
+            if (item != null && p.casItem(item, null)) {
+                unlink(p);
+                return item;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * @throws NoSuchElementException {@inheritDoc}
+     */
+    public E removeFirst() {
+        return screenNullResult(pollFirst());
+    }
+
+    /**
+     * @throws NoSuchElementException {@inheritDoc}
+     */
+    public E removeLast() {
+        return screenNullResult(pollLast());
+    }
+
+    // *** Queue and stack methods ***
+
+    /**
+     * Inserts the specified element at the tail of this deque.
+     * As the deque is unbounded, this method will never return {@code false}.
+     *
+     * @return {@code true} (as specified by {@link Queue#offer})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean offer(E e) {
+        return offerLast(e);
+    }
+
+    /**
+     * Inserts the specified element at the tail of this deque.
+     * As the deque is unbounded, this method will never throw
+     * {@link IllegalStateException} or return {@code false}.
+     *
+     * @return {@code true} (as specified by {@link Collection#add})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean add(E e) {
+        return offerLast(e);
+    }
+
+    public E poll()           { return pollFirst(); }
+    public E remove()         { return removeFirst(); }
+    public E peek()           { return peekFirst(); }
+    public E element()        { return getFirst(); }
+    public void push(E e)     { addFirst(e); }
+    public E pop()            { return removeFirst(); }
+
+    /**
+     * Removes the first element {@code e} such that
+     * {@code o.equals(e)}, if such an element exists in this deque.
+     * If the deque does not contain the element, it is unchanged.
+     *
+     * @param o element to be removed from this deque, if present
+     * @return {@code true} if the deque contained the specified element
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean removeFirstOccurrence(Object o) {
+        checkNotNull(o);
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null && o.equals(item) && p.casItem(item, null)) {
+                unlink(p);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Removes the last element {@code e} such that
+     * {@code o.equals(e)}, if such an element exists in this deque.
+     * If the deque does not contain the element, it is unchanged.
+     *
+     * @param o element to be removed from this deque, if present
+     * @return {@code true} if the deque contained the specified element
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean removeLastOccurrence(Object o) {
+        checkNotNull(o);
+        for (Node<E> p = last(); p != null; p = pred(p)) {
+            E item = p.item;
+            if (item != null && o.equals(item) && p.casItem(item, null)) {
+                unlink(p);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns {@code true} if this deque contains at least one
+     * element {@code e} such that {@code o.equals(e)}.
+     *
+     * @param o element whose presence in this deque is to be tested
+     * @return {@code true} if this deque contains the specified element
+     */
+    public boolean contains(Object o) {
+        if (o == null) return false;
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null && o.equals(item))
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Returns {@code true} if this collection contains no elements.
+     *
+     * @return {@code true} if this collection contains no elements
+     */
+    public boolean isEmpty() {
+        return peekFirst() == null;
+    }
+
+    /**
+     * Returns the number of elements in this deque.  If this deque
+     * contains more than {@code Integer.MAX_VALUE} elements, it
+     * returns {@code Integer.MAX_VALUE}.
+     *
+     * <p>Beware that, unlike in most collections, this method is
+     * <em>NOT</em> a constant-time operation. Because of the
+     * asynchronous nature of these deques, determining the current
+     * number of elements requires traversing them all to count them.
+     * Additionally, it is possible for the size to change during
+     * execution of this method, in which case the returned result
+     * will be inaccurate. Thus, this method is typically not very
+     * useful in concurrent applications.
+     *
+     * @return the number of elements in this deque
+     */
+    public int size() {
+        int count = 0;
+        for (Node<E> p = first(); p != null; p = succ(p))
+            if (p.item != null)
+                // Collection.size() spec says to max out
+                if (++count == Integer.MAX_VALUE)
+                    break;
+        return count;
+    }
+
+    /**
+     * Removes the first element {@code e} such that
+     * {@code o.equals(e)}, if such an element exists in this deque.
+     * If the deque does not contain the element, it is unchanged.
+     *
+     * @param o element to be removed from this deque, if present
+     * @return {@code true} if the deque contained the specified element
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean remove(Object o) {
+        return removeFirstOccurrence(o);
+    }
+
+    /**
+     * Appends all of the elements in the specified collection to the end of
+     * this deque, in the order that they are returned by the specified
+     * collection's iterator.  Attempts to {@code addAll} of a deque to
+     * itself result in {@code IllegalArgumentException}.
+     *
+     * @param c the elements to be inserted into this deque
+     * @return {@code true} if this deque changed as a result of the call
+     * @throws NullPointerException if the specified collection or any
+     *         of its elements are null
+     * @throws IllegalArgumentException if the collection is this deque
+     */
+    public boolean addAll(Collection<? extends E> c) {
+        if (c == this)
+            // As historically specified in AbstractQueue#addAll
+            throw new IllegalArgumentException();
+
+        // Copy c into a private chain of Nodes
+        Node<E> beginningOfTheEnd = null, last = null;
+        for (E e : c) {
+            checkNotNull(e);
+            Node<E> newNode = new Node<E>(e);
+            if (beginningOfTheEnd == null)
+                beginningOfTheEnd = last = newNode;
+            else {
+                last.lazySetNext(newNode);
+                newNode.lazySetPrev(last);
+                last = newNode;
+            }
+        }
+        if (beginningOfTheEnd == null)
+            return false;
+
+        // Atomically append the chain at the tail of this collection
+        restartFromTail:
+        for (;;)
+            for (Node<E> t = tail, p = t, q;;) {
+                if ((q = p.next) != null &&
+                    (q = (p = q).next) != null)
+                    // Check for tail updates every other hop.
+                    // If p == q, we are sure to follow tail instead.
+                    p = (t != (t = tail)) ? t : q;
+                else if (p.prev == p) // NEXT_TERMINATOR
+                    continue restartFromTail;
+                else {
+                    // p is last node
+                    beginningOfTheEnd.lazySetPrev(p); // CAS piggyback
+                    if (p.casNext(null, beginningOfTheEnd)) {
+                        // Successful CAS is the linearization point
+                        // for all elements to be added to this deque.
+                        if (!casTail(t, last)) {
+                            // Try a little harder to update tail,
+                            // since we may be adding many elements.
+                            t = tail;
+                            if (last.next == null)
+                                casTail(t, last);
+                        }
+                        return true;
+                    }
+                    // Lost CAS race to another thread; re-read next
+                }
+            }
+    }
+
+    /**
+     * Removes all of the elements from this deque.
+     */
+    public void clear() {
+        while (pollFirst() != null)
+            ;
+    }
+
+    /**
+     * Returns an array containing all of the elements in this deque, in
+     * proper sequence (from first to last element).
+     *
+     * <p>The returned array will be "safe" in that no references to it are
+     * maintained by this deque.  (In other words, this method must allocate
+     * a new array).  The caller is thus free to modify the returned array.
+     *
+     * <p>This method acts as bridge between array-based and collection-based
+     * APIs.
+     *
+     * @return an array containing all of the elements in this deque
+     */
+    public Object[] toArray() {
+        return toArrayList().toArray();
+    }
+
+    /**
+     * Returns an array containing all of the elements in this deque,
+     * in proper sequence (from first to last element); the runtime
+     * type of the returned array is that of the specified array.  If
+     * the deque fits in the specified array, it is returned therein.
+     * Otherwise, a new array is allocated with the runtime type of
+     * the specified array and the size of this deque.
+     *
+     * <p>If this deque fits in the specified array with room to spare
+     * (i.e., the array has more elements than this deque), the element in
+     * the array immediately following the end of the deque is set to
+     * {@code null}.
+     *
+     * <p>Like the {@link #toArray()} method, this method acts as
+     * bridge between array-based and collection-based APIs.  Further,
+     * this method allows precise control over the runtime type of the
+     * output array, and may, under certain circumstances, be used to
+     * save allocation costs.
+     *
+     * <p>Suppose {@code x} is a deque known to contain only strings.
+     * The following code can be used to dump the deque into a newly
+     * allocated array of {@code String}:
+     *
+     *  <pre> {@code String[] y = x.toArray(new String[0]);}</pre>
+     *
+     * Note that {@code toArray(new Object[0])} is identical in function to
+     * {@code toArray()}.
+     *
+     * @param a the array into which the elements of the deque are to
+     *          be stored, if it is big enough; otherwise, a new array of the
+     *          same runtime type is allocated for this purpose
+     * @return an array containing all of the elements in this deque
+     * @throws ArrayStoreException if the runtime type of the specified array
+     *         is not a supertype of the runtime type of every element in
+     *         this deque
+     * @throws NullPointerException if the specified array is null
+     */
+    public <T> T[] toArray(T[] a) {
+        return toArrayList().toArray(a);
+    }
+
+    /**
+     * Returns an iterator over the elements in this deque in proper sequence.
+     * The elements will be returned in order from first (head) to last (tail).
+     *
+     * <p>The returned iterator is a "weakly consistent" iterator that
+     * will never throw {@link java.util.ConcurrentModificationException
+     * ConcurrentModificationException}, and guarantees to traverse
+     * elements as they existed upon construction of the iterator, and
+     * may (but is not guaranteed to) reflect any modifications
+     * subsequent to construction.
+     *
+     * @return an iterator over the elements in this deque in proper sequence
+     */
+    public Iterator<E> iterator() {
+        return new Itr();
+    }
+
+    /**
+     * Returns an iterator over the elements in this deque in reverse
+     * sequential order.  The elements will be returned in order from
+     * last (tail) to first (head).
+     *
+     * <p>The returned iterator is a "weakly consistent" iterator that
+     * will never throw {@link java.util.ConcurrentModificationException
+     * ConcurrentModificationException}, and guarantees to traverse
+     * elements as they existed upon construction of the iterator, and
+     * may (but is not guaranteed to) reflect any modifications
+     * subsequent to construction.
+     *
+     * @return an iterator over the elements in this deque in reverse order
+     */
+    public Iterator<E> descendingIterator() {
+        return new DescendingItr();
+    }
+
+    private abstract class AbstractItr implements Iterator<E> {
+        /**
+         * Next node to return item for.
+         */
+        private Node<E> nextNode;
+
+        /**
+         * nextItem holds on to item fields because once we claim
+         * that an element exists in hasNext(), we must return it in
+         * the following next() call even if it was in the process of
+         * being removed when hasNext() was called.
+         */
+        private E nextItem;
+
+        /**
+         * Node returned by most recent call to next. Needed by remove.
+         * Reset to null if this element is deleted by a call to remove.
+         */
+        private Node<E> lastRet;
+
+        abstract Node<E> startNode();
+        abstract Node<E> nextNode(Node<E> p);
+
+        AbstractItr() {
+            advance();
+        }
+
+        /**
+         * Sets nextNode and nextItem to next valid node, or to null
+         * if no such.
+         */
+        private void advance() {
+            lastRet = nextNode;
+
+            Node<E> p = (nextNode == null) ? startNode() : nextNode(nextNode);
+            for (;; p = nextNode(p)) {
+                if (p == null) {
+                    // p might be active end or TERMINATOR node; both are OK
+                    nextNode = null;
+                    nextItem = null;
+                    break;
+                }
+                E item = p.item;
+                if (item != null) {
+                    nextNode = p;
+                    nextItem = item;
+                    break;
+                }
+            }
+        }
+
+        public boolean hasNext() {
+            return nextItem != null;
+        }
+
+        public E next() {
+            E item = nextItem;
+            if (item == null) throw new NoSuchElementException();
+            advance();
+            return item;
+        }
+
+        public void remove() {
+            Node<E> l = lastRet;
+            if (l == null) throw new IllegalStateException();
+            l.item = null;
+            unlink(l);
+            lastRet = null;
+        }
+    }
+
+    /** Forward iterator */
+    private class Itr extends AbstractItr {
+        Node<E> startNode() { return first(); }
+        Node<E> nextNode(Node<E> p) { return succ(p); }
+    }
+
+    /** Descending iterator */
+    private class DescendingItr extends AbstractItr {
+        Node<E> startNode() { return last(); }
+        Node<E> nextNode(Node<E> p) { return pred(p); }
+    }
+
+    /**
+     * Saves the state to a stream (that is, serializes it).
+     *
+     * @serialData All of the elements (each an {@code E}) in
+     * the proper order, followed by a null
+     * @param s the stream
+     */
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+
+        // Write out any hidden stuff
+        s.defaultWriteObject();
+
+        // Write out all elements in the proper order.
+        for (Node<E> p = first(); p != null; p = succ(p)) {
+            E item = p.item;
+            if (item != null)
+                s.writeObject(item);
+        }
+
+        // Use trailing null as sentinel
+        s.writeObject(null);
+    }
+
+    /**
+     * Reconstitutes the instance from a stream (that is, deserializes it).
+     * @param s the stream
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+
+        // Read in elements until trailing null sentinel found
+        Node<E> h = null, t = null;
+        Object item;
+        while ((item = s.readObject()) != null) {
+            @SuppressWarnings("unchecked")
+            Node<E> newNode = new Node<E>((E) item);
+            if (h == null)
+                h = t = newNode;
+            else {
+                t.lazySetNext(newNode);
+                newNode.lazySetPrev(t);
+                t = newNode;
+            }
+        }
+        initHeadTail(h, t);
+    }
+
+
+    private boolean casHead(Node<E> cmp, Node<E> val) {
+        return UNSAFE.compareAndSwapObject(this, headOffset, cmp, val);
+    }
+
+    private boolean casTail(Node<E> cmp, Node<E> val) {
+        return UNSAFE.compareAndSwapObject(this, tailOffset, cmp, val);
+    }
+
+    // Unsafe mechanics
+
+    private static final sun.misc.Unsafe UNSAFE;
+    private static final long headOffset;
+    private static final long tailOffset;
+    static {
+        PREV_TERMINATOR = new Node<Object>();
+        PREV_TERMINATOR.next = PREV_TERMINATOR;
+        NEXT_TERMINATOR = new Node<Object>();
+        NEXT_TERMINATOR.prev = NEXT_TERMINATOR;
+        try {
+            UNSAFE = getUnsafe();
+            Class<?> k = ConcurrentLinkedDeque.class;
+            headOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("head"));
+            tailOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("tail"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166y/CountedCompleter.java b/src/main/java/jsr166y/CountedCompleter.java
new file mode 100644
index 00000000000..cbed8a26f09
--- /dev/null
+++ b/src/main/java/jsr166y/CountedCompleter.java
@@ -0,0 +1,480 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+/**
+ * A resultless {@link ForkJoinTask} with a completion action
+ * performed when triggered and there are no remaining pending
+ * actions. Uses of CountedCompleter are similar to those of other
+ * completion based components (such as {@link
+ * java.nio.channels.CompletionHandler}) except that multiple
+ * <em>pending</em> completions may be necessary to trigger the {@link
+ * #onCompletion} action, not just one. Unless initialized otherwise,
+ * the {@link #getPendingCount pending count} starts at zero, but may
+ * be (atomically) changed using methods {@link #setPendingCount},
+ * {@link #addToPendingCount}, and {@link
+ * #compareAndSetPendingCount}. Upon invocation of {@link
+ * #tryComplete}, if the pending action count is nonzero, it is
+ * decremented; otherwise, the completion action is performed, and if
+ * this completer itself has a completer, the process is continued
+ * with its completer.  As is the case with related synchronization
+ * components such as {@link Phaser} and {@link
+ * java.util.concurrent.Semaphore} these methods affect only internal
+ * counts; they do not establish any further internal bookkeeping. In
+ * particular, the identities of pending tasks are not maintained. As
+ * illustrated below, you can create subclasses that do record some or
+ * all pended tasks or their results when needed.
+ *
+ * <p>A concrete CountedCompleter class must define method {@link
+ * #compute}, that should, in almost all use cases, invoke {@code
+ * tryComplete()} once before returning. The class may also optionally
+ * override method {@link #onCompletion} to perform an action upon
+ * normal completion, and method {@link #onExceptionalCompletion} to
+ * perform an action upon any exception.
+ *
+ * <p>A CountedCompleter that does not itself have a completer (i.e.,
+ * one for which {@link #getCompleter} returns {@code null}) can be
+ * used as a regular ForkJoinTask with this added functionality.
+ * However, any completer that in turn has another completer serves
+ * only as an internal helper for other computations, so its own task
+ * status (as reported in methods such as {@link ForkJoinTask#isDone})
+ * is arbitrary; this status changes only upon explicit invocations of
+ * {@link #complete}, {@link ForkJoinTask#cancel}, {@link
+ * ForkJoinTask#completeExceptionally} or upon exceptional completion
+ * of method {@code compute}. Upon any exceptional completion, the
+ * exception may be relayed to a task's completer (and its completer,
+ * and so on), if one exists and it has not otherwise already
+ * completed.
+ *
+ * <p><b>Sample Usages.</b>
+ *
+ * <p><b>Parallel recursive decomposition.</b> CountedCompleters may
+ * be arranged in trees similar to those often used with {@link
+ * RecursiveAction}s, although the constructions involved in setting
+ * them up typically vary. Even though they entail a bit more
+ * bookkeeping, CountedCompleters may be better choices when applying
+ * a possibly time-consuming operation (that cannot be further
+ * subdivided) to each element of an array or collection; especially
+ * when the operation takes a significantly different amount of time
+ * to complete for some elements than others, either because of
+ * intrinsic variation (for example IO) or auxiliary effects such as
+ * garbage collection.  Because CountedCompleters provide their own
+ * continuations, other threads need not block waiting to perform
+ * them.
+ *
+ * <p> For example, here is an initial version of a class that uses
+ * divide-by-two recursive decomposition to divide work into single
+ * pieces (leaf tasks). Even when work is split into individual calls,
+ * tree-based techniques are usually preferable to directly forking
+ * leaf tasks, because they reduce inter-thread communication and
+ * improve load balancing. In the recursive case, the second of each
+ * pair of subtasks to finish triggers completion of its parent
+ * (because no result combination is performed, the default no-op
+ * implementation of method {@code onCompletion} is not overridden). A
+ * static utility method sets up the base task and invokes it:
+ *
+ * <pre> {@code
+ * class MyOperation<E> { void apply(E e) { ... }  }
+ *
+ * class ForEach<E> extends CountedCompleter {
+ *
+ *     public static <E> void forEach(ForkJoinPool pool, E[] array, MyOperation<E> op) {
+ *         pool.invoke(new ForEach<E>(null, array, op, 0, array.length));
+ *     }
+ *
+ *     final E[] array; final MyOperation<E> op; final int lo, hi;
+ *     ForEach(CountedCompleter p, E[] array, MyOperation<E> op, int lo, int hi) {
+ *         super(p);
+ *         this.array = array; this.op = op; this.lo = lo; this.hi = hi;
+ *     }
+ *
+ *     public void compute() { // version 1
+ *         if (hi - lo >= 2) {
+ *             int mid = (lo + hi) >>> 1;
+ *             setPendingCount(2); // must set pending count before fork
+ *             new ForEach(this, array, op, mid, hi).fork(); // right child
+ *             new ForEach(this, array, op, lo, mid).fork(); // left child
+ *         }
+ *         else if (hi > lo)
+ *             op.apply(array[lo]);
+ *         tryComplete();
+ *     }
+ * } }</pre>
+ *
+ * This design can be improved by noticing that in the recursive case,
+ * the task has nothing to do after forking its right task, so can
+ * directly invoke its left task before returning. (This is an analog
+ * of tail recursion removal.)  Also, because the task returns upon
+ * executing its left task (rather than falling through to invoke
+ * tryComplete) the pending count is set to one:
+ *
+ * <pre> {@code
+ * class ForEach<E> ...
+ *     public void compute() { // version 2
+ *         if (hi - lo >= 2) {
+ *             int mid = (lo + hi) >>> 1;
+ *             setPendingCount(1); // only one pending
+ *             new ForEach(this, array, op, mid, hi).fork(); // right child
+ *             new ForEach(this, array, op, lo, mid).compute(); // direct invoke
+ *         }
+ *         else {
+ *             if (hi > lo)
+ *                 op.apply(array[lo]);
+ *             tryComplete();
+ *         }
+ *     }
+ * }</pre>
+ *
+ * As a further improvement, notice that the left task need not even
+ * exist.  Instead of creating a new one, we can iterate using the
+ * original task, and add a pending count for each fork:
+ *
+ * <pre> {@code
+ * class ForEach<E> ...
+ *     public void compute() { // version 3
+ *         int l = lo,  h = hi;
+ *         while (h - l >= 2) {
+ *             int mid = (l + h) >>> 1;
+ *             addToPendingCount(1);
+ *             new ForEach(this, array, op, mid, h).fork(); // right child
+ *             h = mid;
+ *         }
+ *         if (h > l)
+ *             op.apply(array[l]);
+ *         tryComplete();
+ *     }
+ * }</pre>
+ *
+ * Additional improvements of such classes might entail precomputing
+ * pending counts so that they can be established in constructors,
+ * specializing classes for leaf steps, subdividing by say, four,
+ * instead of two per iteration, and using an adaptive threshold
+ * instead of always subdividing down to single elements.
+ *
+ * <p><b>Recording subtasks.</b> CountedCompleter tasks that combine
+ * results of multiple subtasks usually need to access these results
+ * in method {@link #onCompletion}. As illustrated in the following
+ * class (that performs a simplified form of map-reduce where mappings
+ * and reductions are all of type {@code E}), one way to do this in
+ * divide and conquer designs is to have each subtask record its
+ * sibling, so that it can be accessed in method {@code onCompletion}.
+ * For clarity, this class uses explicit left and right subtasks, but
+ * variants of other streamlinings seen in the above example may also
+ * apply.
+ *
+ * <pre> {@code
+ * class MyMapper<E> { E apply(E v) {  ...  } }
+ * class MyReducer<E> { E apply(E x, E y) {  ...  } }
+ * class MapReducer<E> extends CountedCompleter {
+ *     final E[] array; final MyMapper<E> mapper;
+ *     final MyReducer<E> reducer; final int lo, hi;
+ *     MapReducer sibling;
+ *     E result;
+ *     MapReducer(CountedCompleter p, E[] array, MyMapper<E> mapper,
+ *                MyReducer<E> reducer, int lo, int hi) {
+ *         super(p);
+ *         this.array = array; this.mapper = mapper;
+ *         this.reducer = reducer; this.lo = lo; this.hi = hi;
+ *     }
+ *     public void compute() {
+ *         if (hi - lo >= 2) {
+ *             int mid = (lo + hi) >>> 1;
+ *             MapReducer<E> left = new MapReducer(this, array, mapper, reducer, lo, mid);
+ *             MapReducer<E> right = new MapReducer(this, array, mapper, reducer, mid, hi);
+ *             left.sibling = right;
+ *             right.sibling = left;
+ *             setPendingCount(1); // only right is pending
+ *             right.fork();
+ *             left.compute();     // directly execute left
+ *         }
+ *         else {
+ *             if (hi > lo)
+ *                 result = mapper.apply(array[lo]);
+ *             tryComplete();
+ *         }
+ *     }
+ *     public void onCompletion(CountedCompleter caller) {
+ *         if (caller != this) {
+ *            MapReducer<E> child = (MapReducer<E>)caller;
+ *            MapReducer<E> sib = child.sibling;
+ *            if (sib == null || sib.result == null)
+ *                result = child.result;
+ *            else
+ *                result = reducer.apply(child.result, sib.result);
+ *         }
+ *     }
+ *
+ *     public static <E> E mapReduce(ForkJoinPool pool, E[] array,
+ *                                   MyMapper<E> mapper, MyReducer<E> reducer) {
+ *         MapReducer<E> mr = new MapReducer<E>(null, array, mapper,
+ *                                              reducer, 0, array.length);
+ *         pool.invoke(mr);
+ *         return mr.result;
+ *     }
+ * } }</pre>
+ *
+ * <p><b>Triggers.</b> Some CountedCompleters are themselves never
+ * forked, but instead serve as bits of plumbing in other designs;
+ * including those in which the completion of one of more async tasks
+ * triggers another async task. For example:
+ *
+ * <pre> {@code
+ * class HeaderBuilder extends CountedCompleter { ... }
+ * class BodyBuilder extends CountedCompleter { ... }
+ * class PacketSender extends CountedCompleter {
+ *     PacketSender(...) { super(null, 1); ... } // trigger on second completion
+ *     public void compute() { } // never called
+ *     public void onCompletion(CountedCompleter caller) { sendPacket(); }
+ * }
+ * // sample use:
+ * PacketSender p = new PacketSender();
+ * new HeaderBuilder(p, ...).fork();
+ * new BodyBuilder(p, ...).fork();
+ * }</pre>
+ *
+ * @since 1.8
+ * @author Doug Lea
+ */
+public abstract class CountedCompleter extends ForkJoinTask<Void> {
+    private static final long serialVersionUID = 5232453752276485070L;
+
+    /** This task's completer, or null if none */
+    final CountedCompleter completer;
+    /** The number of pending tasks until completion */
+    volatile int pending;
+
+    /**
+     * Creates a new CountedCompleter with the given completer
+     * and initial pending count.
+     *
+     * @param completer this tasks completer, or {@code null} if none
+     * @param initialPendingCount the initial pending count
+     */
+    protected CountedCompleter(CountedCompleter completer,
+                               int initialPendingCount) {
+        this.completer = completer;
+        this.pending = initialPendingCount;
+    }
+
+    /**
+     * Creates a new CountedCompleter with the given completer
+     * and an initial pending count of zero.
+     *
+     * @param completer this tasks completer, or {@code null} if none
+     */
+    protected CountedCompleter(CountedCompleter completer) {
+        this.completer = completer;
+    }
+
+    /**
+     * Creates a new CountedCompleter with no completer
+     * and an initial pending count of zero.
+     */
+    protected CountedCompleter() {
+        this.completer = null;
+    }
+
+    /**
+     * The main computation performed by this task.
+     */
+    public abstract void compute();
+
+    /**
+     * Performs an action when method {@link #tryComplete} is invoked
+     * and there are no pending counts, or when the unconditional
+     * method {@link #complete} is invoked.  By default, this method
+     * does nothing.
+     *
+     * @param caller the task invoking this method (which may
+     * be this task itself).
+     */
+    public void onCompletion(CountedCompleter caller) {
+    }
+
+    /**
+     * Performs an action when method {@link #completeExceptionally}
+     * is invoked or method {@link #compute} throws an exception, and
+     * this task has not otherwise already completed normally. On
+     * entry to this method, this task {@link
+     * ForkJoinTask#isCompletedAbnormally}.  The return value of this
+     * method controls further propagation: If {@code true} and this
+     * task has a completer, then this completer is also completed
+     * exceptionally.  The default implementation of this method does
+     * nothing except return {@code true}.
+     *
+     * @param ex the exception
+     * @param caller the task invoking this method (which may
+     * be this task itself).
+     * @return true if this exception should be propagated to this
+     * tasks completer, if one exists.
+     */
+    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
+        return true;
+    }
+
+    /**
+     * Returns the completer established in this task's constructor,
+     * or {@code null} if none.
+     *
+     * @return the completer
+     */
+    public final CountedCompleter getCompleter() {
+        return completer;
+    }
+
+    /**
+     * Returns the current pending count.
+     *
+     * @return the current pending count
+     */
+    public final int getPendingCount() {
+        return pending;
+    }
+
+    /**
+     * Sets the pending count to the given value.
+     *
+     * @param count the count
+     */
+    public final void setPendingCount(int count) {
+        pending = count;
+    }
+
+    /**
+     * Adds (atomically) the given value to the pending count.
+     *
+     * @param delta the value to add
+     */
+    public final void addToPendingCount(int delta) {
+        int c; // note: can replace with intrinsic in jdk8
+        do {} while (!U.compareAndSwapInt(this, PENDING, c = pending, c+delta));
+    }
+
+    /**
+     * Sets (atomically) the pending count to the given count only if
+     * it currently holds the given expected value.
+     *
+     * @param expected the expected value
+     * @param count the new value
+     * @return true is successful
+     */
+    public final boolean compareAndSetPendingCount(int expected, int count) {
+        return U.compareAndSwapInt(this, PENDING, expected, count);
+    }
+
+    /**
+     * If the pending count is nonzero, decrements the count;
+     * otherwise invokes {@link #onCompletion} and then similarly
+     * tries to complete this task's completer, if one exists,
+     * else marks this task as complete.
+     */
+    public final void tryComplete() {
+        CountedCompleter a = this, s = a;
+        for (int c;;) {
+            if ((c = a.pending) == 0) {
+                a.onCompletion(s);
+                if ((a = (s = a).completer) == null) {
+                    s.quietlyComplete();
+                    return;
+                }
+            }
+            else if (U.compareAndSwapInt(a, PENDING, c, c - 1))
+                return;
+        }
+    }
+
+    /**
+     * Regardless of pending count, invokes {@link #onCompletion},
+     * marks this task as complete with a {@code null} return value,
+     * and further triggers {@link #tryComplete} on this task's
+     * completer, if one exists. This method may be useful when
+     * forcing completion as soon as any one (versus all) of several
+     * subtask results are obtained.
+     *
+     * @param mustBeNull the {@code null} completion value
+     */
+    public void complete(Void mustBeNull) {
+        CountedCompleter p;
+        onCompletion(this);
+        quietlyComplete();
+        if ((p = completer) != null)
+            p.tryComplete();
+    }
+
+    /**
+     * Support for FJT exception propagation
+     */
+    void internalPropagateException(Throwable ex) {
+        CountedCompleter a = this, s = a;
+        while (a.onExceptionalCompletion(ex, s) &&
+               (a = (s = a).completer) != null && a.status >= 0)
+            a.recordExceptionalCompletion(ex);
+    }
+
+    /**
+     * Implements execution conventions for CountedCompleters
+     */
+    protected final boolean exec() {
+        compute();
+        return false;
+    }
+
+    /**
+     * Always returns {@code null}.
+     *
+     * @return {@code null} always
+     */
+    public final Void getRawResult() { return null; }
+
+    /**
+     * Requires null completion value.
+     */
+    protected final void setRawResult(Void mustBeNull) { }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe U;
+    private static final long PENDING;
+    static {
+        try {
+            U = getUnsafe();
+            PENDING = U.objectFieldOffset
+                (CountedCompleter.class.getDeclaredField("pending"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166y/ForkJoinPool.java b/src/main/java/jsr166y/ForkJoinPool.java
new file mode 100644
index 00000000000..e89f533a6c1
--- /dev/null
+++ b/src/main/java/jsr166y/ForkJoinPool.java
@@ -0,0 +1,2888 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.AbstractExecutorService;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.RunnableFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.AbstractQueuedSynchronizer;
+import java.util.concurrent.locks.Condition;
+
+/**
+ * An {@link ExecutorService} for running {@link ForkJoinTask}s.
+ * A {@code ForkJoinPool} provides the entry point for submissions
+ * from non-{@code ForkJoinTask} clients, as well as management and
+ * monitoring operations.
+ *
+ * <p>A {@code ForkJoinPool} differs from other kinds of {@link
+ * ExecutorService} mainly by virtue of employing
+ * <em>work-stealing</em>: all threads in the pool attempt to find and
+ * execute tasks submitted to the pool and/or created by other active
+ * tasks (eventually blocking waiting for work if none exist). This
+ * enables efficient processing when most tasks spawn other subtasks
+ * (as do most {@code ForkJoinTask}s), as well as when many small
+ * tasks are submitted to the pool from external clients.  Especially
+ * when setting <em>asyncMode</em> to true in constructors, {@code
+ * ForkJoinPool}s may also be appropriate for use with event-style
+ * tasks that are never joined.
+ *
+ * <p>A {@code ForkJoinPool} is constructed with a given target
+ * parallelism level; by default, equal to the number of available
+ * processors. The pool attempts to maintain enough active (or
+ * available) threads by dynamically adding, suspending, or resuming
+ * internal worker threads, even if some tasks are stalled waiting to
+ * join others. However, no such adjustments are guaranteed in the
+ * face of blocked IO or other unmanaged synchronization. The nested
+ * {@link ManagedBlocker} interface enables extension of the kinds of
+ * synchronization accommodated.
+ *
+ * <p>In addition to execution and lifecycle control methods, this
+ * class provides status check methods (for example
+ * {@link #getStealCount}) that are intended to aid in developing,
+ * tuning, and monitoring fork/join applications. Also, method
+ * {@link #toString} returns indications of pool state in a
+ * convenient form for informal monitoring.
+ *
+ * <p> As is the case with other ExecutorServices, there are three
+ * main task execution methods summarized in the following table.
+ * These are designed to be used primarily by clients not already
+ * engaged in fork/join computations in the current pool.  The main
+ * forms of these methods accept instances of {@code ForkJoinTask},
+ * but overloaded forms also allow mixed execution of plain {@code
+ * Runnable}- or {@code Callable}- based activities as well.  However,
+ * tasks that are already executing in a pool should normally instead
+ * use the within-computation forms listed in the table unless using
+ * async event-style tasks that are not usually joined, in which case
+ * there is little difference among choice of methods.
+ *
+ * <table BORDER CELLPADDING=3 CELLSPACING=1>
+ *  <tr>
+ *    <td></td>
+ *    <td ALIGN=CENTER> <b>Call from non-fork/join clients</b></td>
+ *    <td ALIGN=CENTER> <b>Call from within fork/join computations</b></td>
+ *  </tr>
+ *  <tr>
+ *    <td> <b>Arrange async execution</td>
+ *    <td> {@link #execute(ForkJoinTask)}</td>
+ *    <td> {@link ForkJoinTask#fork}</td>
+ *  </tr>
+ *  <tr>
+ *    <td> <b>Await and obtain result</td>
+ *    <td> {@link #invoke(ForkJoinTask)}</td>
+ *    <td> {@link ForkJoinTask#invoke}</td>
+ *  </tr>
+ *  <tr>
+ *    <td> <b>Arrange exec and obtain Future</td>
+ *    <td> {@link #submit(ForkJoinTask)}</td>
+ *    <td> {@link ForkJoinTask#fork} (ForkJoinTasks <em>are</em> Futures)</td>
+ *  </tr>
+ * </table>
+ *
+ * <p><b>Sample Usage.</b> Normally a single {@code ForkJoinPool} is
+ * used for all parallel task execution in a program or subsystem.
+ * Otherwise, use would not usually outweigh the construction and
+ * bookkeeping overhead of creating a large set of threads. For
+ * example, a common pool could be used for the {@code SortTasks}
+ * illustrated in {@link RecursiveAction}. Because {@code
+ * ForkJoinPool} uses threads in {@linkplain java.lang.Thread#isDaemon
+ * daemon} mode, there is typically no need to explicitly {@link
+ * #shutdown} such a pool upon program exit.
+ *
+ *  <pre> {@code
+ * static final ForkJoinPool mainPool = new ForkJoinPool();
+ * ...
+ * public void sort(long[] array) {
+ *   mainPool.invoke(new SortTask(array, 0, array.length));
+ * }}</pre>
+ *
+ * <p><b>Implementation notes</b>: This implementation restricts the
+ * maximum number of running threads to 32767. Attempts to create
+ * pools with greater than the maximum number result in
+ * {@code IllegalArgumentException}.
+ *
+ * <p>This implementation rejects submitted tasks (that is, by throwing
+ * {@link RejectedExecutionException}) only when the pool is shut down
+ * or internal resources have been exhausted.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public class ForkJoinPool extends AbstractExecutorService {
+
+    /*
+     * Implementation Overview
+     *
+     * This class and its nested classes provide the main
+     * functionality and control for a set of worker threads:
+     * Submissions from non-FJ threads enter into submission queues.
+     * Workers take these tasks and typically split them into subtasks
+     * that may be stolen by other workers.  Preference rules give
+     * first priority to processing tasks from their own queues (LIFO
+     * or FIFO, depending on mode), then to randomized FIFO steals of
+     * tasks in other queues.
+     *
+     * WorkQueues
+     * ==========
+     *
+     * Most operations occur within work-stealing queues (in nested
+     * class WorkQueue).  These are special forms of Deques that
+     * support only three of the four possible end-operations -- push,
+     * pop, and poll (aka steal), under the further constraints that
+     * push and pop are called only from the owning thread (or, as
+     * extended here, under a lock), while poll may be called from
+     * other threads.  (If you are unfamiliar with them, you probably
+     * want to read Herlihy and Shavit's book "The Art of
+     * Multiprocessor programming", chapter 16 describing these in
+     * more detail before proceeding.)  The main work-stealing queue
+     * design is roughly similar to those in the papers "Dynamic
+     * Circular Work-Stealing Deque" by Chase and Lev, SPAA 2005
+     * (http://research.sun.com/scalable/pubs/index.html) and
+     * "Idempotent work stealing" by Michael, Saraswat, and Vechev,
+     * PPoPP 2009 (http://portal.acm.org/citation.cfm?id=1504186).
+     * The main differences ultimately stem from GC requirements that
+     * we null out taken slots as soon as we can, to maintain as small
+     * a footprint as possible even in programs generating huge
+     * numbers of tasks. To accomplish this, we shift the CAS
+     * arbitrating pop vs poll (steal) from being on the indices
+     * ("base" and "top") to the slots themselves.  So, both a
+     * successful pop and poll mainly entail a CAS of a slot from
+     * non-null to null.  Because we rely on CASes of references, we
+     * do not need tag bits on base or top.  They are simple ints as
+     * used in any circular array-based queue (see for example
+     * ArrayDeque).  Updates to the indices must still be ordered in a
+     * way that guarantees that top == base means the queue is empty,
+     * but otherwise may err on the side of possibly making the queue
+     * appear nonempty when a push, pop, or poll have not fully
+     * committed. Note that this means that the poll operation,
+     * considered individually, is not wait-free. One thief cannot
+     * successfully continue until another in-progress one (or, if
+     * previously empty, a push) completes.  However, in the
+     * aggregate, we ensure at least probabilistic non-blockingness.
+     * If an attempted steal fails, a thief always chooses a different
+     * random victim target to try next. So, in order for one thief to
+     * progress, it suffices for any in-progress poll or new push on
+     * any empty queue to complete. (This is why we normally use
+     * method pollAt and its variants that try once at the apparent
+     * base index, else consider alternative actions, rather than
+     * method poll.)
+     *
+     * This approach also enables support of a user mode in which local
+     * task processing is in FIFO, not LIFO order, simply by using
+     * poll rather than pop.  This can be useful in message-passing
+     * frameworks in which tasks are never joined.  However neither
+     * mode considers affinities, loads, cache localities, etc, so
+     * rarely provide the best possible performance on a given
+     * machine, but portably provide good throughput by averaging over
+     * these factors.  (Further, even if we did try to use such
+     * information, we do not usually have a basis for exploiting it.
+     * For example, some sets of tasks profit from cache affinities,
+     * but others are harmed by cache pollution effects.)
+     *
+     * WorkQueues are also used in a similar way for tasks submitted
+     * to the pool. We cannot mix these tasks in the same queues used
+     * for work-stealing (this would contaminate lifo/fifo
+     * processing). Instead, we loosely associate submission queues
+     * with submitting threads, using a form of hashing.  The
+     * ThreadLocal Submitter class contains a value initially used as
+     * a hash code for choosing existing queues, but may be randomly
+     * repositioned upon contention with other submitters.  In
+     * essence, submitters act like workers except that they never
+     * take tasks, and they are multiplexed on to a finite number of
+     * shared work queues. However, classes are set up so that future
+     * extensions could allow submitters to optionally help perform
+     * tasks as well. Insertion of tasks in shared mode requires a
+     * lock (mainly to protect in the case of resizing) but we use
+     * only a simple spinlock (using bits in field runState), because
+     * submitters encountering a busy queue move on to try or create
+     * other queues -- they block only when creating and registering
+     * new queues.
+     *
+     * Management
+     * ==========
+     *
+     * The main throughput advantages of work-stealing stem from
+     * decentralized control -- workers mostly take tasks from
+     * themselves or each other. We cannot negate this in the
+     * implementation of other management responsibilities. The main
+     * tactic for avoiding bottlenecks is packing nearly all
+     * essentially atomic control state into two volatile variables
+     * that are by far most often read (not written) as status and
+     * consistency checks.
+     *
+     * Field "ctl" contains 64 bits holding all the information needed
+     * to atomically decide to add, inactivate, enqueue (on an event
+     * queue), dequeue, and/or re-activate workers.  To enable this
+     * packing, we restrict maximum parallelism to (1<<15)-1 (which is
+     * far in excess of normal operating range) to allow ids, counts,
+     * and their negations (used for thresholding) to fit into 16bit
+     * fields.
+     *
+     * Field "runState" contains 32 bits needed to register and
+     * deregister WorkQueues, as well as to enable shutdown. It is
+     * only modified under a lock (normally briefly held, but
+     * occasionally protecting allocations and resizings) but even
+     * when locked remains available to check consistency.
+     *
+     * Recording WorkQueues.  WorkQueues are recorded in the
+     * "workQueues" array that is created upon pool construction and
+     * expanded if necessary.  Updates to the array while recording
+     * new workers and unrecording terminated ones are protected from
+     * each other by a lock but the array is otherwise concurrently
+     * readable, and accessed directly.  To simplify index-based
+     * operations, the array size is always a power of two, and all
+     * readers must tolerate null slots. Shared (submission) queues
+     * are at even indices, worker queues at odd indices. Grouping
+     * them together in this way simplifies and speeds up task
+     * scanning.
+     *
+     * All worker thread creation is on-demand, triggered by task
+     * submissions, replacement of terminated workers, and/or
+     * compensation for blocked workers. However, all other support
+     * code is set up to work with other policies.  To ensure that we
+     * do not hold on to worker references that would prevent GC, ALL
+     * accesses to workQueues are via indices into the workQueues
+     * array (which is one source of some of the messy code
+     * constructions here). In essence, the workQueues array serves as
+     * a weak reference mechanism. Thus for example the wait queue
+     * field of ctl stores indices, not references.  Access to the
+     * workQueues in associated methods (for example signalWork) must
+     * both index-check and null-check the IDs. All such accesses
+     * ignore bad IDs by returning out early from what they are doing,
+     * since this can only be associated with termination, in which
+     * case it is OK to give up.  All uses of the workQueues array
+     * also check that it is non-null (even if previously
+     * non-null). This allows nulling during termination, which is
+     * currently not necessary, but remains an option for
+     * resource-revocation-based shutdown schemes. It also helps
+     * reduce JIT issuance of uncommon-trap code, which tends to
+     * unnecessarily complicate control flow in some methods.
+     *
+     * Event Queuing. Unlike HPC work-stealing frameworks, we cannot
+     * let workers spin indefinitely scanning for tasks when none can
+     * be found immediately, and we cannot start/resume workers unless
+     * there appear to be tasks available.  On the other hand, we must
+     * quickly prod them into action when new tasks are submitted or
+     * generated. In many usages, ramp-up time to activate workers is
+     * the main limiting factor in overall performance (this is
+     * compounded at program start-up by JIT compilation and
+     * allocation). So we try to streamline this as much as possible.
+     * We park/unpark workers after placing in an event wait queue
+     * when they cannot find work. This "queue" is actually a simple
+     * Treiber stack, headed by the "id" field of ctl, plus a 15bit
+     * counter value (that reflects the number of times a worker has
+     * been inactivated) to avoid ABA effects (we need only as many
+     * version numbers as worker threads). Successors are held in
+     * field WorkQueue.nextWait.  Queuing deals with several intrinsic
+     * races, mainly that a task-producing thread can miss seeing (and
+     * signalling) another thread that gave up looking for work but
+     * has not yet entered the wait queue. We solve this by requiring
+     * a full sweep of all workers (via repeated calls to method
+     * scan()) both before and after a newly waiting worker is added
+     * to the wait queue. During a rescan, the worker might release
+     * some other queued worker rather than itself, which has the same
+     * net effect. Because enqueued workers may actually be rescanning
+     * rather than waiting, we set and clear the "parker" field of
+     * WorkQueues to reduce unnecessary calls to unpark.  (This
+     * requires a secondary recheck to avoid missed signals.)  Note
+     * the unusual conventions about Thread.interrupts surrounding
+     * parking and other blocking: Because interrupts are used solely
+     * to alert threads to check termination, which is checked anyway
+     * upon blocking, we clear status (using Thread.interrupted)
+     * before any call to park, so that park does not immediately
+     * return due to status being set via some other unrelated call to
+     * interrupt in user code.
+     *
+     * Signalling.  We create or wake up workers only when there
+     * appears to be at least one task they might be able to find and
+     * execute.  When a submission is added or another worker adds a
+     * task to a queue that previously had fewer than two tasks, they
+     * signal waiting workers (or trigger creation of new ones if
+     * fewer than the given parallelism level -- see signalWork).
+     * These primary signals are buttressed by signals during rescans;
+     * together these cover the signals needed in cases when more
+     * tasks are pushed but untaken, and improve performance compared
+     * to having one thread wake up all workers.
+     *
+     * Trimming workers. To release resources after periods of lack of
+     * use, a worker starting to wait when the pool is quiescent will
+     * time out and terminate if the pool has remained quiescent for
+     * SHRINK_RATE nanosecs. This will slowly propagate, eventually
+     * terminating all workers after long periods of non-use.
+     *
+     * Shutdown and Termination. A call to shutdownNow atomically sets
+     * a runState bit and then (non-atomically) sets each worker's
+     * runState status, cancels all unprocessed tasks, and wakes up
+     * all waiting workers.  Detecting whether termination should
+     * commence after a non-abrupt shutdown() call requires more work
+     * and bookkeeping. We need consensus about quiescence (i.e., that
+     * there is no more work). The active count provides a primary
+     * indication but non-abrupt shutdown still requires a rechecking
+     * scan for any workers that are inactive but not queued.
+     *
+     * Joining Tasks
+     * =============
+     *
+     * Any of several actions may be taken when one worker is waiting
+     * to join a task stolen (or always held) by another.  Because we
+     * are multiplexing many tasks on to a pool of workers, we can't
+     * just let them block (as in Thread.join).  We also cannot just
+     * reassign the joiner's run-time stack with another and replace
+     * it later, which would be a form of "continuation", that even if
+     * possible is not necessarily a good idea since we sometimes need
+     * both an unblocked task and its continuation to progress.
+     * Instead we combine two tactics:
+     *
+     *   Helping: Arranging for the joiner to execute some task that it
+     *      would be running if the steal had not occurred.
+     *
+     *   Compensating: Unless there are already enough live threads,
+     *      method tryCompensate() may create or re-activate a spare
+     *      thread to compensate for blocked joiners until they unblock.
+     *
+     * A third form (implemented in tryRemoveAndExec and
+     * tryPollForAndExec) amounts to helping a hypothetical
+     * compensator: If we can readily tell that a possible action of a
+     * compensator is to steal and execute the task being joined, the
+     * joining thread can do so directly, without the need for a
+     * compensation thread (although at the expense of larger run-time
+     * stacks, but the tradeoff is typically worthwhile).
+     *
+     * The ManagedBlocker extension API can't use helping so relies
+     * only on compensation in method awaitBlocker.
+     *
+     * The algorithm in tryHelpStealer entails a form of "linear"
+     * helping: Each worker records (in field currentSteal) the most
+     * recent task it stole from some other worker. Plus, it records
+     * (in field currentJoin) the task it is currently actively
+     * joining. Method tryHelpStealer uses these markers to try to
+     * find a worker to help (i.e., steal back a task from and execute
+     * it) that could hasten completion of the actively joined task.
+     * In essence, the joiner executes a task that would be on its own
+     * local deque had the to-be-joined task not been stolen. This may
+     * be seen as a conservative variant of the approach in Wagner &
+     * Calder "Leapfrogging: a portable technique for implementing
+     * efficient futures" SIGPLAN Notices, 1993
+     * (http://portal.acm.org/citation.cfm?id=155354). It differs in
+     * that: (1) We only maintain dependency links across workers upon
+     * steals, rather than use per-task bookkeeping.  This sometimes
+     * requires a linear scan of workQueues array to locate stealers,
+     * but often doesn't because stealers leave hints (that may become
+     * stale/wrong) of where to locate them.  A stealHint is only a
+     * hint because a worker might have had multiple steals and the
+     * hint records only one of them (usually the most current).
+     * Hinting isolates cost to when it is needed, rather than adding
+     * to per-task overhead.  (2) It is "shallow", ignoring nesting
+     * and potentially cyclic mutual steals.  (3) It is intentionally
+     * racy: field currentJoin is updated only while actively joining,
+     * which means that we miss links in the chain during long-lived
+     * tasks, GC stalls etc (which is OK since blocking in such cases
+     * is usually a good idea).  (4) We bound the number of attempts
+     * to find work (see MAX_HELP) and fall back to suspending the
+     * worker and if necessary replacing it with another.
+     *
+     * It is impossible to keep exactly the target parallelism number
+     * of threads running at any given time.  Determining the
+     * existence of conservatively safe helping targets, the
+     * availability of already-created spares, and the apparent need
+     * to create new spares are all racy, so we rely on multiple
+     * retries of each.  Compensation in the apparent absence of
+     * helping opportunities is challenging to control on JVMs, where
+     * GC and other activities can stall progress of tasks that in
+     * turn stall out many other dependent tasks, without us being
+     * able to determine whether they will ever require compensation.
+     * Even though work-stealing otherwise encounters little
+     * degradation in the presence of more threads than cores,
+     * aggressively adding new threads in such cases entails risk of
+     * unwanted positive feedback control loops in which more threads
+     * cause more dependent stalls (as well as delayed progress of
+     * unblocked threads to the point that we know they are available)
+     * leading to more situations requiring more threads, and so
+     * on. This aspect of control can be seen as an (analytically
+     * intractable) game with an opponent that may choose the worst
+     * (for us) active thread to stall at any time.  We take several
+     * precautions to bound losses (and thus bound gains), mainly in
+     * methods tryCompensate and awaitJoin: (1) We only try
+     * compensation after attempting enough helping steps (measured
+     * via counting and timing) that we have already consumed the
+     * estimated cost of creating and activating a new thread.  (2) We
+     * allow up to 50% of threads to be blocked before initially
+     * adding any others, and unless completely saturated, check that
+     * some work is available for a new worker before adding. Also, we
+     * create up to only 50% more threads until entering a mode that
+     * only adds a thread if all others are possibly blocked.  All
+     * together, this means that we might be half as fast to react,
+     * and create half as many threads as possible in the ideal case,
+     * but present vastly fewer anomalies in all other cases compared
+     * to both more aggressive and more conservative alternatives.
+     *
+     * Style notes: There is a lot of representation-level coupling
+     * among classes ForkJoinPool, ForkJoinWorkerThread, and
+     * ForkJoinTask.  The fields of WorkQueue maintain data structures
+     * managed by ForkJoinPool, so are directly accessed.  There is
+     * little point trying to reduce this, since any associated future
+     * changes in representations will need to be accompanied by
+     * algorithmic changes anyway. Several methods intrinsically
+     * sprawl because they must accumulate sets of consistent reads of
+     * volatiles held in local variables.  Methods signalWork() and
+     * scan() are the main bottlenecks, so are especially heavily
+     * micro-optimized/mangled.  There are lots of inline assignments
+     * (of form "while ((local = field) != 0)") which are usually the
+     * simplest way to ensure the required read orderings (which are
+     * sometimes critical). This leads to a "C"-like style of listing
+     * declarations of these locals at the heads of methods or blocks.
+     * There are several occurrences of the unusual "do {} while
+     * (!cas...)"  which is the simplest way to force an update of a
+     * CAS'ed variable. There are also other coding oddities that help
+     * some methods perform reasonably even when interpreted (not
+     * compiled).
+     *
+     * The order of declarations in this file is:
+     * (1) Static utility functions
+     * (2) Nested (static) classes
+     * (3) Static fields
+     * (4) Fields, along with constants used when unpacking some of them
+     * (5) Internal control methods
+     * (6) Callbacks and other support for ForkJoinTask methods
+     * (7) Exported methods
+     * (8) Static block initializing statics in minimally dependent order
+     */
+
+    // Static utilities
+
+    /**
+     * If there is a security manager, makes sure caller has
+     * permission to modify threads.
+     */
+    private static void checkPermission() {
+        SecurityManager security = System.getSecurityManager();
+        if (security != null)
+            security.checkPermission(modifyThreadPermission);
+    }
+
+    // Nested classes
+
+    /**
+     * Factory for creating new {@link ForkJoinWorkerThread}s.
+     * A {@code ForkJoinWorkerThreadFactory} must be defined and used
+     * for {@code ForkJoinWorkerThread} subclasses that extend base
+     * functionality or initialize threads with different contexts.
+     */
+    public static interface ForkJoinWorkerThreadFactory {
+        /**
+         * Returns a new worker thread operating in the given pool.
+         *
+         * @param pool the pool this thread works in
+         * @throws NullPointerException if the pool is null
+         */
+        public ForkJoinWorkerThread newThread(ForkJoinPool pool);
+    }
+
+    /**
+     * Default ForkJoinWorkerThreadFactory implementation; creates a
+     * new ForkJoinWorkerThread.
+     */
+    static class DefaultForkJoinWorkerThreadFactory
+        implements ForkJoinWorkerThreadFactory {
+        public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
+            return new ForkJoinWorkerThread(pool);
+        }
+    }
+
+    /**
+     * A simple non-reentrant lock used for exclusion when managing
+     * queues and workers. We use a custom lock so that we can readily
+     * probe lock state in constructions that check among alternative
+     * actions. The lock is normally only very briefly held, and
+     * sometimes treated as a spinlock, but other usages block to
+     * reduce overall contention in those cases where locked code
+     * bodies perform allocation/resizing.
+     */
+    static final class Mutex extends AbstractQueuedSynchronizer {
+        public final boolean tryAcquire(int ignore) {
+            return compareAndSetState(0, 1);
+        }
+        public final boolean tryRelease(int ignore) {
+            setState(0);
+            return true;
+        }
+        public final void lock() { acquire(0); }
+        public final void unlock() { release(0); }
+        public final boolean isHeldExclusively() { return getState() == 1; }
+        public final Condition newCondition() { return new ConditionObject(); }
+    }
+
+    /**
+     * Class for artificial tasks that are used to replace the target
+     * of local joins if they are removed from an interior queue slot
+     * in WorkQueue.tryRemoveAndExec. We don't need the proxy to
+     * actually do anything beyond having a unique identity.
+     */
+    static final class EmptyTask extends ForkJoinTask<Void> {
+        EmptyTask() { status = ForkJoinTask.NORMAL; } // force done
+        public final Void getRawResult() { return null; }
+        public final void setRawResult(Void x) {}
+        public final boolean exec() { return true; }
+    }
+
+    /**
+     * Queues supporting work-stealing as well as external task
+     * submission. See above for main rationale and algorithms.
+     * Implementation relies heavily on "Unsafe" intrinsics
+     * and selective use of "volatile":
+     *
+     * Field "base" is the index (mod array.length) of the least valid
+     * queue slot, which is always the next position to steal (poll)
+     * from if nonempty. Reads and writes require volatile orderings
+     * but not CAS, because updates are only performed after slot
+     * CASes.
+     *
+     * Field "top" is the index (mod array.length) of the next queue
+     * slot to push to or pop from. It is written only by owner thread
+     * for push, or under lock for trySharedPush, and accessed by
+     * other threads only after reading (volatile) base.  Both top and
+     * base are allowed to wrap around on overflow, but (top - base)
+     * (or more commonly -(base - top) to force volatile read of base
+     * before top) still estimates size.
+     *
+     * The array slots are read and written using the emulation of
+     * volatiles/atomics provided by Unsafe. Insertions must in
+     * general use putOrderedObject as a form of releasing store to
+     * ensure that all writes to the task object are ordered before
+     * its publication in the queue. (Although we can avoid one case
+     * of this when locked in trySharedPush.) All removals entail a
+     * CAS to null.  The array is always a power of two. To ensure
+     * safety of Unsafe array operations, all accesses perform
+     * explicit null checks and implicit bounds checks via
+     * power-of-two masking.
+     *
+     * In addition to basic queuing support, this class contains
+     * fields described elsewhere to control execution. It turns out
+     * to work better memory-layout-wise to include them in this
+     * class rather than a separate class.
+     *
+     * Performance on most platforms is very sensitive to placement of
+     * instances of both WorkQueues and their arrays -- we absolutely
+     * do not want multiple WorkQueue instances or multiple queue
+     * arrays sharing cache lines. (It would be best for queue objects
+     * and their arrays to share, but there is nothing available to
+     * help arrange that).  Unfortunately, because they are recorded
+     * in a common array, WorkQueue instances are often moved to be
+     * adjacent by garbage collectors. To reduce impact, we use field
+     * padding that works OK on common platforms; this effectively
+     * trades off slightly slower average field access for the sake of
+     * avoiding really bad worst-case access. (Until better JVM
+     * support is in place, this padding is dependent on transient
+     * properties of JVM field layout rules.)  We also take care in
+     * allocating, sizing and resizing the array. Non-shared queue
+     * arrays are initialized (via method growArray) by workers before
+     * use. Others are allocated on first use.
+     */
+    static final class WorkQueue {
+        /**
+         * Capacity of work-stealing queue array upon initialization.
+         * Must be a power of two; at least 4, but should be larger to
+         * reduce or eliminate cacheline sharing among queues.
+         * Currently, it is much larger, as a partial workaround for
+         * the fact that JVMs often place arrays in locations that
+         * share GC bookkeeping (especially cardmarks) such that
+         * per-write accesses encounter serious memory contention.
+         */
+        static final int INITIAL_QUEUE_CAPACITY = 1 << 13;
+
+        /**
+         * Maximum size for queue arrays. Must be a power of two less
+         * than or equal to 1 << (31 - width of array entry) to ensure
+         * lack of wraparound of index calculations, but defined to a
+         * value a bit less than this to help users trap runaway
+         * programs before saturating systems.
+         */
+        static final int MAXIMUM_QUEUE_CAPACITY = 1 << 26; // 64M
+
+        volatile long totalSteals; // cumulative number of steals
+        int seed;                  // for random scanning; initialize nonzero
+        volatile int eventCount;   // encoded inactivation count; < 0 if inactive
+        int nextWait;              // encoded record of next event waiter
+        int rescans;               // remaining scans until block
+        int nsteals;               // top-level task executions since last idle
+        final int mode;            // lifo, fifo, or shared
+        int poolIndex;             // index of this queue in pool (or 0)
+        int stealHint;             // index of most recent known stealer
+        volatile int runState;     // 1: locked, -1: terminate; else 0
+        volatile int base;         // index of next slot for poll
+        int top;                   // index of next slot for push
+        ForkJoinTask<?>[] array;   // the elements (initially unallocated)
+        final ForkJoinPool pool;   // the containing pool (may be null)
+        final ForkJoinWorkerThread owner; // owning thread or null if shared
+        volatile Thread parker;    // == owner during call to park; else null
+        volatile ForkJoinTask<?> currentJoin;  // task being joined in awaitJoin
+        ForkJoinTask<?> currentSteal; // current non-local task being executed
+        // Heuristic padding to ameliorate unfortunate memory placements
+        Object p00, p01, p02, p03, p04, p05, p06, p07;
+        Object p08, p09, p0a, p0b, p0c, p0d, p0e;
+
+        WorkQueue(ForkJoinPool pool, ForkJoinWorkerThread owner, int mode) {
+            this.mode = mode;
+            this.pool = pool;
+            this.owner = owner;
+            // Place indices in the center of array (that is not yet allocated)
+            base = top = INITIAL_QUEUE_CAPACITY >>> 1;
+        }
+
+        /**
+         * Returns the approximate number of tasks in the queue.
+         */
+        final int queueSize() {
+            int n = base - top;       // non-owner callers must read base first
+            return (n >= 0) ? 0 : -n; // ignore transient negative
+        }
+
+        /**
+         * Provides a more accurate estimate of whether this queue has
+         * any tasks than does queueSize, by checking whether a
+         * near-empty queue has at least one unclaimed task.
+         */
+        final boolean isEmpty() {
+            ForkJoinTask<?>[] a; int m, s;
+            int n = base - (s = top);
+            return (n >= 0 ||
+                    (n == -1 &&
+                     ((a = array) == null ||
+                      (m = a.length - 1) < 0 ||
+                      U.getObjectVolatile
+                      (a, ((m & (s - 1)) << ASHIFT) + ABASE) == null)));
+        }
+
+        /**
+         * Pushes a task. Call only by owner in unshared queues.
+         *
+         * @param task the task. Caller must ensure non-null.
+         * @throw RejectedExecutionException if array cannot be resized
+         */
+        final void push(ForkJoinTask<?> task) {
+            ForkJoinTask<?>[] a; ForkJoinPool p;
+            int s = top, m, n;
+            if ((a = array) != null) {    // ignore if queue removed
+                U.putOrderedObject
+                    (a, (((m = a.length - 1) & s) << ASHIFT) + ABASE, task);
+                if ((n = (top = s + 1) - base) <= 2) {
+                    if ((p = pool) != null)
+                        p.signalWork();
+                }
+                else if (n >= m)
+                    growArray(true);
+            }
+        }
+
+        /**
+         * Pushes a task if lock is free and array is either big
+         * enough or can be resized to be big enough.
+         *
+         * @param task the task. Caller must ensure non-null.
+         * @return true if submitted
+         */
+        final boolean trySharedPush(ForkJoinTask<?> task) {
+            boolean submitted = false;
+            if (runState == 0 && U.compareAndSwapInt(this, RUNSTATE, 0, 1)) {
+                ForkJoinTask<?>[] a = array;
+                int s = top;
+                try {
+                    if ((a != null && a.length > s + 1 - base) ||
+                        (a = growArray(false)) != null) { // must presize
+                        int j = (((a.length - 1) & s) << ASHIFT) + ABASE;
+                        U.putObject(a, (long)j, task);    // don't need "ordered"
+                        top = s + 1;
+                        submitted = true;
+                    }
+                } finally {
+                    runState = 0;                         // unlock
+                }
+            }
+            return submitted;
+        }
+
+        /**
+         * Takes next task, if one exists, in LIFO order.  Call only
+         * by owner in unshared queues. (We do not have a shared
+         * version of this method because it is never needed.)
+         */
+        final ForkJoinTask<?> pop() {
+            ForkJoinTask<?>[] a; ForkJoinTask<?> t; int m;
+            if ((a = array) != null && (m = a.length - 1) >= 0) {
+                for (int s; (s = top - 1) - base >= 0;) {
+                    long j = ((m & s) << ASHIFT) + ABASE;
+                    if ((t = (ForkJoinTask<?>)U.getObject(a, j)) == null)
+                        break;
+                    if (U.compareAndSwapObject(a, j, t, null)) {
+                        top = s;
+                        return t;
+                    }
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Takes a task in FIFO order if b is base of queue and a task
+         * can be claimed without contention. Specialized versions
+         * appear in ForkJoinPool methods scan and tryHelpStealer.
+         */
+        final ForkJoinTask<?> pollAt(int b) {
+            ForkJoinTask<?> t; ForkJoinTask<?>[] a;
+            if ((a = array) != null) {
+                int j = (((a.length - 1) & b) << ASHIFT) + ABASE;
+                if ((t = (ForkJoinTask<?>)U.getObjectVolatile(a, j)) != null &&
+                    base == b &&
+                    U.compareAndSwapObject(a, j, t, null)) {
+                    base = b + 1;
+                    return t;
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Takes next task, if one exists, in FIFO order.
+         */
+        final ForkJoinTask<?> poll() {
+            ForkJoinTask<?>[] a; int b; ForkJoinTask<?> t;
+            while ((b = base) - top < 0 && (a = array) != null) {
+                int j = (((a.length - 1) & b) << ASHIFT) + ABASE;
+                t = (ForkJoinTask<?>)U.getObjectVolatile(a, j);
+                if (t != null) {
+                    if (base == b &&
+                        U.compareAndSwapObject(a, j, t, null)) {
+                        base = b + 1;
+                        return t;
+                    }
+                }
+                else if (base == b) {
+                    if (b + 1 == top)
+                        break;
+                    Thread.yield(); // wait for lagging update
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Takes next task, if one exists, in order specified by mode.
+         */
+        final ForkJoinTask<?> nextLocalTask() {
+            return mode == 0 ? pop() : poll();
+        }
+
+        /**
+         * Returns next task, if one exists, in order specified by mode.
+         */
+        final ForkJoinTask<?> peek() {
+            ForkJoinTask<?>[] a = array; int m;
+            if (a == null || (m = a.length - 1) < 0)
+                return null;
+            int i = mode == 0 ? top - 1 : base;
+            int j = ((i & m) << ASHIFT) + ABASE;
+            return (ForkJoinTask<?>)U.getObjectVolatile(a, j);
+        }
+
+        /**
+         * Pops the given task only if it is at the current top.
+         */
+        final boolean tryUnpush(ForkJoinTask<?> t) {
+            ForkJoinTask<?>[] a; int s;
+            if ((a = array) != null && (s = top) != base &&
+                U.compareAndSwapObject
+                (a, (((a.length - 1) & --s) << ASHIFT) + ABASE, t, null)) {
+                top = s;
+                return true;
+            }
+            return false;
+        }
+
+        /**
+         * Polls the given task only if it is at the current base.
+         */
+        final boolean pollFor(ForkJoinTask<?> task) {
+            ForkJoinTask<?>[] a; int b;
+            if ((b = base) - top < 0 && (a = array) != null) {
+                int j = (((a.length - 1) & b) << ASHIFT) + ABASE;
+                if (U.getObjectVolatile(a, j) == task && base == b &&
+                    U.compareAndSwapObject(a, j, task, null)) {
+                    base = b + 1;
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /**
+         * Initializes or doubles the capacity of array. Call either
+         * by owner or with lock held -- it is OK for base, but not
+         * top, to move while resizings are in progress.
+         *
+         * @param rejectOnFailure if true, throw exception if capacity
+         * exceeded (relayed ultimately to user); else return null.
+         */
+        final ForkJoinTask<?>[] growArray(boolean rejectOnFailure) {
+            ForkJoinTask<?>[] oldA = array;
+            int size = oldA != null ? oldA.length << 1 : INITIAL_QUEUE_CAPACITY;
+            if (size <= MAXIMUM_QUEUE_CAPACITY) {
+                int oldMask, t, b;
+                ForkJoinTask<?>[] a = array = new ForkJoinTask<?>[size];
+                if (oldA != null && (oldMask = oldA.length - 1) >= 0 &&
+                    (t = top) - (b = base) > 0) {
+                    int mask = size - 1;
+                    do {
+                        ForkJoinTask<?> x;
+                        int oldj = ((b & oldMask) << ASHIFT) + ABASE;
+                        int j    = ((b &    mask) << ASHIFT) + ABASE;
+                        x = (ForkJoinTask<?>)U.getObjectVolatile(oldA, oldj);
+                        if (x != null &&
+                            U.compareAndSwapObject(oldA, oldj, x, null))
+                            U.putObjectVolatile(a, j, x);
+                    } while (++b != t);
+                }
+                return a;
+            }
+            else if (!rejectOnFailure)
+                return null;
+            else
+                throw new RejectedExecutionException("Queue capacity exceeded");
+        }
+
+        /**
+         * Removes and cancels all known tasks, ignoring any exceptions.
+         */
+        final void cancelAll() {
+            ForkJoinTask.cancelIgnoringExceptions(currentJoin);
+            ForkJoinTask.cancelIgnoringExceptions(currentSteal);
+            for (ForkJoinTask<?> t; (t = poll()) != null; )
+                ForkJoinTask.cancelIgnoringExceptions(t);
+        }
+
+        /**
+         * Computes next value for random probes.  Scans don't require
+         * a very high quality generator, but also not a crummy one.
+         * Marsaglia xor-shift is cheap and works well enough.  Note:
+         * This is manually inlined in its usages in ForkJoinPool to
+         * avoid writes inside busy scan loops.
+         */
+        final int nextSeed() {
+            int r = seed;
+            r ^= r << 13;
+            r ^= r >>> 17;
+            return seed = r ^= r << 5;
+        }
+
+        // Execution methods
+
+        /**
+         * Pops and runs tasks until empty.
+         */
+        private void popAndExecAll() {
+            // A bit faster than repeated pop calls
+            ForkJoinTask<?>[] a; int m, s; long j; ForkJoinTask<?> t;
+            while ((a = array) != null && (m = a.length - 1) >= 0 &&
+                   (s = top - 1) - base >= 0 &&
+                   (t = ((ForkJoinTask<?>)
+                         U.getObject(a, j = ((m & s) << ASHIFT) + ABASE)))
+                   != null) {
+                if (U.compareAndSwapObject(a, j, t, null)) {
+                    top = s;
+                    t.doExec();
+                }
+            }
+        }
+
+        /**
+         * Polls and runs tasks until empty.
+         */
+        private void pollAndExecAll() {
+            for (ForkJoinTask<?> t; (t = poll()) != null;)
+                t.doExec();
+        }
+
+        /**
+         * If present, removes from queue and executes the given task, or
+         * any other cancelled task. Returns (true) immediately on any CAS
+         * or consistency check failure so caller can retry.
+         *
+         * @return 0 if no progress can be made, else positive
+         * (this unusual convention simplifies use with tryHelpStealer.)
+         */
+        final int tryRemoveAndExec(ForkJoinTask<?> task) {
+            int stat = 1;
+            boolean removed = false, empty = true;
+            ForkJoinTask<?>[] a; int m, s, b, n;
+            if ((a = array) != null && (m = a.length - 1) >= 0 &&
+                (n = (s = top) - (b = base)) > 0) {
+                for (ForkJoinTask<?> t;;) {           // traverse from s to b
+                    int j = ((--s & m) << ASHIFT) + ABASE;
+                    t = (ForkJoinTask<?>)U.getObjectVolatile(a, j);
+                    if (t == null)                    // inconsistent length
+                        break;
+                    else if (t == task) {
+                        if (s + 1 == top) {           // pop
+                            if (!U.compareAndSwapObject(a, j, task, null))
+                                break;
+                            top = s;
+                            removed = true;
+                        }
+                        else if (base == b)           // replace with proxy
+                            removed = U.compareAndSwapObject(a, j, task,
+                                                             new EmptyTask());
+                        break;
+                    }
+                    else if (t.status >= 0)
+                        empty = false;
+                    else if (s + 1 == top) {          // pop and throw away
+                        if (U.compareAndSwapObject(a, j, t, null))
+                            top = s;
+                        break;
+                    }
+                    if (--n == 0) {
+                        if (!empty && base == b)
+                            stat = 0;
+                        break;
+                    }
+                }
+            }
+            if (removed)
+                task.doExec();
+            return stat;
+        }
+
+        /**
+         * Executes a top-level task and any local tasks remaining
+         * after execution.
+         */
+        final void runTask(ForkJoinTask<?> t) {
+            if (t != null) {
+                currentSteal = t;
+                t.doExec();
+                if (top != base) {       // process remaining local tasks
+                    if (mode == 0)
+                        popAndExecAll();
+                    else
+                        pollAndExecAll();
+                }
+                ++nsteals;
+                currentSteal = null;
+            }
+        }
+
+        /**
+         * Executes a non-top-level (stolen) task.
+         */
+        final void runSubtask(ForkJoinTask<?> t) {
+            if (t != null) {
+                ForkJoinTask<?> ps = currentSteal;
+                currentSteal = t;
+                t.doExec();
+                currentSteal = ps;
+            }
+        }
+
+        /**
+         * Returns true if owned and not known to be blocked.
+         */
+        final boolean isApparentlyUnblocked() {
+            Thread wt; Thread.State s;
+            return (eventCount >= 0 &&
+                    (wt = owner) != null &&
+                    (s = wt.getState()) != Thread.State.BLOCKED &&
+                    s != Thread.State.WAITING &&
+                    s != Thread.State.TIMED_WAITING);
+        }
+
+        /**
+         * If this owned and is not already interrupted, try to
+         * interrupt and/or unpark, ignoring exceptions.
+         */
+        final void interruptOwner() {
+            Thread wt, p;
+            if ((wt = owner) != null && !wt.isInterrupted()) {
+                try {
+                    wt.interrupt();
+                } catch (SecurityException ignore) {
+                }
+            }
+            if ((p = parker) != null)
+                U.unpark(p);
+        }
+
+        // Unsafe mechanics
+        private static final sun.misc.Unsafe U;
+        private static final long RUNSTATE;
+        private static final int ABASE;
+        private static final int ASHIFT;
+        static {
+            int s;
+            try {
+                U = getUnsafe();
+                Class<?> k = WorkQueue.class;
+                Class<?> ak = ForkJoinTask[].class;
+                RUNSTATE = U.objectFieldOffset
+                    (k.getDeclaredField("runState"));
+                ABASE = U.arrayBaseOffset(ak);
+                s = U.arrayIndexScale(ak);
+            } catch (Exception e) {
+                throw new Error(e);
+            }
+            if ((s & (s-1)) != 0)
+                throw new Error("data type scale not a power of two");
+            ASHIFT = 31 - Integer.numberOfLeadingZeros(s);
+        }
+    }
+    /**
+     * Per-thread records for threads that submit to pools. Currently
+     * holds only pseudo-random seed / index that is used to choose
+     * submission queues in method doSubmit. In the future, this may
+     * also incorporate a means to implement different task rejection
+     * and resubmission policies.
+     *
+     * Seeds for submitters and workers/workQueues work in basically
+     * the same way but are initialized and updated using slightly
+     * different mechanics. Both are initialized using the same
+     * approach as in class ThreadLocal, where successive values are
+     * unlikely to collide with previous values. This is done during
+     * registration for workers, but requires a separate AtomicInteger
+     * for submitters. Seeds are then randomly modified upon
+     * collisions using xorshifts, which requires a non-zero seed.
+     */
+    static final class Submitter {
+        int seed;
+        Submitter() {
+            int s = nextSubmitterSeed.getAndAdd(SEED_INCREMENT);
+            seed = (s == 0) ? 1 : s; // ensure non-zero
+        }
+    }
+
+    /** ThreadLocal class for Submitters */
+    static final class ThreadSubmitter extends ThreadLocal<Submitter> {
+        public Submitter initialValue() { return new Submitter(); }
+    }
+
+    // static fields (initialized in static initializer below)
+
+    /**
+     * Creates a new ForkJoinWorkerThread. This factory is used unless
+     * overridden in ForkJoinPool constructors.
+     */
+    public static final ForkJoinWorkerThreadFactory
+        defaultForkJoinWorkerThreadFactory;
+
+    /**
+     * Generator for assigning sequence numbers as pool names.
+     */
+    private static final AtomicInteger poolNumberGenerator;
+
+    /**
+     * Generator for initial hashes/seeds for submitters. Accessed by
+     * Submitter class constructor.
+     */
+    static final AtomicInteger nextSubmitterSeed;
+
+    /**
+     * Permission required for callers of methods that may start or
+     * kill threads.
+     */
+    private static final RuntimePermission modifyThreadPermission;
+
+    /**
+     * Per-thread submission bookeeping. Shared across all pools
+     * to reduce ThreadLocal pollution and because random motion
+     * to avoid contention in one pool is likely to hold for others.
+     */
+    private static final ThreadSubmitter submitters;
+
+    // static constants
+
+    /**
+     * The wakeup interval (in nanoseconds) for a worker waiting for a
+     * task when the pool is quiescent to instead try to shrink the
+     * number of workers.  The exact value does not matter too
+     * much. It must be short enough to release resources during
+     * sustained periods of idleness, but not so short that threads
+     * are continually re-created.
+     */
+    private static final long SHRINK_RATE =
+        4L * 1000L * 1000L * 1000L; // 4 seconds
+
+    /**
+     * The timeout value for attempted shrinkage, includes
+     * some slop to cope with system timer imprecision.
+     */
+    private static final long SHRINK_TIMEOUT = SHRINK_RATE - (SHRINK_RATE / 10);
+
+    /**
+     * The maximum stolen->joining link depth allowed in method
+     * tryHelpStealer.  Must be a power of two. This value also
+     * controls the maximum number of times to try to help join a task
+     * without any apparent progress or change in pool state before
+     * giving up and blocking (see awaitJoin).  Depths for legitimate
+     * chains are unbounded, but we use a fixed constant to avoid
+     * (otherwise unchecked) cycles and to bound staleness of
+     * traversal parameters at the expense of sometimes blocking when
+     * we could be helping.
+     */
+    private static final int MAX_HELP = 64;
+
+    /**
+     * Secondary time-based bound (in nanosecs) for helping attempts
+     * before trying compensated blocking in awaitJoin. Used in
+     * conjunction with MAX_HELP to reduce variance due to different
+     * polling rates associated with different helping options. The
+     * value should roughly approximate the time required to create
+     * and/or activate a worker thread.
+     */
+    private static final long COMPENSATION_DELAY = 1L << 18; // ~0.25 millisec
+
+    /**
+     * Increment for seed generators. See class ThreadLocal for
+     * explanation.
+     */
+    private static final int SEED_INCREMENT = 0x61c88647;
+
+    /**
+     * Bits and masks for control variables
+     *
+     * Field ctl is a long packed with:
+     * AC: Number of active running workers minus target parallelism (16 bits)
+     * TC: Number of total workers minus target parallelism (16 bits)
+     * ST: true if pool is terminating (1 bit)
+     * EC: the wait count of top waiting thread (15 bits)
+     * ID: poolIndex of top of Treiber stack of waiters (16 bits)
+     *
+     * When convenient, we can extract the upper 32 bits of counts and
+     * the lower 32 bits of queue state, u = (int)(ctl >>> 32) and e =
+     * (int)ctl.  The ec field is never accessed alone, but always
+     * together with id and st. The offsets of counts by the target
+     * parallelism and the positionings of fields makes it possible to
+     * perform the most common checks via sign tests of fields: When
+     * ac is negative, there are not enough active workers, when tc is
+     * negative, there are not enough total workers, and when e is
+     * negative, the pool is terminating.  To deal with these possibly
+     * negative fields, we use casts in and out of "short" and/or
+     * signed shifts to maintain signedness.
+     *
+     * When a thread is queued (inactivated), its eventCount field is
+     * set negative, which is the only way to tell if a worker is
+     * prevented from executing tasks, even though it must continue to
+     * scan for them to avoid queuing races. Note however that
+     * eventCount updates lag releases so usage requires care.
+     *
+     * Field runState is an int packed with:
+     * SHUTDOWN: true if shutdown is enabled (1 bit)
+     * SEQ:  a sequence number updated upon (de)registering workers (30 bits)
+     * INIT: set true after workQueues array construction (1 bit)
+     *
+     * The sequence number enables simple consistency checks:
+     * Staleness of read-only operations on the workQueues array can
+     * be checked by comparing runState before vs after the reads.
+     */
+
+    // bit positions/shifts for fields
+    private static final int  AC_SHIFT   = 48;
+    private static final int  TC_SHIFT   = 32;
+    private static final int  ST_SHIFT   = 31;
+    private static final int  EC_SHIFT   = 16;
+
+    // bounds
+    private static final int  SMASK      = 0xffff;  // short bits
+    private static final int  MAX_CAP    = 0x7fff;  // max #workers - 1
+    private static final int  SQMASK     = 0xfffe;  // even short bits
+    private static final int  SHORT_SIGN = 1 << 15;
+    private static final int  INT_SIGN   = 1 << 31;
+
+    // masks
+    private static final long STOP_BIT   = 0x0001L << ST_SHIFT;
+    private static final long AC_MASK    = ((long)SMASK) << AC_SHIFT;
+    private static final long TC_MASK    = ((long)SMASK) << TC_SHIFT;
+
+    // units for incrementing and decrementing
+    private static final long TC_UNIT    = 1L << TC_SHIFT;
+    private static final long AC_UNIT    = 1L << AC_SHIFT;
+
+    // masks and units for dealing with u = (int)(ctl >>> 32)
+    private static final int  UAC_SHIFT  = AC_SHIFT - 32;
+    private static final int  UTC_SHIFT  = TC_SHIFT - 32;
+    private static final int  UAC_MASK   = SMASK << UAC_SHIFT;
+    private static final int  UTC_MASK   = SMASK << UTC_SHIFT;
+    private static final int  UAC_UNIT   = 1 << UAC_SHIFT;
+    private static final int  UTC_UNIT   = 1 << UTC_SHIFT;
+
+    // masks and units for dealing with e = (int)ctl
+    private static final int E_MASK      = 0x7fffffff; // no STOP_BIT
+    private static final int E_SEQ       = 1 << EC_SHIFT;
+
+    // runState bits
+    private static final int SHUTDOWN    = 1 << 31;
+
+    // access mode for WorkQueue
+    static final int LIFO_QUEUE          =  0;
+    static final int FIFO_QUEUE          =  1;
+    static final int SHARED_QUEUE        = -1;
+
+    // Instance fields
+
+    /*
+     * Field layout order in this class tends to matter more than one
+     * would like. Runtime layout order is only loosely related to
+     * declaration order and may differ across JVMs, but the following
+     * empirically works OK on current JVMs.
+     */
+
+    volatile long ctl;                         // main pool control
+    final int parallelism;                     // parallelism level
+    final int localMode;                       // per-worker scheduling mode
+    final int submitMask;                      // submit queue index bound
+    int nextSeed;                              // for initializing worker seeds
+    volatile int runState;                     // shutdown status and seq
+    WorkQueue[] workQueues;                    // main registry
+    final Mutex lock;                          // for registration
+    final Condition termination;               // for awaitTermination
+    final ForkJoinWorkerThreadFactory factory; // factory for new workers
+    final Thread.UncaughtExceptionHandler ueh; // per-worker UEH
+    final AtomicLong stealCount;               // collect counts when terminated
+    final AtomicInteger nextWorkerNumber;      // to create worker name string
+    final String workerNamePrefix;             // to create worker name string
+
+    //  Creating, registering, and deregistering workers
+
+    /**
+     * Tries to create and start a worker
+     */
+    private void addWorker() {
+        Throwable ex = null;
+        ForkJoinWorkerThread wt = null;
+        try {
+            if ((wt = factory.newThread(this)) != null) {
+                wt.start();
+                return;
+            }
+        } catch (Throwable e) {
+            ex = e;
+        }
+        deregisterWorker(wt, ex); // adjust counts etc on failure
+    }
+
+    /**
+     * Callback from ForkJoinWorkerThread constructor to assign a
+     * public name. This must be separate from registerWorker because
+     * it is called during the "super" constructor call in
+     * ForkJoinWorkerThread.
+     */
+    final String nextWorkerName() {
+        return workerNamePrefix.concat
+            (Integer.toString(nextWorkerNumber.addAndGet(1)));
+    }
+
+    /**
+     * Callback from ForkJoinWorkerThread constructor to establish its
+     * poolIndex and record its WorkQueue. To avoid scanning bias due
+     * to packing entries in front of the workQueues array, we treat
+     * the array as a simple power-of-two hash table using per-thread
+     * seed as hash, expanding as needed.
+     *
+     * @param w the worker's queue
+     */
+
+    final void registerWorker(WorkQueue w) {
+        Mutex lock = this.lock;
+        lock.lock();
+        try {
+            WorkQueue[] ws = workQueues;
+            if (w != null && ws != null) {          // skip on shutdown/failure
+                int rs, n =  ws.length, m = n - 1;
+                int s = nextSeed += SEED_INCREMENT; // rarely-colliding sequence
+                w.seed = (s == 0) ? 1 : s;          // ensure non-zero seed
+                int r = (s << 1) | 1;               // use odd-numbered indices
+                if (ws[r &= m] != null) {           // collision
+                    int probes = 0;                 // step by approx half size
+                    int step = (n <= 4) ? 2 : ((n >>> 1) & SQMASK) + 2;
+                    while (ws[r = (r + step) & m] != null) {
+                        if (++probes >= n) {
+                            workQueues = ws = Arrays.copyOf(ws, n <<= 1);
+                            m = n - 1;
+                            probes = 0;
+                        }
+                    }
+                }
+                w.eventCount = w.poolIndex = r;     // establish before recording
+                ws[r] = w;                          // also update seq
+                runState = ((rs = runState) & SHUTDOWN) | ((rs + 2) & ~SHUTDOWN);
+            }
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * Final callback from terminating worker, as well as upon failure
+     * to construct or start a worker in addWorker.  Removes record of
+     * worker from array, and adjusts counts. If pool is shutting
+     * down, tries to complete termination.
+     *
+     * @param wt the worker thread or null if addWorker failed
+     * @param ex the exception causing failure, or null if none
+     */
+    final void deregisterWorker(ForkJoinWorkerThread wt, Throwable ex) {
+        Mutex lock = this.lock;
+        WorkQueue w = null;
+        if (wt != null && (w = wt.workQueue) != null) {
+            w.runState = -1;                // ensure runState is set
+            stealCount.getAndAdd(w.totalSteals + w.nsteals);
+            int idx = w.poolIndex;
+            lock.lock();
+            try {                           // remove record from array
+                WorkQueue[] ws = workQueues;
+                if (ws != null && idx >= 0 && idx < ws.length && ws[idx] == w)
+                    ws[idx] = null;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        long c;                             // adjust ctl counts
+        do {} while (!U.compareAndSwapLong
+                     (this, CTL, c = ctl, (((c - AC_UNIT) & AC_MASK) |
+                                           ((c - TC_UNIT) & TC_MASK) |
+                                           (c & ~(AC_MASK|TC_MASK)))));
+
+        if (!tryTerminate(false, false) && w != null) {
+            w.cancelAll();                  // cancel remaining tasks
+            if (w.array != null)            // suppress signal if never ran
+                signalWork();               // wake up or create replacement
+            if (ex == null)                 // help clean refs on way out
+                ForkJoinTask.helpExpungeStaleExceptions();
+        }
+
+        if (ex != null)                     // rethrow
+            U.throwException(ex);
+    }
+
+
+    // Submissions
+
+    /**
+     * Unless shutting down, adds the given task to a submission queue
+     * at submitter's current queue index (modulo submission
+     * range). If no queue exists at the index, one is created.  If
+     * the queue is busy, another index is randomly chosen. The
+     * submitMask bounds the effective number of queues to the
+     * (nearest power of two for) parallelism level.
+     *
+     * @param task the task. Caller must ensure non-null.
+     */
+    private void doSubmit(ForkJoinTask<?> task) {
+        Submitter s = submitters.get();
+        for (int r = s.seed, m = submitMask;;) {
+            WorkQueue[] ws; WorkQueue q;
+            int k = r & m & SQMASK;          // use only even indices
+            if (runState < 0 || (ws = workQueues) == null || ws.length <= k)
+                throw new RejectedExecutionException(); // shutting down
+            else if ((q = ws[k]) == null) {  // create new queue
+                WorkQueue nq = new WorkQueue(this, null, SHARED_QUEUE);
+                Mutex lock = this.lock;      // construct outside lock
+                lock.lock();
+                try {                        // recheck under lock
+                    int rs = runState;       // to update seq
+                    if (ws == workQueues && ws[k] == null) {
+                        ws[k] = nq;
+                        runState = ((rs & SHUTDOWN) | ((rs + 2) & ~SHUTDOWN));
+                    }
+                } finally {
+                    lock.unlock();
+                }
+            }
+            else if (q.trySharedPush(task)) {
+                signalWork();
+                return;
+            }
+            else if (m > 1) {                // move to a different index
+                r ^= r << 13;                // same xorshift as WorkQueues
+                r ^= r >>> 17;
+                s.seed = r ^= r << 5;
+            }
+            else
+                Thread.yield();              // yield if no alternatives
+        }
+    }
+
+    // Maintaining ctl counts
+
+    /**
+     * Increments active count; mainly called upon return from blocking.
+     */
+    final void incrementActiveCount() {
+        long c;
+        do {} while (!U.compareAndSwapLong(this, CTL, c = ctl, c + AC_UNIT));
+    }
+
+    /**
+     * Tries to activate or create a worker if too few are active.
+     */
+    final void signalWork() {
+        long c; int u;
+        while ((u = (int)((c = ctl) >>> 32)) < 0) {     // too few active
+            WorkQueue[] ws = workQueues; int e, i; WorkQueue w; Thread p;
+            if ((e = (int)c) > 0) {                     // at least one waiting
+                if (ws != null && (i = e & SMASK) < ws.length &&
+                    (w = ws[i]) != null && w.eventCount == (e | INT_SIGN)) {
+                    long nc = (((long)(w.nextWait & E_MASK)) |
+                               ((long)(u + UAC_UNIT) << 32));
+                    if (U.compareAndSwapLong(this, CTL, c, nc)) {
+                        w.eventCount = (e + E_SEQ) & E_MASK;
+                        if ((p = w.parker) != null)
+                            U.unpark(p);                // activate and release
+                        break;
+                    }
+                }
+                else
+                    break;
+            }
+            else if (e == 0 && (u & SHORT_SIGN) != 0) { // too few total
+                long nc = (long)(((u + UTC_UNIT) & UTC_MASK) |
+                                 ((u + UAC_UNIT) & UAC_MASK)) << 32;
+                if (U.compareAndSwapLong(this, CTL, c, nc)) {
+                    addWorker();
+                    break;
+                }
+            }
+            else
+                break;
+        }
+    }
+
+    // Scanning for tasks
+
+    /**
+     * Top-level runloop for workers, called by ForkJoinWorkerThread.run.
+     */
+    final void runWorker(WorkQueue w) {
+        w.growArray(false);         // initialize queue array in this thread
+        do { w.runTask(scan(w)); } while (w.runState >= 0);
+    }
+
+    /**
+     * Scans for and, if found, returns one task, else possibly
+     * inactivates the worker. This method operates on single reads of
+     * volatile state and is designed to be re-invoked continuously,
+     * in part because it returns upon detecting inconsistencies,
+     * contention, or state changes that indicate possible success on
+     * re-invocation.
+     *
+     * The scan searches for tasks across a random permutation of
+     * queues (starting at a random index and stepping by a random
+     * relative prime, checking each at least once).  The scan
+     * terminates upon either finding a non-empty queue, or completing
+     * the sweep. If the worker is not inactivated, it takes and
+     * returns a task from this queue.  On failure to find a task, we
+     * take one of the following actions, after which the caller will
+     * retry calling this method unless terminated.
+     *
+     * * If pool is terminating, terminate the worker.
+     *
+     * * If not a complete sweep, try to release a waiting worker.  If
+     * the scan terminated because the worker is inactivated, then the
+     * released worker will often be the calling worker, and it can
+     * succeed obtaining a task on the next call. Or maybe it is
+     * another worker, but with same net effect. Releasing in other
+     * cases as well ensures that we have enough workers running.
+     *
+     * * If not already enqueued, try to inactivate and enqueue the
+     * worker on wait queue. Or, if inactivating has caused the pool
+     * to be quiescent, relay to idleAwaitWork to check for
+     * termination and possibly shrink pool.
+     *
+     * * If already inactive, and the caller has run a task since the
+     * last empty scan, return (to allow rescan) unless others are
+     * also inactivated.  Field WorkQueue.rescans counts down on each
+     * scan to ensure eventual inactivation and blocking.
+     *
+     * * If already enqueued and none of the above apply, park
+     * awaiting signal,
+     *
+     * @param w the worker (via its WorkQueue)
+     * @return a task or null of none found
+     */
+    private final ForkJoinTask<?> scan(WorkQueue w) {
+        WorkQueue[] ws;                       // first update random seed
+        int r = w.seed; r ^= r << 13; r ^= r >>> 17; w.seed = r ^= r << 5;
+        int rs = runState, m;                 // volatile read order matters
+        if ((ws = workQueues) != null && (m = ws.length - 1) > 0) {
+            int ec = w.eventCount;            // ec is negative if inactive
+            int step = (r >>> 16) | 1;        // relative prime
+            for (int j = (m + 1) << 2; ; r += step) {
+                WorkQueue q; ForkJoinTask<?> t; ForkJoinTask<?>[] a; int b;
+                if ((q = ws[r & m]) != null && (b = q.base) - q.top < 0 &&
+                    (a = q.array) != null) {  // probably nonempty
+                    int i = (((a.length - 1) & b) << ASHIFT) + ABASE;
+                    t = (ForkJoinTask<?>)U.getObjectVolatile(a, i);
+                    if (q.base == b && ec >= 0 && t != null &&
+                        U.compareAndSwapObject(a, i, t, null)) {
+                        if (q.top - (q.base = b + 1) > 1)
+                            signalWork();    // help pushes signal
+                        return t;
+                    }
+                    else if (ec < 0 || j <= m) {
+                        rs = 0;               // mark scan as imcomplete
+                        break;                // caller can retry after release
+                    }
+                }
+                if (--j < 0)
+                    break;
+            }
+
+            long c = ctl; int e = (int)c, a = (int)(c >> AC_SHIFT), nr, ns;
+            if (e < 0)                        // decode ctl on empty scan
+                w.runState = -1;              // pool is terminating
+            else if (rs == 0 || rs != runState) { // incomplete scan
+                WorkQueue v; Thread p;        // try to release a waiter
+                if (e > 0 && a < 0 && w.eventCount == ec &&
+                    (v = ws[e & m]) != null && v.eventCount == (e | INT_SIGN)) {
+                    long nc = ((long)(v.nextWait & E_MASK) |
+                               ((c + AC_UNIT) & (AC_MASK|TC_MASK)));
+                    if (ctl == c && U.compareAndSwapLong(this, CTL, c, nc)) {
+                        v.eventCount = (e + E_SEQ) & E_MASK;
+                        if ((p = v.parker) != null)
+                            U.unpark(p);
+                    }
+                }
+            }
+            else if (ec >= 0) {               // try to enqueue/inactivate
+                long nc = (long)ec | ((c - AC_UNIT) & (AC_MASK|TC_MASK));
+                w.nextWait = e;
+                w.eventCount = ec | INT_SIGN; // mark as inactive
+                if (ctl != c || !U.compareAndSwapLong(this, CTL, c, nc))
+                    w.eventCount = ec;        // unmark on CAS failure
+                else {
+                    if ((ns = w.nsteals) != 0) {
+                        w.nsteals = 0;        // set rescans if ran task
+                        w.rescans = (a > 0) ? 0 : a + parallelism;
+                        w.totalSteals += ns;
+                    }
+                    if (a == 1 - parallelism) // quiescent
+                        idleAwaitWork(w, nc, c);
+                }
+            }
+            else if (w.eventCount < 0) {      // already queued
+                if ((nr = w.rescans) > 0) {   // continue rescanning
+                    int ac = a + parallelism;
+                    if (((w.rescans = (ac < nr) ? ac : nr - 1) & 3) == 0)
+                        Thread.yield();       // yield before block
+                }
+                else {
+                    Thread.interrupted();     // clear status
+                    Thread wt = Thread.currentThread();
+                    U.putObject(wt, PARKBLOCKER, this);
+                    w.parker = wt;            // emulate LockSupport.park
+                    if (w.eventCount < 0)     // recheck
+                        U.park(false, 0L);
+                    w.parker = null;
+                    U.putObject(wt, PARKBLOCKER, null);
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * If inactivating worker w has caused the pool to become
+     * quiescent, checks for pool termination, and, so long as this is
+     * not the only worker, waits for event for up to SHRINK_RATE
+     * nanosecs.  On timeout, if ctl has not changed, terminates the
+     * worker, which will in turn wake up another worker to possibly
+     * repeat this process.
+     *
+     * @param w the calling worker
+     * @param currentCtl the ctl value triggering possible quiescence
+     * @param prevCtl the ctl value to restore if thread is terminated
+     */
+    private void idleAwaitWork(WorkQueue w, long currentCtl, long prevCtl) {
+        if (w.eventCount < 0 && !tryTerminate(false, false) &&
+            (int)prevCtl != 0 && !hasQueuedSubmissions() && ctl == currentCtl) {
+            Thread wt = Thread.currentThread();
+            Thread.yield();            // yield before block
+            while (ctl == currentCtl) {
+                long startTime = System.nanoTime();
+                Thread.interrupted();  // timed variant of version in scan()
+                U.putObject(wt, PARKBLOCKER, this);
+                w.parker = wt;
+                if (ctl == currentCtl)
+                    U.park(false, SHRINK_RATE);
+                w.parker = null;
+                U.putObject(wt, PARKBLOCKER, null);
+                if (ctl != currentCtl)
+                    break;
+                if (System.nanoTime() - startTime >= SHRINK_TIMEOUT &&
+                    U.compareAndSwapLong(this, CTL, currentCtl, prevCtl)) {
+                    w.eventCount = (w.eventCount + E_SEQ) | E_MASK;
+                    w.runState = -1;   // shrink
+                    break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Tries to locate and execute tasks for a stealer of the given
+     * task, or in turn one of its stealers, Traces currentSteal ->
+     * currentJoin links looking for a thread working on a descendant
+     * of the given task and with a non-empty queue to steal back and
+     * execute tasks from. The first call to this method upon a
+     * waiting join will often entail scanning/search, (which is OK
+     * because the joiner has nothing better to do), but this method
+     * leaves hints in workers to speed up subsequent calls. The
+     * implementation is very branchy to cope with potential
+     * inconsistencies or loops encountering chains that are stale,
+     * unknown, or so long that they are likely cyclic.
+     *
+     * @param joiner the joining worker
+     * @param task the task to join
+     * @return 0 if no progress can be made, negative if task
+     * known complete, else positive
+     */
+    private int tryHelpStealer(WorkQueue joiner, ForkJoinTask<?> task) {
+        int stat = 0, steps = 0;                    // bound to avoid cycles
+        if (joiner != null && task != null) {       // hoist null checks
+            restart: for (;;) {
+                ForkJoinTask<?> subtask = task;     // current target
+                for (WorkQueue j = joiner, v;;) {   // v is stealer of subtask
+                    WorkQueue[] ws; int m, s, h;
+                    if ((s = task.status) < 0) {
+                        stat = s;
+                        break restart;
+                    }
+                    if ((ws = workQueues) == null || (m = ws.length - 1) <= 0)
+                        break restart;              // shutting down
+                    if ((v = ws[h = (j.stealHint | 1) & m]) == null ||
+                        v.currentSteal != subtask) {
+                        for (int origin = h;;) {    // find stealer
+                            if (((h = (h + 2) & m) & 15) == 1 &&
+                                (subtask.status < 0 || j.currentJoin != subtask))
+                                continue restart;   // occasional staleness check
+                            if ((v = ws[h]) != null &&
+                                v.currentSteal == subtask) {
+                                j.stealHint = h;    // save hint
+                                break;
+                            }
+                            if (h == origin)
+                                break restart;      // cannot find stealer
+                        }
+                    }
+                    for (;;) { // help stealer or descend to its stealer
+                        ForkJoinTask[] a;  int b;
+                        if (subtask.status < 0)     // surround probes with
+                            continue restart;       //   consistency checks
+                        if ((b = v.base) - v.top < 0 && (a = v.array) != null) {
+                            int i = (((a.length - 1) & b) << ASHIFT) + ABASE;
+                            ForkJoinTask<?> t =
+                                (ForkJoinTask<?>)U.getObjectVolatile(a, i);
+                            if (subtask.status < 0 || j.currentJoin != subtask ||
+                                v.currentSteal != subtask)
+                                continue restart;   // stale
+                            stat = 1;               // apparent progress
+                            if (t != null && v.base == b &&
+                                U.compareAndSwapObject(a, i, t, null)) {
+                                v.base = b + 1;     // help stealer
+                                joiner.runSubtask(t);
+                            }
+                            else if (v.base == b && ++steps == MAX_HELP)
+                                break restart;      // v apparently stalled
+                        }
+                        else {                      // empty -- try to descend
+                            ForkJoinTask<?> next = v.currentJoin;
+                            if (subtask.status < 0 || j.currentJoin != subtask ||
+                                v.currentSteal != subtask)
+                                continue restart;   // stale
+                            else if (next == null || ++steps == MAX_HELP)
+                                break restart;      // dead-end or maybe cyclic
+                            else {
+                                subtask = next;
+                                j = v;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return stat;
+    }
+
+    /**
+     * If task is at base of some steal queue, steals and executes it.
+     *
+     * @param joiner the joining worker
+     * @param task the task
+     */
+    private void tryPollForAndExec(WorkQueue joiner, ForkJoinTask<?> task) {
+        WorkQueue[] ws;
+        if ((ws = workQueues) != null) {
+            for (int j = 1; j < ws.length && task.status >= 0; j += 2) {
+                WorkQueue q = ws[j];
+                if (q != null && q.pollFor(task)) {
+                    joiner.runSubtask(task);
+                    break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Tries to decrement active count (sometimes implicitly) and
+     * possibly release or create a compensating worker in preparation
+     * for blocking. Fails on contention or termination. Otherwise,
+     * adds a new thread if no idle workers are available and either
+     * pool would become completely starved or: (at least half
+     * starved, and fewer than 50% spares exist, and there is at least
+     * one task apparently available). Even though the availability
+     * check requires a full scan, it is worthwhile in reducing false
+     * alarms.
+     *
+     * @param task if non-null, a task being waited for
+     * @param blocker if non-null, a blocker being waited for
+     * @return true if the caller can block, else should recheck and retry
+     */
+    final boolean tryCompensate(ForkJoinTask<?> task, ManagedBlocker blocker) {
+        int pc = parallelism, e;
+        long c = ctl;
+        WorkQueue[] ws = workQueues;
+        if ((e = (int)c) >= 0 && ws != null) {
+            int u, a, ac, hc;
+            int tc = (short)((u = (int)(c >>> 32)) >>> UTC_SHIFT) + pc;
+            boolean replace = false;
+            if ((a = u >> UAC_SHIFT) <= 0) {
+                if ((ac = a + pc) <= 1)
+                    replace = true;
+                else if ((e > 0 || (task != null &&
+                                    ac <= (hc = pc >>> 1) && tc < pc + hc))) {
+                    WorkQueue w;
+                    for (int j = 0; j < ws.length; ++j) {
+                        if ((w = ws[j]) != null && !w.isEmpty()) {
+                            replace = true;
+                            break;   // in compensation range and tasks available
+                        }
+                    }
+                }
+            }
+            if ((task == null || task.status >= 0) && // recheck need to block
+                (blocker == null || !blocker.isReleasable()) && ctl == c) {
+                if (!replace) {          // no compensation
+                    long nc = ((c - AC_UNIT) & AC_MASK) | (c & ~AC_MASK);
+                    if (U.compareAndSwapLong(this, CTL, c, nc))
+                        return true;
+                }
+                else if (e != 0) {       // release an idle worker
+                    WorkQueue w; Thread p; int i;
+                    if ((i = e & SMASK) < ws.length && (w = ws[i]) != null) {
+                        long nc = ((long)(w.nextWait & E_MASK) |
+                                   (c & (AC_MASK|TC_MASK)));
+                        if (w.eventCount == (e | INT_SIGN) &&
+                            U.compareAndSwapLong(this, CTL, c, nc)) {
+                            w.eventCount = (e + E_SEQ) & E_MASK;
+                            if ((p = w.parker) != null)
+                                U.unpark(p);
+                            return true;
+                        }
+                    }
+                }
+                else if (tc < MAX_CAP) { // create replacement
+                    long nc = ((c + TC_UNIT) & TC_MASK) | (c & ~TC_MASK);
+                    if (U.compareAndSwapLong(this, CTL, c, nc)) {
+                        addWorker();
+                        return true;
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Helps and/or blocks until the given task is done.
+     *
+     * @param joiner the joining worker
+     * @param task the task
+     * @return task status on exit
+     */
+    final int awaitJoin(WorkQueue joiner, ForkJoinTask<?> task) {
+        int s;
+        if ((s = task.status) >= 0) {
+            ForkJoinTask<?> prevJoin = joiner.currentJoin;
+            joiner.currentJoin = task;
+            long startTime = 0L;
+            for (int k = 0;;) {
+                if ((s = (joiner.isEmpty() ?           // try to help
+                          tryHelpStealer(joiner, task) :
+                          joiner.tryRemoveAndExec(task))) == 0 &&
+                    (s = task.status) >= 0) {
+                    if (k == 0) {
+                        startTime = System.nanoTime();
+                        tryPollForAndExec(joiner, task); // check uncommon case
+                    }
+                    else if ((k & (MAX_HELP - 1)) == 0 &&
+                             System.nanoTime() - startTime >=
+                             COMPENSATION_DELAY &&
+                             tryCompensate(task, null)) {
+                        if (task.trySetSignal()) {
+                            synchronized (task) {
+                                if (task.status >= 0) {
+                                    try {                // see ForkJoinTask
+                                        task.wait();     //  for explanation
+                                    } catch (InterruptedException ie) {
+                                    }
+                                }
+                                else
+                                    task.notifyAll();
+                            }
+                        }
+                        long c;                          // re-activate
+                        do {} while (!U.compareAndSwapLong
+                                     (this, CTL, c = ctl, c + AC_UNIT));
+                    }
+                }
+                if (s < 0 || (s = task.status) < 0) {
+                    joiner.currentJoin = prevJoin;
+                    break;
+                }
+                else if ((k++ & (MAX_HELP - 1)) == MAX_HELP >>> 1)
+                    Thread.yield();                     // for politeness
+            }
+        }
+        return s;
+    }
+
+    /**
+     * Stripped-down variant of awaitJoin used by timed joins. Tries
+     * to help join only while there is continuous progress. (Caller
+     * will then enter a timed wait.)
+     *
+     * @param joiner the joining worker
+     * @param task the task
+     * @return task status on exit
+     */
+    final int helpJoinOnce(WorkQueue joiner, ForkJoinTask<?> task) {
+        int s;
+        while ((s = task.status) >= 0 &&
+               (joiner.isEmpty() ?
+                tryHelpStealer(joiner, task) :
+                joiner.tryRemoveAndExec(task)) != 0)
+            ;
+        return s;
+    }
+
+    /**
+     * Returns a (probably) non-empty steal queue, if one is found
+     * during a random, then cyclic scan, else null.  This method must
+     * be retried by caller if, by the time it tries to use the queue,
+     * it is empty.
+     */
+    private WorkQueue findNonEmptyStealQueue(WorkQueue w) {
+        // Similar to loop in scan(), but ignoring submissions
+        int r = w.seed; r ^= r << 13; r ^= r >>> 17; w.seed = r ^= r << 5;
+        int step = (r >>> 16) | 1;
+        for (WorkQueue[] ws;;) {
+            int rs = runState, m;
+            if ((ws = workQueues) == null || (m = ws.length - 1) < 1)
+                return null;
+            for (int j = (m + 1) << 2; ; r += step) {
+                WorkQueue q = ws[((r << 1) | 1) & m];
+                if (q != null && !q.isEmpty())
+                    return q;
+                else if (--j < 0) {
+                    if (runState == rs)
+                        return null;
+                    break;
+                }
+            }
+        }
+    }
+
+
+    /**
+     * Runs tasks until {@code isQuiescent()}. We piggyback on
+     * active count ctl maintenance, but rather than blocking
+     * when tasks cannot be found, we rescan until all others cannot
+     * find tasks either.
+     */
+    final void helpQuiescePool(WorkQueue w) {
+        for (boolean active = true;;) {
+            ForkJoinTask<?> localTask; // exhaust local queue
+            while ((localTask = w.nextLocalTask()) != null)
+                localTask.doExec();
+            WorkQueue q = findNonEmptyStealQueue(w);
+            if (q != null) {
+                ForkJoinTask<?> t; int b;
+                if (!active) {      // re-establish active count
+                    long c;
+                    active = true;
+                    do {} while (!U.compareAndSwapLong
+                                 (this, CTL, c = ctl, c + AC_UNIT));
+                }
+                if ((b = q.base) - q.top < 0 && (t = q.pollAt(b)) != null)
+                    w.runSubtask(t);
+            }
+            else {
+                long c;
+                if (active) {       // decrement active count without queuing
+                    active = false;
+                    do {} while (!U.compareAndSwapLong
+                                 (this, CTL, c = ctl, c -= AC_UNIT));
+                }
+                else
+                    c = ctl;        // re-increment on exit
+                if ((int)(c >> AC_SHIFT) + parallelism == 0) {
+                    do {} while (!U.compareAndSwapLong
+                                 (this, CTL, c = ctl, c + AC_UNIT));
+                    break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Gets and removes a local or stolen task for the given worker.
+     *
+     * @return a task, if available
+     */
+    final ForkJoinTask<?> nextTaskFor(WorkQueue w) {
+        for (ForkJoinTask<?> t;;) {
+            WorkQueue q; int b;
+            if ((t = w.nextLocalTask()) != null)
+                return t;
+            if ((q = findNonEmptyStealQueue(w)) == null)
+                return null;
+            if ((b = q.base) - q.top < 0 && (t = q.pollAt(b)) != null)
+                return t;
+        }
+    }
+
+    /**
+     * Returns the approximate (non-atomic) number of idle threads per
+     * active thread to offset steal queue size for method
+     * ForkJoinTask.getSurplusQueuedTaskCount().
+     */
+    final int idlePerActive() {
+        // Approximate at powers of two for small values, saturate past 4
+        int p = parallelism;
+        int a = p + (int)(ctl >> AC_SHIFT);
+        return (a > (p >>>= 1) ? 0 :
+                a > (p >>>= 1) ? 1 :
+                a > (p >>>= 1) ? 2 :
+                a > (p >>>= 1) ? 4 :
+                8);
+    }
+
+    //  Termination
+
+    /**
+     * Possibly initiates and/or completes termination.  The caller
+     * triggering termination runs three passes through workQueues:
+     * (0) Setting termination status, followed by wakeups of queued
+     * workers; (1) cancelling all tasks; (2) interrupting lagging
+     * threads (likely in external tasks, but possibly also blocked in
+     * joins).  Each pass repeats previous steps because of potential
+     * lagging thread creation.
+     *
+     * @param now if true, unconditionally terminate, else only
+     * if no work and no active workers
+     * @param enable if true, enable shutdown when next possible
+     * @return true if now terminating or terminated
+     */
+    private boolean tryTerminate(boolean now, boolean enable) {
+        Mutex lock = this.lock;
+        for (long c;;) {
+            if (((c = ctl) & STOP_BIT) != 0) {      // already terminating
+                if ((short)(c >>> TC_SHIFT) == -parallelism) {
+                    lock.lock();                    // don't need try/finally
+                    termination.signalAll();        // signal when 0 workers
+                    lock.unlock();
+                }
+                return true;
+            }
+            if (runState >= 0) {                    // not yet enabled
+                if (!enable)
+                    return false;
+                lock.lock();
+                runState |= SHUTDOWN;
+                lock.unlock();
+            }
+            if (!now) {                             // check if idle & no tasks
+                if ((int)(c >> AC_SHIFT) != -parallelism ||
+                    hasQueuedSubmissions())
+                    return false;
+                // Check for unqueued inactive workers. One pass suffices.
+                WorkQueue[] ws = workQueues; WorkQueue w;
+                if (ws != null) {
+                    for (int i = 1; i < ws.length; i += 2) {
+                        if ((w = ws[i]) != null && w.eventCount >= 0)
+                            return false;
+                    }
+                }
+            }
+            if (U.compareAndSwapLong(this, CTL, c, c | STOP_BIT)) {
+                for (int pass = 0; pass < 3; ++pass) {
+                    WorkQueue[] ws = workQueues;
+                    if (ws != null) {
+                        WorkQueue w;
+                        int n = ws.length;
+                        for (int i = 0; i < n; ++i) {
+                            if ((w = ws[i]) != null) {
+                                w.runState = -1;
+                                if (pass > 0) {
+                                    w.cancelAll();
+                                    if (pass > 1)
+                                        w.interruptOwner();
+                                }
+                            }
+                        }
+                        // Wake up workers parked on event queue
+                        int i, e; long cc; Thread p;
+                        while ((e = (int)(cc = ctl) & E_MASK) != 0 &&
+                               (i = e & SMASK) < n &&
+                               (w = ws[i]) != null) {
+                            long nc = ((long)(w.nextWait & E_MASK) |
+                                       ((cc + AC_UNIT) & AC_MASK) |
+                                       (cc & (TC_MASK|STOP_BIT)));
+                            if (w.eventCount == (e | INT_SIGN) &&
+                                U.compareAndSwapLong(this, CTL, cc, nc)) {
+                                w.eventCount = (e + E_SEQ) & E_MASK;
+                                w.runState = -1;
+                                if ((p = w.parker) != null)
+                                    U.unpark(p);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Exported methods
+
+    // Constructors
+
+    /**
+     * Creates a {@code ForkJoinPool} with parallelism equal to {@link
+     * java.lang.Runtime#availableProcessors}, using the {@linkplain
+     * #defaultForkJoinWorkerThreadFactory default thread factory},
+     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     *
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     */
+    public ForkJoinPool() {
+        this(Runtime.getRuntime().availableProcessors(),
+             defaultForkJoinWorkerThreadFactory, null, false);
+    }
+
+    /**
+     * Creates a {@code ForkJoinPool} with the indicated parallelism
+     * level, the {@linkplain
+     * #defaultForkJoinWorkerThreadFactory default thread factory},
+     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     *
+     * @param parallelism the parallelism level
+     * @throws IllegalArgumentException if parallelism less than or
+     *         equal to zero, or greater than implementation limit
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     */
+    public ForkJoinPool(int parallelism) {
+        this(parallelism, defaultForkJoinWorkerThreadFactory, null, false);
+    }
+
+    /**
+     * Creates a {@code ForkJoinPool} with the given parameters.
+     *
+     * @param parallelism the parallelism level. For default value,
+     * use {@link java.lang.Runtime#availableProcessors}.
+     * @param factory the factory for creating new threads. For default value,
+     * use {@link #defaultForkJoinWorkerThreadFactory}.
+     * @param handler the handler for internal worker threads that
+     * terminate due to unrecoverable errors encountered while executing
+     * tasks. For default value, use {@code null}.
+     * @param asyncMode if true,
+     * establishes local first-in-first-out scheduling mode for forked
+     * tasks that are never joined. This mode may be more appropriate
+     * than default locally stack-based mode in applications in which
+     * worker threads only process event-style asynchronous tasks.
+     * For default value, use {@code false}.
+     * @throws IllegalArgumentException if parallelism less than or
+     *         equal to zero, or greater than implementation limit
+     * @throws NullPointerException if the factory is null
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     */
+    public ForkJoinPool(int parallelism,
+                        ForkJoinWorkerThreadFactory factory,
+                        Thread.UncaughtExceptionHandler handler,
+                        boolean asyncMode) {
+        checkPermission();
+        if (factory == null)
+            throw new NullPointerException();
+        if (parallelism <= 0 || parallelism > MAX_CAP)
+            throw new IllegalArgumentException();
+        this.parallelism = parallelism;
+        this.factory = factory;
+        this.ueh = handler;
+        this.localMode = asyncMode ? FIFO_QUEUE : LIFO_QUEUE;
+        long np = (long)(-parallelism); // offset ctl counts
+        this.ctl = ((np << AC_SHIFT) & AC_MASK) | ((np << TC_SHIFT) & TC_MASK);
+        // Use nearest power 2 for workQueues size. See Hackers Delight sec 3.2.
+        int n = parallelism - 1;
+        n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
+        int size = (n + 1) << 1;        // #slots = 2*#workers
+        this.submitMask = size - 1;     // room for max # of submit queues
+        this.workQueues = new WorkQueue[size];
+        this.termination = (this.lock = new Mutex()).newCondition();
+        this.stealCount = new AtomicLong();
+        this.nextWorkerNumber = new AtomicInteger();
+        int pn = poolNumberGenerator.incrementAndGet();
+        StringBuilder sb = new StringBuilder("ForkJoinPool-");
+        sb.append(Integer.toString(pn));
+        sb.append("-worker-");
+        this.workerNamePrefix = sb.toString();
+        lock.lock();
+        this.runState = 1;              // set init flag
+        lock.unlock();
+    }
+
+    // Execution methods
+
+    /**
+     * Performs the given task, returning its result upon completion.
+     * If the computation encounters an unchecked Exception or Error,
+     * it is rethrown as the outcome of this invocation.  Rethrown
+     * exceptions behave in the same way as regular exceptions, but,
+     * when possible, contain stack traces (as displayed for example
+     * using {@code ex.printStackTrace()}) of both the current thread
+     * as well as the thread actually encountering the exception;
+     * minimally only the latter.
+     *
+     * @param task the task
+     * @return the task's result
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public <T> T invoke(ForkJoinTask<T> task) {
+        if (task == null)
+            throw new NullPointerException();
+        doSubmit(task);
+        return task.join();
+    }
+
+    /**
+     * Arranges for (asynchronous) execution of the given task.
+     *
+     * @param task the task
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public void execute(ForkJoinTask<?> task) {
+        if (task == null)
+            throw new NullPointerException();
+        doSubmit(task);
+    }
+
+    // AbstractExecutorService methods
+
+    /**
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public void execute(Runnable task) {
+        if (task == null)
+            throw new NullPointerException();
+        ForkJoinTask<?> job;
+        if (task instanceof ForkJoinTask<?>) // avoid re-wrap
+            job = (ForkJoinTask<?>) task;
+        else
+            job = new ForkJoinTask.AdaptedRunnableAction(task);
+        doSubmit(job);
+    }
+
+    /**
+     * Submits a ForkJoinTask for execution.
+     *
+     * @param task the task to submit
+     * @return the task
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public <T> ForkJoinTask<T> submit(ForkJoinTask<T> task) {
+        if (task == null)
+            throw new NullPointerException();
+        doSubmit(task);
+        return task;
+    }
+
+    /**
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public <T> ForkJoinTask<T> submit(Callable<T> task) {
+        ForkJoinTask<T> job = new ForkJoinTask.AdaptedCallable<T>(task);
+        doSubmit(job);
+        return job;
+    }
+
+    /**
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public <T> ForkJoinTask<T> submit(Runnable task, T result) {
+        ForkJoinTask<T> job = new ForkJoinTask.AdaptedRunnable<T>(task, result);
+        doSubmit(job);
+        return job;
+    }
+
+    /**
+     * @throws NullPointerException if the task is null
+     * @throws RejectedExecutionException if the task cannot be
+     *         scheduled for execution
+     */
+    public ForkJoinTask<?> submit(Runnable task) {
+        if (task == null)
+            throw new NullPointerException();
+        ForkJoinTask<?> job;
+        if (task instanceof ForkJoinTask<?>) // avoid re-wrap
+            job = (ForkJoinTask<?>) task;
+        else
+            job = new ForkJoinTask.AdaptedRunnableAction(task);
+        doSubmit(job);
+        return job;
+    }
+
+    /**
+     * @throws NullPointerException       {@inheritDoc}
+     * @throws RejectedExecutionException {@inheritDoc}
+     */
+    public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) {
+        // In previous versions of this class, this method constructed
+        // a task to run ForkJoinTask.invokeAll, but now external
+        // invocation of multiple tasks is at least as efficient.
+        List<ForkJoinTask<T>> fs = new ArrayList<ForkJoinTask<T>>(tasks.size());
+        // Workaround needed because method wasn't declared with
+        // wildcards in return type but should have been.
+        @SuppressWarnings({"unchecked", "rawtypes"})
+            List<Future<T>> futures = (List<Future<T>>) (List) fs;
+
+        boolean done = false;
+        try {
+            for (Callable<T> t : tasks) {
+                ForkJoinTask<T> f = new ForkJoinTask.AdaptedCallable<T>(t);
+                doSubmit(f);
+                fs.add(f);
+            }
+            for (ForkJoinTask<T> f : fs)
+                f.quietlyJoin();
+            done = true;
+            return futures;
+        } finally {
+            if (!done)
+                for (ForkJoinTask<T> f : fs)
+                    f.cancel(false);
+        }
+    }
+
+    /**
+     * Returns the factory used for constructing new workers.
+     *
+     * @return the factory used for constructing new workers
+     */
+    public ForkJoinWorkerThreadFactory getFactory() {
+        return factory;
+    }
+
+    /**
+     * Returns the handler for internal worker threads that terminate
+     * due to unrecoverable errors encountered while executing tasks.
+     *
+     * @return the handler, or {@code null} if none
+     */
+    public Thread.UncaughtExceptionHandler getUncaughtExceptionHandler() {
+        return ueh;
+    }
+
+    /**
+     * Returns the targeted parallelism level of this pool.
+     *
+     * @return the targeted parallelism level of this pool
+     */
+    public int getParallelism() {
+        return parallelism;
+    }
+
+    /**
+     * Returns the number of worker threads that have started but not
+     * yet terminated.  The result returned by this method may differ
+     * from {@link #getParallelism} when threads are created to
+     * maintain parallelism when others are cooperatively blocked.
+     *
+     * @return the number of worker threads
+     */
+    public int getPoolSize() {
+        return parallelism + (short)(ctl >>> TC_SHIFT);
+    }
+
+    /**
+     * Returns {@code true} if this pool uses local first-in-first-out
+     * scheduling mode for forked tasks that are never joined.
+     *
+     * @return {@code true} if this pool uses async mode
+     */
+    public boolean getAsyncMode() {
+        return localMode != 0;
+    }
+
+    /**
+     * Returns an estimate of the number of worker threads that are
+     * not blocked waiting to join tasks or for other managed
+     * synchronization. This method may overestimate the
+     * number of running threads.
+     *
+     * @return the number of worker threads
+     */
+    public int getRunningThreadCount() {
+        int rc = 0;
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 1; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null && w.isApparentlyUnblocked())
+                    ++rc;
+            }
+        }
+        return rc;
+    }
+
+    /**
+     * Returns an estimate of the number of threads that are currently
+     * stealing or executing tasks. This method may overestimate the
+     * number of active threads.
+     *
+     * @return the number of active threads
+     */
+    public int getActiveThreadCount() {
+        int r = parallelism + (int)(ctl >> AC_SHIFT);
+        return (r <= 0) ? 0 : r; // suppress momentarily negative values
+    }
+
+    /**
+     * Returns {@code true} if all worker threads are currently idle.
+     * An idle worker is one that cannot obtain a task to execute
+     * because none are available to steal from other threads, and
+     * there are no pending submissions to the pool. This method is
+     * conservative; it might not return {@code true} immediately upon
+     * idleness of all threads, but will eventually become true if
+     * threads remain inactive.
+     *
+     * @return {@code true} if all threads are currently idle
+     */
+    public boolean isQuiescent() {
+        return (int)(ctl >> AC_SHIFT) + parallelism == 0;
+    }
+
+    /**
+     * Returns an estimate of the total number of tasks stolen from
+     * one thread's work queue by another. The reported value
+     * underestimates the actual total number of steals when the pool
+     * is not quiescent. This value may be useful for monitoring and
+     * tuning fork/join programs: in general, steal counts should be
+     * high enough to keep threads busy, but low enough to avoid
+     * overhead and contention across threads.
+     *
+     * @return the number of steals
+     */
+    public long getStealCount() {
+        long count = stealCount.get();
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 1; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null)
+                    count += w.totalSteals;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Returns an estimate of the total number of tasks currently held
+     * in queues by worker threads (but not including tasks submitted
+     * to the pool that have not begun executing). This value is only
+     * an approximation, obtained by iterating across all threads in
+     * the pool. This method may be useful for tuning task
+     * granularities.
+     *
+     * @return the number of queued tasks
+     */
+    public long getQueuedTaskCount() {
+        long count = 0;
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 1; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null)
+                    count += w.queueSize();
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Returns an estimate of the number of tasks submitted to this
+     * pool that have not yet begun executing.  This method may take
+     * time proportional to the number of submissions.
+     *
+     * @return the number of queued submissions
+     */
+    public int getQueuedSubmissionCount() {
+        int count = 0;
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 0; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null)
+                    count += w.queueSize();
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Returns {@code true} if there are any tasks submitted to this
+     * pool that have not yet begun executing.
+     *
+     * @return {@code true} if there are any queued submissions
+     */
+    public boolean hasQueuedSubmissions() {
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 0; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null && !w.isEmpty())
+                    return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Removes and returns the next unexecuted submission if one is
+     * available.  This method may be useful in extensions to this
+     * class that re-assign work in systems with multiple pools.
+     *
+     * @return the next submission, or {@code null} if none
+     */
+    protected ForkJoinTask<?> pollSubmission() {
+        WorkQueue[] ws; WorkQueue w; ForkJoinTask<?> t;
+        if ((ws = workQueues) != null) {
+            for (int i = 0; i < ws.length; i += 2) {
+                if ((w = ws[i]) != null && (t = w.poll()) != null)
+                    return t;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Removes all available unexecuted submitted and forked tasks
+     * from scheduling queues and adds them to the given collection,
+     * without altering their execution status. These may include
+     * artificially generated or wrapped tasks. This method is
+     * designed to be invoked only when the pool is known to be
+     * quiescent. Invocations at other times may not remove all
+     * tasks. A failure encountered while attempting to add elements
+     * to collection {@code c} may result in elements being in
+     * neither, either or both collections when the associated
+     * exception is thrown.  The behavior of this operation is
+     * undefined if the specified collection is modified while the
+     * operation is in progress.
+     *
+     * @param c the collection to transfer elements into
+     * @return the number of elements transferred
+     */
+    protected int drainTasksTo(Collection<? super ForkJoinTask<?>> c) {
+        int count = 0;
+        WorkQueue[] ws; WorkQueue w; ForkJoinTask<?> t;
+        if ((ws = workQueues) != null) {
+            for (int i = 0; i < ws.length; ++i) {
+                if ((w = ws[i]) != null) {
+                    while ((t = w.poll()) != null) {
+                        c.add(t);
+                        ++count;
+                    }
+                }
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Returns a string identifying this pool, as well as its state,
+     * including indications of run state, parallelism level, and
+     * worker and task counts.
+     *
+     * @return a string identifying this pool, as well as its state
+     */
+    public String toString() {
+        // Use a single pass through workQueues to collect counts
+        long qt = 0L, qs = 0L; int rc = 0;
+        long st = stealCount.get();
+        long c = ctl;
+        WorkQueue[] ws; WorkQueue w;
+        if ((ws = workQueues) != null) {
+            for (int i = 0; i < ws.length; ++i) {
+                if ((w = ws[i]) != null) {
+                    int size = w.queueSize();
+                    if ((i & 1) == 0)
+                        qs += size;
+                    else {
+                        qt += size;
+                        st += w.totalSteals;
+                        if (w.isApparentlyUnblocked())
+                            ++rc;
+                    }
+                }
+            }
+        }
+        int pc = parallelism;
+        int tc = pc + (short)(c >>> TC_SHIFT);
+        int ac = pc + (int)(c >> AC_SHIFT);
+        if (ac < 0) // ignore transient negative
+            ac = 0;
+        String level;
+        if ((c & STOP_BIT) != 0)
+            level = (tc == 0) ? "Terminated" : "Terminating";
+        else
+            level = runState < 0 ? "Shutting down" : "Running";
+        return super.toString() +
+            "[" + level +
+            ", parallelism = " + pc +
+            ", size = " + tc +
+            ", active = " + ac +
+            ", running = " + rc +
+            ", steals = " + st +
+            ", tasks = " + qt +
+            ", submissions = " + qs +
+            "]";
+    }
+
+    /**
+     * Initiates an orderly shutdown in which previously submitted
+     * tasks are executed, but no new tasks will be accepted.
+     * Invocation has no additional effect if already shut down.
+     * Tasks that are in the process of being submitted concurrently
+     * during the course of this method may or may not be rejected.
+     *
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     */
+    public void shutdown() {
+        checkPermission();
+        tryTerminate(false, true);
+    }
+
+    /**
+     * Attempts to cancel and/or stop all tasks, and reject all
+     * subsequently submitted tasks.  Tasks that are in the process of
+     * being submitted or executed concurrently during the course of
+     * this method may or may not be rejected. This method cancels
+     * both existing and unexecuted tasks, in order to permit
+     * termination in the presence of task dependencies. So the method
+     * always returns an empty list (unlike the case for some other
+     * Executors).
+     *
+     * @return an empty list
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     */
+    public List<Runnable> shutdownNow() {
+        checkPermission();
+        tryTerminate(true, true);
+        return Collections.emptyList();
+    }
+
+    /**
+     * Returns {@code true} if all tasks have completed following shut down.
+     *
+     * @return {@code true} if all tasks have completed following shut down
+     */
+    public boolean isTerminated() {
+        long c = ctl;
+        return ((c & STOP_BIT) != 0L &&
+                (short)(c >>> TC_SHIFT) == -parallelism);
+    }
+
+    /**
+     * Returns {@code true} if the process of termination has
+     * commenced but not yet completed.  This method may be useful for
+     * debugging. A return of {@code true} reported a sufficient
+     * period after shutdown may indicate that submitted tasks have
+     * ignored or suppressed interruption, or are waiting for IO,
+     * causing this executor not to properly terminate. (See the
+     * advisory notes for class {@link ForkJoinTask} stating that
+     * tasks should not normally entail blocking operations.  But if
+     * they do, they must abort them on interrupt.)
+     *
+     * @return {@code true} if terminating but not yet terminated
+     */
+    public boolean isTerminating() {
+        long c = ctl;
+        return ((c & STOP_BIT) != 0L &&
+                (short)(c >>> TC_SHIFT) != -parallelism);
+    }
+
+    /**
+     * Returns {@code true} if this pool has been shut down.
+     *
+     * @return {@code true} if this pool has been shut down
+     */
+    public boolean isShutdown() {
+        return runState < 0;
+    }
+
+    /**
+     * Blocks until all tasks have completed execution after a shutdown
+     * request, or the timeout occurs, or the current thread is
+     * interrupted, whichever happens first.
+     *
+     * @param timeout the maximum time to wait
+     * @param unit the time unit of the timeout argument
+     * @return {@code true} if this executor terminated and
+     *         {@code false} if the timeout elapsed before termination
+     * @throws InterruptedException if interrupted while waiting
+     */
+    public boolean awaitTermination(long timeout, TimeUnit unit)
+        throws InterruptedException {
+        long nanos = unit.toNanos(timeout);
+        final Mutex lock = this.lock;
+        lock.lock();
+        try {
+            for (;;) {
+                if (isTerminated())
+                    return true;
+                if (nanos <= 0)
+                    return false;
+                nanos = termination.awaitNanos(nanos);
+            }
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * Interface for extending managed parallelism for tasks running
+     * in {@link ForkJoinPool}s.
+     *
+     * <p>A {@code ManagedBlocker} provides two methods.  Method
+     * {@code isReleasable} must return {@code true} if blocking is
+     * not necessary. Method {@code block} blocks the current thread
+     * if necessary (perhaps internally invoking {@code isReleasable}
+     * before actually blocking). These actions are performed by any
+     * thread invoking {@link ForkJoinPool#managedBlock}.  The
+     * unusual methods in this API accommodate synchronizers that may,
+     * but don't usually, block for long periods. Similarly, they
+     * allow more efficient internal handling of cases in which
+     * additional workers may be, but usually are not, needed to
+     * ensure sufficient parallelism.  Toward this end,
+     * implementations of method {@code isReleasable} must be amenable
+     * to repeated invocation.
+     *
+     * <p>For example, here is a ManagedBlocker based on a
+     * ReentrantLock:
+     *  <pre> {@code
+     * class ManagedLocker implements ManagedBlocker {
+     *   final ReentrantLock lock;
+     *   boolean hasLock = false;
+     *   ManagedLocker(ReentrantLock lock) { this.lock = lock; }
+     *   public boolean block() {
+     *     if (!hasLock)
+     *       lock.lock();
+     *     return true;
+     *   }
+     *   public boolean isReleasable() {
+     *     return hasLock || (hasLock = lock.tryLock());
+     *   }
+     * }}</pre>
+     *
+     * <p>Here is a class that possibly blocks waiting for an
+     * item on a given queue:
+     *  <pre> {@code
+     * class QueueTaker<E> implements ManagedBlocker {
+     *   final BlockingQueue<E> queue;
+     *   volatile E item = null;
+     *   QueueTaker(BlockingQueue<E> q) { this.queue = q; }
+     *   public boolean block() throws InterruptedException {
+     *     if (item == null)
+     *       item = queue.take();
+     *     return true;
+     *   }
+     *   public boolean isReleasable() {
+     *     return item != null || (item = queue.poll()) != null;
+     *   }
+     *   public E getItem() { // call after pool.managedBlock completes
+     *     return item;
+     *   }
+     * }}</pre>
+     */
+    public static interface ManagedBlocker {
+        /**
+         * Possibly blocks the current thread, for example waiting for
+         * a lock or condition.
+         *
+         * @return {@code true} if no additional blocking is necessary
+         * (i.e., if isReleasable would return true)
+         * @throws InterruptedException if interrupted while waiting
+         * (the method is not required to do so, but is allowed to)
+         */
+        boolean block() throws InterruptedException;
+
+        /**
+         * Returns {@code true} if blocking is unnecessary.
+         */
+        boolean isReleasable();
+    }
+
+    /**
+     * Blocks in accord with the given blocker.  If the current thread
+     * is a {@link ForkJoinWorkerThread}, this method possibly
+     * arranges for a spare thread to be activated if necessary to
+     * ensure sufficient parallelism while the current thread is blocked.
+     *
+     * <p>If the caller is not a {@link ForkJoinTask}, this method is
+     * behaviorally equivalent to
+     *  <pre> {@code
+     * while (!blocker.isReleasable())
+     *   if (blocker.block())
+     *     return;
+     * }</pre>
+     *
+     * If the caller is a {@code ForkJoinTask}, then the pool may
+     * first be expanded to ensure parallelism, and later adjusted.
+     *
+     * @param blocker the blocker
+     * @throws InterruptedException if blocker.block did so
+     */
+    public static void managedBlock(ManagedBlocker blocker)
+        throws InterruptedException {
+        Thread t = Thread.currentThread();
+        ForkJoinPool p = ((t instanceof ForkJoinWorkerThread) ?
+                          ((ForkJoinWorkerThread)t).pool : null);
+        while (!blocker.isReleasable()) {
+            if (p == null || p.tryCompensate(null, blocker)) {
+                try {
+                    do {} while (!blocker.isReleasable() && !blocker.block());
+                } finally {
+                    if (p != null)
+                        p.incrementActiveCount();
+                }
+                break;
+            }
+        }
+    }
+
+    // AbstractExecutorService overrides.  These rely on undocumented
+    // fact that ForkJoinTask.adapt returns ForkJoinTasks that also
+    // implement RunnableFuture.
+
+    protected <T> RunnableFuture<T> newTaskFor(Runnable runnable, T value) {
+        return new ForkJoinTask.AdaptedRunnable<T>(runnable, value);
+    }
+
+    protected <T> RunnableFuture<T> newTaskFor(Callable<T> callable) {
+        return new ForkJoinTask.AdaptedCallable<T>(callable);
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe U;
+    private static final long CTL;
+    private static final long PARKBLOCKER;
+    private static final int ABASE;
+    private static final int ASHIFT;
+
+    static {
+        poolNumberGenerator = new AtomicInteger();
+        nextSubmitterSeed = new AtomicInteger(0x55555555);
+        modifyThreadPermission = new RuntimePermission("modifyThread");
+        defaultForkJoinWorkerThreadFactory =
+            new DefaultForkJoinWorkerThreadFactory();
+        submitters = new ThreadSubmitter();
+        int s;
+        try {
+            U = getUnsafe();
+            Class<?> k = ForkJoinPool.class;
+            Class<?> ak = ForkJoinTask[].class;
+            CTL = U.objectFieldOffset
+                (k.getDeclaredField("ctl"));
+            Class<?> tk = Thread.class;
+            PARKBLOCKER = U.objectFieldOffset
+                (tk.getDeclaredField("parkBlocker"));
+            ABASE = U.arrayBaseOffset(ak);
+            s = U.arrayIndexScale(ak);
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+        if ((s & (s-1)) != 0)
+            throw new Error("data type scale not a power of two");
+        ASHIFT = 31 - Integer.numberOfLeadingZeros(s);
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166y/ForkJoinTask.java b/src/main/java/jsr166y/ForkJoinTask.java
new file mode 100644
index 00000000000..86d52180657
--- /dev/null
+++ b/src/main/java/jsr166y/ForkJoinTask.java
@@ -0,0 +1,1568 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.List;
+import java.util.RandomAccess;
+import java.lang.ref.WeakReference;
+import java.lang.ref.ReferenceQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.RunnableFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.locks.ReentrantLock;
+import java.lang.reflect.Constructor;
+
+/**
+ * Abstract base class for tasks that run within a {@link ForkJoinPool}.
+ * A {@code ForkJoinTask} is a thread-like entity that is much
+ * lighter weight than a normal thread.  Huge numbers of tasks and
+ * subtasks may be hosted by a small number of actual threads in a
+ * ForkJoinPool, at the price of some usage limitations.
+ *
+ * <p>A "main" {@code ForkJoinTask} begins execution when submitted
+ * to a {@link ForkJoinPool}.  Once started, it will usually in turn
+ * start other subtasks.  As indicated by the name of this class,
+ * many programs using {@code ForkJoinTask} employ only methods
+ * {@link #fork} and {@link #join}, or derivatives such as {@link
+ * #invokeAll(ForkJoinTask...) invokeAll}.  However, this class also
+ * provides a number of other methods that can come into play in
+ * advanced usages, as well as extension mechanics that allow
+ * support of new forms of fork/join processing.
+ *
+ * <p>A {@code ForkJoinTask} is a lightweight form of {@link Future}.
+ * The efficiency of {@code ForkJoinTask}s stems from a set of
+ * restrictions (that are only partially statically enforceable)
+ * reflecting their main use as computational tasks calculating pure
+ * functions or operating on purely isolated objects.  The primary
+ * coordination mechanisms are {@link #fork}, that arranges
+ * asynchronous execution, and {@link #join}, that doesn't proceed
+ * until the task's result has been computed.  Computations should
+ * ideally avoid {@code synchronized} methods or blocks, and should
+ * minimize other blocking synchronization apart from joining other
+ * tasks or using synchronizers such as Phasers that are advertised to
+ * cooperate with fork/join scheduling. Subdividable tasks should also
+ * not perform blocking IO, and should ideally access variables that
+ * are completely independent of those accessed by other running
+ * tasks. These guidelines are loosely enforced by not permitting
+ * checked exceptions such as {@code IOExceptions} to be
+ * thrown. However, computations may still encounter unchecked
+ * exceptions, that are rethrown to callers attempting to join
+ * them. These exceptions may additionally include {@link
+ * RejectedExecutionException} stemming from internal resource
+ * exhaustion, such as failure to allocate internal task
+ * queues. Rethrown exceptions behave in the same way as regular
+ * exceptions, but, when possible, contain stack traces (as displayed
+ * for example using {@code ex.printStackTrace()}) of both the thread
+ * that initiated the computation as well as the thread actually
+ * encountering the exception; minimally only the latter.
+ *
+ * <p>It is possible to define and use ForkJoinTasks that may block,
+ * but doing do requires three further considerations: (1) Completion
+ * of few if any <em>other</em> tasks should be dependent on a task
+ * that blocks on external synchronization or IO. Event-style async
+ * tasks that are never joined (for example, those subclassing {@link
+ * CountedCompleter}) often fall into this category.  (2) To minimize
+ * resource impact, tasks should be small; ideally performing only the
+ * (possibly) blocking action. (3) Unless the {@link
+ * ForkJoinPool.ManagedBlocker} API is used, or the number of possibly
+ * blocked tasks is known to be less than the pool's {@link
+ * ForkJoinPool#getParallelism} level, the pool cannot guarantee that
+ * enough threads will be available to ensure progress or good
+ * performance.
+ *
+ * <p>The primary method for awaiting completion and extracting
+ * results of a task is {@link #join}, but there are several variants:
+ * The {@link Future#get} methods support interruptible and/or timed
+ * waits for completion and report results using {@code Future}
+ * conventions. Method {@link #invoke} is semantically
+ * equivalent to {@code fork(); join()} but always attempts to begin
+ * execution in the current thread. The "<em>quiet</em>" forms of
+ * these methods do not extract results or report exceptions. These
+ * may be useful when a set of tasks are being executed, and you need
+ * to delay processing of results or exceptions until all complete.
+ * Method {@code invokeAll} (available in multiple versions)
+ * performs the most common form of parallel invocation: forking a set
+ * of tasks and joining them all.
+ *
+ * <p>In the most typical usages, a fork-join pair act like a call
+ * (fork) and return (join) from a parallel recursive function. As is
+ * the case with other forms of recursive calls, returns (joins)
+ * should be performed innermost-first. For example, {@code a.fork();
+ * b.fork(); b.join(); a.join();} is likely to be substantially more
+ * efficient than joining {@code a} before {@code b}.
+ *
+ * <p>The execution status of tasks may be queried at several levels
+ * of detail: {@link #isDone} is true if a task completed in any way
+ * (including the case where a task was cancelled without executing);
+ * {@link #isCompletedNormally} is true if a task completed without
+ * cancellation or encountering an exception; {@link #isCancelled} is
+ * true if the task was cancelled (in which case {@link #getException}
+ * returns a {@link java.util.concurrent.CancellationException}); and
+ * {@link #isCompletedAbnormally} is true if a task was either
+ * cancelled or encountered an exception, in which case {@link
+ * #getException} will return either the encountered exception or
+ * {@link java.util.concurrent.CancellationException}.
+ *
+ * <p>The ForkJoinTask class is not usually directly subclassed.
+ * Instead, you subclass one of the abstract classes that support a
+ * particular style of fork/join processing, typically {@link
+ * RecursiveAction} for most computations that do not return results,
+ * {@link RecursiveTask} for those that do, and {@link
+ * CountedCompleter} for those in which completed actions trigger
+ * other actions.  Normally, a concrete ForkJoinTask subclass declares
+ * fields comprising its parameters, established in a constructor, and
+ * then defines a {@code compute} method that somehow uses the control
+ * methods supplied by this base class. While these methods have
+ * {@code public} access (to allow instances of different task
+ * subclasses to call each other's methods), some of them may only be
+ * called from within other ForkJoinTasks (as may be determined using
+ * method {@link #inForkJoinPool}).  Attempts to invoke them in other
+ * contexts result in exceptions or errors, possibly including {@code
+ * ClassCastException}.
+ *
+ * <p>Method {@link #join} and its variants are appropriate for use
+ * only when completion dependencies are acyclic; that is, the
+ * parallel computation can be described as a directed acyclic graph
+ * (DAG). Otherwise, executions may encounter a form of deadlock as
+ * tasks cyclically wait for each other.  However, this framework
+ * supports other methods and techniques (for example the use of
+ * {@link Phaser}, {@link #helpQuiesce}, and {@link #complete}) that
+ * may be of use in constructing custom subclasses for problems that
+ * are not statically structured as DAGs. To support such usages a
+ * ForkJoinTask may be atomically <em>tagged</em> with a {@code short}
+ * value using {@link #setForkJoinTaskTag} or {@link
+ * #compareAndSetForkJoinTaskTag} and checked using {@link
+ * #getForkJoinTaskTag}. The ForkJoinTask implementation does not use
+ * these {@code protected} methods or tags for any purpose, but they
+ * may be of use in the construction of specialized subclasses.  For
+ * example, parallel graph traversals can use the supplied methods to
+ * avoid revisiting nodes/tasks that have already been processed.
+ * (Method names for tagging are bulky in part to encourage definition
+ * of methods that reflect their usage patterns.)
+ *
+ * <p>Most base support methods are {@code final}, to prevent
+ * overriding of implementations that are intrinsically tied to the
+ * underlying lightweight task scheduling framework.  Developers
+ * creating new basic styles of fork/join processing should minimally
+ * implement {@code protected} methods {@link #exec}, {@link
+ * #setRawResult}, and {@link #getRawResult}, while also introducing
+ * an abstract computational method that can be implemented in its
+ * subclasses, possibly relying on other {@code protected} methods
+ * provided by this class.
+ *
+ * <p>ForkJoinTasks should perform relatively small amounts of
+ * computation. Large tasks should be split into smaller subtasks,
+ * usually via recursive decomposition. As a very rough rule of thumb,
+ * a task should perform more than 100 and less than 10000 basic
+ * computational steps, and should avoid indefinite looping. If tasks
+ * are too big, then parallelism cannot improve throughput. If too
+ * small, then memory and internal task maintenance overhead may
+ * overwhelm processing.
+ *
+ * <p>This class provides {@code adapt} methods for {@link Runnable}
+ * and {@link Callable}, that may be of use when mixing execution of
+ * {@code ForkJoinTasks} with other kinds of tasks. When all tasks are
+ * of this form, consider using a pool constructed in <em>asyncMode</em>.
+ *
+ * <p>ForkJoinTasks are {@code Serializable}, which enables them to be
+ * used in extensions such as remote execution frameworks. It is
+ * sensible to serialize tasks only before or after, but not during,
+ * execution. Serialization is not relied on during execution itself.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public abstract class ForkJoinTask<V> implements Future<V>, Serializable {
+
+    /*
+     * See the internal documentation of class ForkJoinPool for a
+     * general implementation overview.  ForkJoinTasks are mainly
+     * responsible for maintaining their "status" field amidst relays
+     * to methods in ForkJoinWorkerThread and ForkJoinPool.
+     *
+     * The methods of this class are more-or-less layered into
+     * (1) basic status maintenance
+     * (2) execution and awaiting completion
+     * (3) user-level methods that additionally report results.
+     * This is sometimes hard to see because this file orders exported
+     * methods in a way that flows well in javadocs.
+     */
+
+    /*
+     * The status field holds run control status bits packed into a
+     * single int to minimize footprint and to ensure atomicity (via
+     * CAS).  Status is initially zero, and takes on nonnegative
+     * values until completed, upon which status (anded with
+     * DONE_MASK) holds value NORMAL, CANCELLED, or EXCEPTIONAL. Tasks
+     * undergoing blocking waits by other threads have the SIGNAL bit
+     * set.  Completion of a stolen task with SIGNAL set awakens any
+     * waiters via notifyAll. Even though suboptimal for some
+     * purposes, we use basic builtin wait/notify to take advantage of
+     * "monitor inflation" in JVMs that we would otherwise need to
+     * emulate to avoid adding further per-task bookkeeping overhead.
+     * We want these monitors to be "fat", i.e., not use biasing or
+     * thin-lock techniques, so use some odd coding idioms that tend
+     * to avoid them, mainly by arranging that every synchronized
+     * block performs a wait, notifyAll or both.
+     *
+     * These control bits occupy only (some of) the upper half (16
+     * bits) of status field. The lower bits are used for user-defined
+     * tags.
+     */
+
+    /** The run status of this task */
+    volatile int status; // accessed directly by pool and workers
+    static final int DONE_MASK   = 0xf0000000;  // mask out non-completion bits
+    static final int NORMAL      = 0xf0000000;  // must be negative
+    static final int CANCELLED   = 0xc0000000;  // must be < NORMAL
+    static final int EXCEPTIONAL = 0x80000000;  // must be < CANCELLED
+    static final int SIGNAL      = 0x00010000;  // must be >= 1 << 16
+    static final int SMASK       = 0x0000ffff;  // short bits for tags
+
+    /**
+     * Marks completion and wakes up threads waiting to join this
+     * task.
+     *
+     * @param completion one of NORMAL, CANCELLED, EXCEPTIONAL
+     * @return completion status on exit
+     */
+    private int setCompletion(int completion) {
+        for (int s;;) {
+            if ((s = status) < 0)
+                return s;
+            if (U.compareAndSwapInt(this, STATUS, s, s | completion)) {
+                if ((s >>> 16) != 0)
+                    synchronized (this) { notifyAll(); }
+                return completion;
+            }
+        }
+    }
+
+    /**
+     * Primary execution method for stolen tasks. Unless done, calls
+     * exec and records status if completed, but doesn't wait for
+     * completion otherwise.
+     *
+     * @return status on exit from this method
+     */
+    final int doExec() {
+        int s; boolean completed;
+        if ((s = status) >= 0) {
+            try {
+                completed = exec();
+            } catch (Throwable rex) {
+                return setExceptionalCompletion(rex);
+            }
+            if (completed)
+                s = setCompletion(NORMAL);
+        }
+        return s;
+    }
+
+    /**
+     * Tries to set SIGNAL status unless already completed. Used by
+     * ForkJoinPool. Other variants are directly incorporated into
+     * externalAwaitDone etc.
+     *
+     * @return true if successful
+     */
+    final boolean trySetSignal() {
+        int s = status;
+        return s >= 0 && U.compareAndSwapInt(this, STATUS, s, s | SIGNAL);
+    }
+
+    /**
+     * Blocks a non-worker-thread until completion.
+     * @return status upon completion
+     */
+    private int externalAwaitDone() {
+        boolean interrupted = false;
+        int s;
+        while ((s = status) >= 0) {
+            if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                synchronized (this) {
+                    if (status >= 0) {
+                        try {
+                            wait();
+                        } catch (InterruptedException ie) {
+                            interrupted = true;
+                        }
+                    }
+                    else
+                        notifyAll();
+                }
+            }
+        }
+        if (interrupted)
+            Thread.currentThread().interrupt();
+        return s;
+    }
+
+    /**
+     * Blocks a non-worker-thread until completion or interruption.
+     */
+    private int externalInterruptibleAwaitDone() throws InterruptedException {
+        int s;
+        if (Thread.interrupted())
+            throw new InterruptedException();
+        while ((s = status) >= 0) {
+            if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                synchronized (this) {
+                    if (status >= 0)
+                        wait();
+                    else
+                        notifyAll();
+                }
+            }
+        }
+        return s;
+    }
+
+    /**
+     * Implementation for join, get, quietlyJoin. Directly handles
+     * only cases of already-completed, external wait, and
+     * unfork+exec.  Others are relayed to ForkJoinPool.awaitJoin.
+     *
+     * @return status upon completion
+     */
+    private int doJoin() {
+        int s; Thread t; ForkJoinWorkerThread wt; ForkJoinPool.WorkQueue w;
+        if ((s = status) >= 0) {
+            if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread)) {
+                if (!(w = (wt = (ForkJoinWorkerThread)t).workQueue).
+                    tryUnpush(this) || (s = doExec()) >= 0)
+                    s = wt.pool.awaitJoin(w, this);
+            }
+            else
+                s = externalAwaitDone();
+        }
+        return s;
+    }
+
+    /**
+     * Implementation for invoke, quietlyInvoke.
+     *
+     * @return status upon completion
+     */
+    private int doInvoke() {
+        int s; Thread t; ForkJoinWorkerThread wt;
+        if ((s = doExec()) >= 0) {
+            if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread)
+                s = (wt = (ForkJoinWorkerThread)t).pool.awaitJoin(wt.workQueue,
+                                                                  this);
+            else
+                s = externalAwaitDone();
+        }
+        return s;
+    }
+
+    // Exception table support
+
+    /**
+     * Table of exceptions thrown by tasks, to enable reporting by
+     * callers. Because exceptions are rare, we don't directly keep
+     * them with task objects, but instead use a weak ref table.  Note
+     * that cancellation exceptions don't appear in the table, but are
+     * instead recorded as status values.
+     *
+     * Note: These statics are initialized below in static block.
+     */
+    private static final ExceptionNode[] exceptionTable;
+    private static final ReentrantLock exceptionTableLock;
+    private static final ReferenceQueue<Object> exceptionTableRefQueue;
+
+    /**
+     * Fixed capacity for exceptionTable.
+     */
+    private static final int EXCEPTION_MAP_CAPACITY = 32;
+
+    /**
+     * Key-value nodes for exception table.  The chained hash table
+     * uses identity comparisons, full locking, and weak references
+     * for keys. The table has a fixed capacity because it only
+     * maintains task exceptions long enough for joiners to access
+     * them, so should never become very large for sustained
+     * periods. However, since we do not know when the last joiner
+     * completes, we must use weak references and expunge them. We do
+     * so on each operation (hence full locking). Also, some thread in
+     * any ForkJoinPool will call helpExpungeStaleExceptions when its
+     * pool becomes isQuiescent.
+     */
+    static final class ExceptionNode extends WeakReference<ForkJoinTask<?>> {
+        final Throwable ex;
+        ExceptionNode next;
+        final long thrower;  // use id not ref to avoid weak cycles
+        ExceptionNode(ForkJoinTask<?> task, Throwable ex, ExceptionNode next) {
+            super(task, exceptionTableRefQueue);
+            this.ex = ex;
+            this.next = next;
+            this.thrower = Thread.currentThread().getId();
+        }
+    }
+
+    /**
+     * Records exception and sets status.
+     *
+     * @return status on exit
+     */
+    final int recordExceptionalCompletion(Throwable ex) {
+        int s;
+        if ((s = status) >= 0) {
+            int h = System.identityHashCode(this);
+            final ReentrantLock lock = exceptionTableLock;
+            lock.lock();
+            try {
+                expungeStaleExceptions();
+                ExceptionNode[] t = exceptionTable;
+                int i = h & (t.length - 1);
+                for (ExceptionNode e = t[i]; ; e = e.next) {
+                    if (e == null) {
+                        t[i] = new ExceptionNode(this, ex, t[i]);
+                        break;
+                    }
+                    if (e.get() == this) // already present
+                        break;
+                }
+            } finally {
+                lock.unlock();
+            }
+            s = setCompletion(EXCEPTIONAL);
+        }
+        return s;
+    }
+
+    /**
+     * Records exception and possibly propagates
+     *
+     * @return status on exit
+     */
+    private int setExceptionalCompletion(Throwable ex) {
+        int s = recordExceptionalCompletion(ex);
+        if ((s & DONE_MASK) == EXCEPTIONAL)
+            internalPropagateException(ex);
+        return s;
+    }
+
+    /**
+     * Hook for exception propagation support for tasks with completers.
+     */
+    void internalPropagateException(Throwable ex) {
+    }
+
+    /**
+     * Cancels, ignoring any exceptions thrown by cancel. Used during
+     * worker and pool shutdown. Cancel is spec'ed not to throw any
+     * exceptions, but if it does anyway, we have no recourse during
+     * shutdown, so guard against this case.
+     */
+    static final void cancelIgnoringExceptions(ForkJoinTask<?> t) {
+        if (t != null && t.status >= 0) {
+            try {
+                t.cancel(false);
+            } catch (Throwable ignore) {
+            }
+        }
+    }
+
+    /**
+     * Removes exception node and clears status
+     */
+    private void clearExceptionalCompletion() {
+        int h = System.identityHashCode(this);
+        final ReentrantLock lock = exceptionTableLock;
+        lock.lock();
+        try {
+            ExceptionNode[] t = exceptionTable;
+            int i = h & (t.length - 1);
+            ExceptionNode e = t[i];
+            ExceptionNode pred = null;
+            while (e != null) {
+                ExceptionNode next = e.next;
+                if (e.get() == this) {
+                    if (pred == null)
+                        t[i] = next;
+                    else
+                        pred.next = next;
+                    break;
+                }
+                pred = e;
+                e = next;
+            }
+            expungeStaleExceptions();
+            status = 0;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * Returns a rethrowable exception for the given task, if
+     * available. To provide accurate stack traces, if the exception
+     * was not thrown by the current thread, we try to create a new
+     * exception of the same type as the one thrown, but with the
+     * recorded exception as its cause. If there is no such
+     * constructor, we instead try to use a no-arg constructor,
+     * followed by initCause, to the same effect. If none of these
+     * apply, or any fail due to other exceptions, we return the
+     * recorded exception, which is still correct, although it may
+     * contain a misleading stack trace.
+     *
+     * @return the exception, or null if none
+     */
+    private Throwable getThrowableException() {
+        if ((status & DONE_MASK) != EXCEPTIONAL)
+            return null;
+        int h = System.identityHashCode(this);
+        ExceptionNode e;
+        final ReentrantLock lock = exceptionTableLock;
+        lock.lock();
+        try {
+            expungeStaleExceptions();
+            ExceptionNode[] t = exceptionTable;
+            e = t[h & (t.length - 1)];
+            while (e != null && e.get() != this)
+                e = e.next;
+        } finally {
+            lock.unlock();
+        }
+        Throwable ex;
+        if (e == null || (ex = e.ex) == null)
+            return null;
+        if (false && e.thrower != Thread.currentThread().getId()) {
+            Class<? extends Throwable> ec = ex.getClass();
+            try {
+                Constructor<?> noArgCtor = null;
+                Constructor<?>[] cs = ec.getConstructors();// public ctors only
+                for (int i = 0; i < cs.length; ++i) {
+                    Constructor<?> c = cs[i];
+                    Class<?>[] ps = c.getParameterTypes();
+                    if (ps.length == 0)
+                        noArgCtor = c;
+                    else if (ps.length == 1 && ps[0] == Throwable.class)
+                        return (Throwable)(c.newInstance(ex));
+                }
+                if (noArgCtor != null) {
+                    Throwable wx = (Throwable)(noArgCtor.newInstance());
+                    wx.initCause(ex);
+                    return wx;
+                }
+            } catch (Exception ignore) {
+            }
+        }
+        return ex;
+    }
+
+    /**
+     * Poll stale refs and remove them. Call only while holding lock.
+     */
+    private static void expungeStaleExceptions() {
+        for (Object x; (x = exceptionTableRefQueue.poll()) != null;) {
+            if (x instanceof ExceptionNode) {
+                ForkJoinTask<?> key = ((ExceptionNode)x).get();
+                ExceptionNode[] t = exceptionTable;
+                int i = System.identityHashCode(key) & (t.length - 1);
+                ExceptionNode e = t[i];
+                ExceptionNode pred = null;
+                while (e != null) {
+                    ExceptionNode next = e.next;
+                    if (e == x) {
+                        if (pred == null)
+                            t[i] = next;
+                        else
+                            pred.next = next;
+                        break;
+                    }
+                    pred = e;
+                    e = next;
+                }
+            }
+        }
+    }
+
+    /**
+     * If lock is available, poll stale refs and remove them.
+     * Called from ForkJoinPool when pools become quiescent.
+     */
+    static final void helpExpungeStaleExceptions() {
+        final ReentrantLock lock = exceptionTableLock;
+        if (lock.tryLock()) {
+            try {
+                expungeStaleExceptions();
+            } finally {
+                lock.unlock();
+            }
+        }
+    }
+
+    /**
+     * Throws exception, if any, associated with the given status.
+     */
+    private void reportException(int s) {
+        Throwable ex = ((s == CANCELLED) ?  new CancellationException() :
+                        (s == EXCEPTIONAL) ? getThrowableException() :
+                        null);
+        if (ex != null)
+            U.throwException(ex);
+    }
+
+    // public methods
+
+    /**
+     * Arranges to asynchronously execute this task.  While it is not
+     * necessarily enforced, it is a usage error to fork a task more
+     * than once unless it has completed and been reinitialized.
+     * Subsequent modifications to the state of this task or any data
+     * it operates on are not necessarily consistently observable by
+     * any thread other than the one executing it unless preceded by a
+     * call to {@link #join} or related methods, or a call to {@link
+     * #isDone} returning {@code true}.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return {@code this}, to simplify usage
+     */
+    public final ForkJoinTask<V> fork() {
+        ((ForkJoinWorkerThread)Thread.currentThread()).workQueue.push(this);
+        return this;
+    }
+
+    /**
+     * Returns the result of the computation when it {@link #isDone is
+     * done}.  This method differs from {@link #get()} in that
+     * abnormal completion results in {@code RuntimeException} or
+     * {@code Error}, not {@code ExecutionException}, and that
+     * interrupts of the calling thread do <em>not</em> cause the
+     * method to abruptly return by throwing {@code
+     * InterruptedException}.
+     *
+     * @return the computed result
+     */
+    public final V join() {
+        int s;
+        if ((s = doJoin() & DONE_MASK) != NORMAL)
+            reportException(s);
+        return getRawResult();
+    }
+
+    /**
+     * Commences performing this task, awaits its completion if
+     * necessary, and returns its result, or throws an (unchecked)
+     * {@code RuntimeException} or {@code Error} if the underlying
+     * computation did so.
+     *
+     * @return the computed result
+     */
+    public final V invoke() {
+        int s;
+        if ((s = doInvoke() & DONE_MASK) != NORMAL)
+            reportException(s);
+        return getRawResult();
+    }
+
+    /**
+     * Forks the given tasks, returning when {@code isDone} holds for
+     * each task or an (unchecked) exception is encountered, in which
+     * case the exception is rethrown. If more than one task
+     * encounters an exception, then this method throws any one of
+     * these exceptions. If any task encounters an exception, the
+     * other may be cancelled. However, the execution status of
+     * individual tasks is not guaranteed upon exceptional return. The
+     * status of each task may be obtained using {@link
+     * #getException()} and related methods to check if they have been
+     * cancelled, completed normally or exceptionally, or left
+     * unprocessed.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @param t1 the first task
+     * @param t2 the second task
+     * @throws NullPointerException if any task is null
+     */
+    public static void invokeAll(ForkJoinTask<?> t1, ForkJoinTask<?> t2) {
+        int s1, s2;
+        t2.fork();
+        if ((s1 = t1.doInvoke() & DONE_MASK) != NORMAL)
+            t1.reportException(s1);
+        if ((s2 = t2.doJoin() & DONE_MASK) != NORMAL)
+            t2.reportException(s2);
+    }
+
+    /**
+     * Forks the given tasks, returning when {@code isDone} holds for
+     * each task or an (unchecked) exception is encountered, in which
+     * case the exception is rethrown. If more than one task
+     * encounters an exception, then this method throws any one of
+     * these exceptions. If any task encounters an exception, others
+     * may be cancelled. However, the execution status of individual
+     * tasks is not guaranteed upon exceptional return. The status of
+     * each task may be obtained using {@link #getException()} and
+     * related methods to check if they have been cancelled, completed
+     * normally or exceptionally, or left unprocessed.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @param tasks the tasks
+     * @throws NullPointerException if any task is null
+     */
+    public static void invokeAll(ForkJoinTask<?>... tasks) {
+        Throwable ex = null;
+        int last = tasks.length - 1;
+        for (int i = last; i >= 0; --i) {
+            ForkJoinTask<?> t = tasks[i];
+            if (t == null) {
+                if (ex == null)
+                    ex = new NullPointerException();
+            }
+            else if (i != 0)
+                t.fork();
+            else if (t.doInvoke() < NORMAL && ex == null)
+                ex = t.getException();
+        }
+        for (int i = 1; i <= last; ++i) {
+            ForkJoinTask<?> t = tasks[i];
+            if (t != null) {
+                if (ex != null)
+                    t.cancel(false);
+                else if (t.doJoin() < NORMAL)
+                    ex = t.getException();
+            }
+        }
+        if (ex != null)
+            U.throwException(ex);
+    }
+
+    /**
+     * Forks all tasks in the specified collection, returning when
+     * {@code isDone} holds for each task or an (unchecked) exception
+     * is encountered, in which case the exception is rethrown. If
+     * more than one task encounters an exception, then this method
+     * throws any one of these exceptions. If any task encounters an
+     * exception, others may be cancelled. However, the execution
+     * status of individual tasks is not guaranteed upon exceptional
+     * return. The status of each task may be obtained using {@link
+     * #getException()} and related methods to check if they have been
+     * cancelled, completed normally or exceptionally, or left
+     * unprocessed.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @param tasks the collection of tasks
+     * @return the tasks argument, to simplify usage
+     * @throws NullPointerException if tasks or any element are null
+     */
+    public static <T extends ForkJoinTask<?>> Collection<T> invokeAll(Collection<T> tasks) {
+        if (!(tasks instanceof RandomAccess) || !(tasks instanceof List<?>)) {
+            invokeAll(tasks.toArray(new ForkJoinTask<?>[tasks.size()]));
+            return tasks;
+        }
+        @SuppressWarnings("unchecked")
+        List<? extends ForkJoinTask<?>> ts =
+            (List<? extends ForkJoinTask<?>>) tasks;
+        Throwable ex = null;
+        int last = ts.size() - 1;
+        for (int i = last; i >= 0; --i) {
+            ForkJoinTask<?> t = ts.get(i);
+            if (t == null) {
+                if (ex == null)
+                    ex = new NullPointerException();
+            }
+            else if (i != 0)
+                t.fork();
+            else if (t.doInvoke() < NORMAL && ex == null)
+                ex = t.getException();
+        }
+        for (int i = 1; i <= last; ++i) {
+            ForkJoinTask<?> t = ts.get(i);
+            if (t != null) {
+                if (ex != null)
+                    t.cancel(false);
+                else if (t.doJoin() < NORMAL)
+                    ex = t.getException();
+            }
+        }
+        if (ex != null)
+            U.throwException(ex);
+        return tasks;
+    }
+
+    /**
+     * Attempts to cancel execution of this task. This attempt will
+     * fail if the task has already completed or could not be
+     * cancelled for some other reason. If successful, and this task
+     * has not started when {@code cancel} is called, execution of
+     * this task is suppressed. After this method returns
+     * successfully, unless there is an intervening call to {@link
+     * #reinitialize}, subsequent calls to {@link #isCancelled},
+     * {@link #isDone}, and {@code cancel} will return {@code true}
+     * and calls to {@link #join} and related methods will result in
+     * {@code CancellationException}.
+     *
+     * <p>This method may be overridden in subclasses, but if so, must
+     * still ensure that these properties hold. In particular, the
+     * {@code cancel} method itself must not throw exceptions.
+     *
+     * <p>This method is designed to be invoked by <em>other</em>
+     * tasks. To terminate the current task, you can just return or
+     * throw an unchecked exception from its computation method, or
+     * invoke {@link #completeExceptionally}.
+     *
+     * @param mayInterruptIfRunning this value has no effect in the
+     * default implementation because interrupts are not used to
+     * control cancellation.
+     *
+     * @return {@code true} if this task is now cancelled
+     */
+    public boolean cancel(boolean mayInterruptIfRunning) {
+        return (setCompletion(CANCELLED) & DONE_MASK) == CANCELLED;
+    }
+
+    public final boolean isDone() {
+        return status < 0;
+    }
+
+    public final boolean isCancelled() {
+        return (status & DONE_MASK) == CANCELLED;
+    }
+
+    /**
+     * Returns {@code true} if this task threw an exception or was cancelled.
+     *
+     * @return {@code true} if this task threw an exception or was cancelled
+     */
+    public final boolean isCompletedAbnormally() {
+        return status < NORMAL;
+    }
+
+    /**
+     * Returns {@code true} if this task completed without throwing an
+     * exception and was not cancelled.
+     *
+     * @return {@code true} if this task completed without throwing an
+     * exception and was not cancelled
+     */
+    public final boolean isCompletedNormally() {
+        return (status & DONE_MASK) == NORMAL;
+    }
+
+    /**
+     * Returns the exception thrown by the base computation, or a
+     * {@code CancellationException} if cancelled, or {@code null} if
+     * none or if the method has not yet completed.
+     *
+     * @return the exception, or {@code null} if none
+     */
+    public final Throwable getException() {
+        int s = status & DONE_MASK;
+        return ((s >= NORMAL)    ? null :
+                (s == CANCELLED) ? new CancellationException() :
+                getThrowableException());
+    }
+
+    /**
+     * Completes this task abnormally, and if not already aborted or
+     * cancelled, causes it to throw the given exception upon
+     * {@code join} and related operations. This method may be used
+     * to induce exceptions in asynchronous tasks, or to force
+     * completion of tasks that would not otherwise complete.  Its use
+     * in other situations is discouraged.  This method is
+     * overridable, but overridden versions must invoke {@code super}
+     * implementation to maintain guarantees.
+     *
+     * @param ex the exception to throw. If this exception is not a
+     * {@code RuntimeException} or {@code Error}, the actual exception
+     * thrown will be a {@code RuntimeException} with cause {@code ex}.
+     */
+    public void completeExceptionally(Throwable ex) {
+        setExceptionalCompletion((ex instanceof RuntimeException) ||
+                                 (ex instanceof Error) ? ex :
+                                 new RuntimeException(ex));
+    }
+
+    /**
+     * Completes this task, and if not already aborted or cancelled,
+     * returning the given value as the result of subsequent
+     * invocations of {@code join} and related operations. This method
+     * may be used to provide results for asynchronous tasks, or to
+     * provide alternative handling for tasks that would not otherwise
+     * complete normally. Its use in other situations is
+     * discouraged. This method is overridable, but overridden
+     * versions must invoke {@code super} implementation to maintain
+     * guarantees.
+     *
+     * @param value the result value for this task
+     */
+    public void complete(V value) {
+        try {
+            setRawResult(value);
+        } catch (Throwable rex) {
+            setExceptionalCompletion(rex);
+            return;
+        }
+        setCompletion(NORMAL);
+    }
+
+    /**
+     * Completes this task normally without setting a value. The most
+     * recent value established by {@link #setRawResult} (or {@code
+     * null} by default) will be returned as the result of subsequent
+     * invocations of {@code join} and related operations.
+     *
+     * @since 1.8
+     */
+    public final void quietlyComplete() {
+        setCompletion(NORMAL);
+    }
+
+    /**
+     * Waits if necessary for the computation to complete, and then
+     * retrieves its result.
+     *
+     * @return the computed result
+     * @throws CancellationException if the computation was cancelled
+     * @throws ExecutionException if the computation threw an
+     * exception
+     * @throws InterruptedException if the current thread is not a
+     * member of a ForkJoinPool and was interrupted while waiting
+     */
+    public final V get() throws InterruptedException, ExecutionException {
+        int s = (Thread.currentThread() instanceof ForkJoinWorkerThread) ?
+            doJoin() : externalInterruptibleAwaitDone();
+        Throwable ex;
+        if ((s &= DONE_MASK) == CANCELLED)
+            throw new CancellationException();
+        if (s == EXCEPTIONAL && (ex = getThrowableException()) != null)
+            throw new ExecutionException(ex);
+        return getRawResult();
+    }
+
+    /**
+     * Waits if necessary for at most the given time for the computation
+     * to complete, and then retrieves its result, if available.
+     *
+     * @param timeout the maximum time to wait
+     * @param unit the time unit of the timeout argument
+     * @return the computed result
+     * @throws CancellationException if the computation was cancelled
+     * @throws ExecutionException if the computation threw an
+     * exception
+     * @throws InterruptedException if the current thread is not a
+     * member of a ForkJoinPool and was interrupted while waiting
+     * @throws TimeoutException if the wait timed out
+     */
+    public final V get(long timeout, TimeUnit unit)
+        throws InterruptedException, ExecutionException, TimeoutException {
+        if (Thread.interrupted())
+            throw new InterruptedException();
+        // Messy in part because we measure in nanosecs, but wait in millisecs
+        int s; long ns, ms;
+        if ((s = status) >= 0 && (ns = unit.toNanos(timeout)) > 0L) {
+            long deadline = System.nanoTime() + ns;
+            ForkJoinPool p = null;
+            ForkJoinPool.WorkQueue w = null;
+            Thread t = Thread.currentThread();
+            if (t instanceof ForkJoinWorkerThread) {
+                ForkJoinWorkerThread wt = (ForkJoinWorkerThread)t;
+                p = wt.pool;
+                w = wt.workQueue;
+                s = p.helpJoinOnce(w, this); // no retries on failure
+            }
+            boolean canBlock = false;
+            boolean interrupted = false;
+            try {
+                while ((s = status) >= 0) {
+                    if (w != null && w.runState < 0)
+                        cancelIgnoringExceptions(this);
+                    else if (!canBlock) {
+                        if (p == null || p.tryCompensate(this, null))
+                            canBlock = true;
+                    }
+                    else {
+                        if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) > 0L &&
+                            U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                            synchronized (this) {
+                                if (status >= 0) {
+                                    try {
+                                        wait(ms);
+                                    } catch (InterruptedException ie) {
+                                        if (p == null)
+                                            interrupted = true;
+                                    }
+                                }
+                                else
+                                    notifyAll();
+                            }
+                        }
+                        if ((s = status) < 0 || interrupted ||
+                            (ns = deadline - System.nanoTime()) <= 0L)
+                            break;
+                    }
+                }
+            } finally {
+                if (p != null && canBlock)
+                    p.incrementActiveCount();
+            }
+            if (interrupted)
+                throw new InterruptedException();
+        }
+        if ((s &= DONE_MASK) != NORMAL) {
+            Throwable ex;
+            if (s == CANCELLED)
+                throw new CancellationException();
+            if (s != EXCEPTIONAL)
+                throw new TimeoutException();
+            if ((ex = getThrowableException()) != null)
+                throw new ExecutionException(ex);
+        }
+        return getRawResult();
+    }
+
+    /**
+     * Joins this task, without returning its result or throwing its
+     * exception. This method may be useful when processing
+     * collections of tasks when some have been cancelled or otherwise
+     * known to have aborted.
+     */
+    public final void quietlyJoin() {
+        doJoin();
+    }
+
+    /**
+     * Commences performing this task and awaits its completion if
+     * necessary, without returning its result or throwing its
+     * exception.
+     */
+    public final void quietlyInvoke() {
+        doInvoke();
+    }
+
+    /**
+     * Possibly executes tasks until the pool hosting the current task
+     * {@link ForkJoinPool#isQuiescent is quiescent}. This method may
+     * be of use in designs in which many tasks are forked, but none
+     * are explicitly joined, instead executing them until all are
+     * processed.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     */
+    public static void helpQuiesce() {
+        ForkJoinWorkerThread wt =
+            (ForkJoinWorkerThread)Thread.currentThread();
+        wt.pool.helpQuiescePool(wt.workQueue);
+    }
+
+    /**
+     * Resets the internal bookkeeping state of this task, allowing a
+     * subsequent {@code fork}. This method allows repeated reuse of
+     * this task, but only if reuse occurs when this task has either
+     * never been forked, or has been forked, then completed and all
+     * outstanding joins of this task have also completed. Effects
+     * under any other usage conditions are not guaranteed.
+     * This method may be useful when executing
+     * pre-constructed trees of subtasks in loops.
+     *
+     * <p>Upon completion of this method, {@code isDone()} reports
+     * {@code false}, and {@code getException()} reports {@code
+     * null}. However, the value returned by {@code getRawResult} is
+     * unaffected. To clear this value, you can invoke {@code
+     * setRawResult(null)}.
+     */
+    public void reinitialize() {
+        if ((status & DONE_MASK) == EXCEPTIONAL)
+            clearExceptionalCompletion();
+        else
+            status = 0;
+    }
+
+    /**
+     * Returns the pool hosting the current task execution, or null
+     * if this task is executing outside of any ForkJoinPool.
+     *
+     * @see #inForkJoinPool
+     * @return the pool, or {@code null} if none
+     */
+    public static ForkJoinPool getPool() {
+        Thread t = Thread.currentThread();
+        return (t instanceof ForkJoinWorkerThread) ?
+            ((ForkJoinWorkerThread) t).pool : null;
+    }
+
+    /**
+     * Returns {@code true} if the current thread is a {@link
+     * ForkJoinWorkerThread} executing as a ForkJoinPool computation.
+     *
+     * @return {@code true} if the current thread is a {@link
+     * ForkJoinWorkerThread} executing as a ForkJoinPool computation,
+     * or {@code false} otherwise
+     */
+    public static boolean inForkJoinPool() {
+        return Thread.currentThread() instanceof ForkJoinWorkerThread;
+    }
+
+    /**
+     * Tries to unschedule this task for execution. This method will
+     * typically succeed if this task is the most recently forked task
+     * by the current thread, and has not commenced executing in
+     * another thread.  This method may be useful when arranging
+     * alternative local processing of tasks that could have been, but
+     * were not, stolen.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return {@code true} if unforked
+     */
+    public boolean tryUnfork() {
+        return ((ForkJoinWorkerThread)Thread.currentThread())
+            .workQueue.tryUnpush(this);
+    }
+
+    /**
+     * Returns an estimate of the number of tasks that have been
+     * forked by the current worker thread but not yet executed. This
+     * value may be useful for heuristic decisions about whether to
+     * fork other tasks.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return the number of tasks
+     */
+    public static int getQueuedTaskCount() {
+        return ((ForkJoinWorkerThread) Thread.currentThread())
+            .workQueue.queueSize();
+    }
+
+    /**
+     * Returns an estimate of how many more locally queued tasks are
+     * held by the current worker thread than there are other worker
+     * threads that might steal them.  This value may be useful for
+     * heuristic decisions about whether to fork other tasks. In many
+     * usages of ForkJoinTasks, at steady state, each worker should
+     * aim to maintain a small constant surplus (for example, 3) of
+     * tasks, and to process computations locally if this threshold is
+     * exceeded.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return the surplus number of tasks, which may be negative
+     */
+    public static int getSurplusQueuedTaskCount() {
+        /*
+         * The aim of this method is to return a cheap heuristic guide
+         * for task partitioning when programmers, frameworks, tools,
+         * or languages have little or no idea about task granularity.
+         * In essence by offering this method, we ask users only about
+         * tradeoffs in overhead vs expected throughput and its
+         * variance, rather than how finely to partition tasks.
+         *
+         * In a steady state strict (tree-structured) computation,
+         * each thread makes available for stealing enough tasks for
+         * other threads to remain active. Inductively, if all threads
+         * play by the same rules, each thread should make available
+         * only a constant number of tasks.
+         *
+         * The minimum useful constant is just 1. But using a value of
+         * 1 would require immediate replenishment upon each steal to
+         * maintain enough tasks, which is infeasible.  Further,
+         * partitionings/granularities of offered tasks should
+         * minimize steal rates, which in general means that threads
+         * nearer the top of computation tree should generate more
+         * than those nearer the bottom. In perfect steady state, each
+         * thread is at approximately the same level of computation
+         * tree. However, producing extra tasks amortizes the
+         * uncertainty of progress and diffusion assumptions.
+         *
+         * So, users will want to use values larger, but not much
+         * larger than 1 to both smooth over transient shortages and
+         * hedge against uneven progress; as traded off against the
+         * cost of extra task overhead. We leave the user to pick a
+         * threshold value to compare with the results of this call to
+         * guide decisions, but recommend values such as 3.
+         *
+         * When all threads are active, it is on average OK to
+         * estimate surplus strictly locally. In steady-state, if one
+         * thread is maintaining say 2 surplus tasks, then so are
+         * others. So we can just use estimated queue length.
+         * However, this strategy alone leads to serious mis-estimates
+         * in some non-steady-state conditions (ramp-up, ramp-down,
+         * other stalls). We can detect many of these by further
+         * considering the number of "idle" threads, that are known to
+         * have zero queued tasks, so compensate by a factor of
+         * (#idle/#active) threads.
+         */
+        ForkJoinWorkerThread wt =
+            (ForkJoinWorkerThread)Thread.currentThread();
+        return wt.workQueue.queueSize() - wt.pool.idlePerActive();
+    }
+
+    // Extension methods
+
+    /**
+     * Returns the result that would be returned by {@link #join}, even
+     * if this task completed abnormally, or {@code null} if this task
+     * is not known to have been completed.  This method is designed
+     * to aid debugging, as well as to support extensions. Its use in
+     * any other context is discouraged.
+     *
+     * @return the result, or {@code null} if not completed
+     */
+    public abstract V getRawResult();
+
+    /**
+     * Forces the given value to be returned as a result.  This method
+     * is designed to support extensions, and should not in general be
+     * called otherwise.
+     *
+     * @param value the value
+     */
+    protected abstract void setRawResult(V value);
+
+    /**
+     * Immediately performs the base action of this task and returns
+     * true if, upon return from this method, this task is guaranteed
+     * to have completed normally. This method may return false
+     * otherwise, to indicate that this task is not necessarily
+     * complete (or is not known to be complete), for example in
+     * asynchronous actions that require explicit invocations of
+     * completion methods. This method may also throw an (unchecked)
+     * exception to indicate abnormal exit. This method is designed to
+     * support extensions, and should not in general be called
+     * otherwise.
+     *
+     * @return {@code true} if this task is known to have completed normally
+     */
+    protected abstract boolean exec();
+
+    /**
+     * Returns, but does not unschedule or execute, a task queued by
+     * the current thread but not yet executed, if one is immediately
+     * available. There is no guarantee that this task will actually
+     * be polled or executed next. Conversely, this method may return
+     * null even if a task exists but cannot be accessed without
+     * contention with other threads.  This method is designed
+     * primarily to support extensions, and is unlikely to be useful
+     * otherwise.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return the next task, or {@code null} if none are available
+     */
+    protected static ForkJoinTask<?> peekNextLocalTask() {
+        return ((ForkJoinWorkerThread) Thread.currentThread()).workQueue.peek();
+    }
+
+    /**
+     * Unschedules and returns, without executing, the next task
+     * queued by the current thread but not yet executed.  This method
+     * is designed primarily to support extensions, and is unlikely to
+     * be useful otherwise.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return the next task, or {@code null} if none are available
+     */
+    protected static ForkJoinTask<?> pollNextLocalTask() {
+        return ((ForkJoinWorkerThread) Thread.currentThread())
+            .workQueue.nextLocalTask();
+    }
+
+    /**
+     * Unschedules and returns, without executing, the next task
+     * queued by the current thread but not yet executed, if one is
+     * available, or if not available, a task that was forked by some
+     * other thread, if available. Availability may be transient, so a
+     * {@code null} result does not necessarily imply quiescence
+     * of the pool this task is operating in.  This method is designed
+     * primarily to support extensions, and is unlikely to be useful
+     * otherwise.
+     *
+     * <p>This method may be invoked only from within {@code
+     * ForkJoinPool} computations (as may be determined using method
+     * {@link #inForkJoinPool}).  Attempts to invoke in other contexts
+     * result in exceptions or errors, possibly including {@code
+     * ClassCastException}.
+     *
+     * @return a task, or {@code null} if none are available
+     */
+    protected static ForkJoinTask<?> pollTask() {
+        ForkJoinWorkerThread wt =
+            (ForkJoinWorkerThread)Thread.currentThread();
+        return wt.pool.nextTaskFor(wt.workQueue);
+    }
+
+    // tag operations
+
+    /**
+     * Returns the tag for this task.
+     *
+     * @return the tag for this task
+     * @since 1.8
+     */
+    public final short getForkJoinTaskTag() {
+        return (short)status;
+    }
+
+    /**
+     * Atomically sets the tag value for this task.
+     *
+     * @param tag the tag value
+     * @return the previous value of the tag
+     * @since 1.8
+     */
+    public final short setForkJoinTaskTag(short tag) {
+        for (int s;;) {
+            if (U.compareAndSwapInt(this, STATUS, s = status,
+                                    (s & ~SMASK) | (tag & SMASK)))
+                return (short)s;
+        }
+    }
+
+    /**
+     * Atomically conditionally sets the tag value for this task.
+     * Among other applications, tags can be used as visit markers
+     * in tasks operating on graphs, as in methods that check: {@code
+     * if (task.compareAndSetForkJoinTaskTag((short)0, (short)1))}
+     * before processing, otherwise exiting because the node has
+     * already been visited.
+     *
+     * @param e the expected tag value
+     * @param tag the new tag value
+     * @return true if successful; i.e., the current value was
+     * equal to e and is now tag.
+     * @since 1.8
+     */
+    public final boolean compareAndSetForkJoinTaskTag(short e, short tag) {
+        for (int s;;) {
+            if ((short)(s = status) != e)
+                return false;
+            if (U.compareAndSwapInt(this, STATUS, s,
+                                    (s & ~SMASK) | (tag & SMASK)))
+                return true;
+        }
+    }
+
+    /**
+     * Adaptor for Runnables. This implements RunnableFuture
+     * to be compliant with AbstractExecutorService constraints
+     * when used in ForkJoinPool.
+     */
+    static final class AdaptedRunnable<T> extends ForkJoinTask<T>
+        implements RunnableFuture<T> {
+        final Runnable runnable;
+        T result;
+        AdaptedRunnable(Runnable runnable, T result) {
+            if (runnable == null) throw new NullPointerException();
+            this.runnable = runnable;
+            this.result = result; // OK to set this even before completion
+        }
+        public final T getRawResult() { return result; }
+        public final void setRawResult(T v) { result = v; }
+        public final boolean exec() { runnable.run(); return true; }
+        public final void run() { invoke(); }
+        private static final long serialVersionUID = 5232453952276885070L;
+    }
+
+    /**
+     * Adaptor for Runnables without results
+     */
+    static final class AdaptedRunnableAction extends ForkJoinTask<Void>
+        implements RunnableFuture<Void> {
+        final Runnable runnable;
+        AdaptedRunnableAction(Runnable runnable) {
+            if (runnable == null) throw new NullPointerException();
+            this.runnable = runnable;
+        }
+        public final Void getRawResult() { return null; }
+        public final void setRawResult(Void v) { }
+        public final boolean exec() { runnable.run(); return true; }
+        public final void run() { invoke(); }
+        private static final long serialVersionUID = 5232453952276885070L;
+    }
+
+    /**
+     * Adaptor for Callables
+     */
+    static final class AdaptedCallable<T> extends ForkJoinTask<T>
+        implements RunnableFuture<T> {
+        final Callable<? extends T> callable;
+        T result;
+        AdaptedCallable(Callable<? extends T> callable) {
+            if (callable == null) throw new NullPointerException();
+            this.callable = callable;
+        }
+        public final T getRawResult() { return result; }
+        public final void setRawResult(T v) { result = v; }
+        public final boolean exec() {
+            try {
+                result = callable.call();
+                return true;
+            } catch (Error err) {
+                throw err;
+            } catch (RuntimeException rex) {
+                throw rex;
+            } catch (Exception ex) {
+                throw new RuntimeException(ex);
+            }
+        }
+        public final void run() { invoke(); }
+        private static final long serialVersionUID = 2838392045355241008L;
+    }
+
+    /**
+     * Returns a new {@code ForkJoinTask} that performs the {@code run}
+     * method of the given {@code Runnable} as its action, and returns
+     * a null result upon {@link #join}.
+     *
+     * @param runnable the runnable action
+     * @return the task
+     */
+    public static ForkJoinTask<?> adapt(Runnable runnable) {
+        return new AdaptedRunnableAction(runnable);
+    }
+
+    /**
+     * Returns a new {@code ForkJoinTask} that performs the {@code run}
+     * method of the given {@code Runnable} as its action, and returns
+     * the given result upon {@link #join}.
+     *
+     * @param runnable the runnable action
+     * @param result the result upon completion
+     * @return the task
+     */
+    public static <T> ForkJoinTask<T> adapt(Runnable runnable, T result) {
+        return new AdaptedRunnable<T>(runnable, result);
+    }
+
+    /**
+     * Returns a new {@code ForkJoinTask} that performs the {@code call}
+     * method of the given {@code Callable} as its action, and returns
+     * its result upon {@link #join}, translating any checked exceptions
+     * encountered into {@code RuntimeException}.
+     *
+     * @param callable the callable action
+     * @return the task
+     */
+    public static <T> ForkJoinTask<T> adapt(Callable<? extends T> callable) {
+        return new AdaptedCallable<T>(callable);
+    }
+
+    // Serialization support
+
+    private static final long serialVersionUID = -7721805057305804111L;
+
+    /**
+     * Saves this task to a stream (that is, serializes it).
+     *
+     * @serialData the current run status and the exception thrown
+     * during execution, or {@code null} if none
+     */
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        s.writeObject(getException());
+    }
+
+    /**
+     * Reconstitutes this task from a stream (that is, deserializes it).
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        Object ex = s.readObject();
+        if (ex != null)
+            setExceptionalCompletion((Throwable)ex);
+    }
+
+    // Unsafe mechanics
+    private static final sun.misc.Unsafe U;
+    private static final long STATUS;
+    static {
+        exceptionTableLock = new ReentrantLock();
+        exceptionTableRefQueue = new ReferenceQueue<Object>();
+        exceptionTable = new ExceptionNode[EXCEPTION_MAP_CAPACITY];
+        try {
+            U = getUnsafe();
+            STATUS = U.objectFieldOffset
+                (ForkJoinTask.class.getDeclaredField("status"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+}
diff --git a/src/main/java/jsr166y/ForkJoinWorkerThread.java b/src/main/java/jsr166y/ForkJoinWorkerThread.java
new file mode 100644
index 00000000000..4382ac18af9
--- /dev/null
+++ b/src/main/java/jsr166y/ForkJoinWorkerThread.java
@@ -0,0 +1,119 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+/**
+ * A thread managed by a {@link ForkJoinPool}, which executes
+ * {@link ForkJoinTask}s.
+ * This class is subclassable solely for the sake of adding
+ * functionality -- there are no overridable methods dealing with
+ * scheduling or execution.  However, you can override initialization
+ * and termination methods surrounding the main task processing loop.
+ * If you do create such a subclass, you will also need to supply a
+ * custom {@link ForkJoinPool.ForkJoinWorkerThreadFactory} to use it
+ * in a {@code ForkJoinPool}.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public class ForkJoinWorkerThread extends Thread {
+    /*
+     * ForkJoinWorkerThreads are managed by ForkJoinPools and perform
+     * ForkJoinTasks. For explanation, see the internal documentation
+     * of class ForkJoinPool.
+     */
+
+    final ForkJoinPool.WorkQueue workQueue; // Work-stealing mechanics
+    final ForkJoinPool pool;                // the pool this thread works in
+
+    /**
+     * Creates a ForkJoinWorkerThread operating in the given pool.
+     *
+     * @param pool the pool this thread works in
+     * @throws NullPointerException if pool is null
+     */
+    protected ForkJoinWorkerThread(ForkJoinPool pool) {
+        super(pool.nextWorkerName());
+        setDaemon(true);
+        Thread.UncaughtExceptionHandler ueh = pool.ueh;
+        if (ueh != null)
+            setUncaughtExceptionHandler(ueh);
+        this.pool = pool;
+        pool.registerWorker(this.workQueue = new ForkJoinPool.WorkQueue
+                            (pool, this, pool.localMode));
+    }
+
+    /**
+     * Returns the pool hosting this thread.
+     *
+     * @return the pool
+     */
+    public ForkJoinPool getPool() {
+        return pool;
+    }
+
+    /**
+     * Returns the index number of this thread in its pool.  The
+     * returned value ranges from zero to the maximum number of
+     * threads (minus one) that have ever been created in the pool.
+     * This method may be useful for applications that track status or
+     * collect results per-worker rather than per-task.
+     *
+     * @return the index number
+     */
+    public int getPoolIndex() {
+        return workQueue.poolIndex;
+    }
+
+    /**
+     * Initializes internal state after construction but before
+     * processing any tasks. If you override this method, you must
+     * invoke {@code super.onStart()} at the beginning of the method.
+     * Initialization requires care: Most fields must have legal
+     * default values, to ensure that attempted accesses from other
+     * threads work correctly even before this thread starts
+     * processing tasks.
+     */
+    protected void onStart() {
+    }
+
+    /**
+     * Performs cleanup associated with termination of this worker
+     * thread.  If you override this method, you must invoke
+     * {@code super.onTermination} at the end of the overridden method.
+     *
+     * @param exception the exception causing this thread to abort due
+     * to an unrecoverable error, or {@code null} if completed normally
+     */
+    protected void onTermination(Throwable exception) {
+    }
+
+    /**
+     * This method is required to be public, but should never be
+     * called explicitly. It performs the main run loop to execute
+     * {@link ForkJoinTask}s.
+     */
+    public void run() {
+        Throwable exception = null;
+        try {
+            onStart();
+            pool.runWorker(workQueue);
+        } catch (Throwable ex) {
+            exception = ex;
+        } finally {
+            try {
+                onTermination(exception);
+            } catch (Throwable ex) {
+                if (exception == null)
+                    exception = ex;
+            } finally {
+                pool.deregisterWorker(this, exception);
+            }
+        }
+    }
+}
+
diff --git a/src/main/java/jsr166y/LinkedTransferQueue.java b/src/main/java/jsr166y/LinkedTransferQueue.java
new file mode 100644
index 00000000000..3cc8dba46d9
--- /dev/null
+++ b/src/main/java/jsr166y/LinkedTransferQueue.java
@@ -0,0 +1,1352 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+import java.util.AbstractQueue;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Queue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.LockSupport;
+
+/**
+ * An unbounded {@link TransferQueue} based on linked nodes.
+ * This queue orders elements FIFO (first-in-first-out) with respect
+ * to any given producer.  The <em>head</em> of the queue is that
+ * element that has been on the queue the longest time for some
+ * producer.  The <em>tail</em> of the queue is that element that has
+ * been on the queue the shortest time for some producer.
+ *
+ * <p>Beware that, unlike in most collections, the {@code size} method
+ * is <em>NOT</em> a constant-time operation. Because of the
+ * asynchronous nature of these queues, determining the current number
+ * of elements requires a traversal of the elements, and so may report
+ * inaccurate results if this collection is modified during traversal.
+ * Additionally, the bulk operations {@code addAll},
+ * {@code removeAll}, {@code retainAll}, {@code containsAll},
+ * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
+ * to be performed atomically. For example, an iterator operating
+ * concurrently with an {@code addAll} operation might view only some
+ * of the added elements.
+ *
+ * <p>This class and its iterator implement all of the
+ * <em>optional</em> methods of the {@link Collection} and {@link
+ * Iterator} interfaces.
+ *
+ * <p>Memory consistency effects: As with other concurrent
+ * collections, actions in a thread prior to placing an object into a
+ * {@code LinkedTransferQueue}
+ * <a href="package-summary.html#MemoryVisibility"><i>happen-before</i></a>
+ * actions subsequent to the access or removal of that element from
+ * the {@code LinkedTransferQueue} in another thread.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/../technotes/guides/collections/index.html">
+ * Java Collections Framework</a>.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ * @param <E> the type of elements held in this collection
+ */
+public class LinkedTransferQueue<E> extends AbstractQueue<E>
+    implements TransferQueue<E>, java.io.Serializable {
+    private static final long serialVersionUID = -3223113410248163686L;
+
+    /*
+     * *** Overview of Dual Queues with Slack ***
+     *
+     * Dual Queues, introduced by Scherer and Scott
+     * (http://www.cs.rice.edu/~wns1/papers/2004-DISC-DDS.pdf) are
+     * (linked) queues in which nodes may represent either data or
+     * requests.  When a thread tries to enqueue a data node, but
+     * encounters a request node, it instead "matches" and removes it;
+     * and vice versa for enqueuing requests. Blocking Dual Queues
+     * arrange that threads enqueuing unmatched requests block until
+     * other threads provide the match. Dual Synchronous Queues (see
+     * Scherer, Lea, & Scott
+     * http://www.cs.rochester.edu/u/scott/papers/2009_Scherer_CACM_SSQ.pdf)
+     * additionally arrange that threads enqueuing unmatched data also
+     * block.  Dual Transfer Queues support all of these modes, as
+     * dictated by callers.
+     *
+     * A FIFO dual queue may be implemented using a variation of the
+     * Michael & Scott (M&S) lock-free queue algorithm
+     * (http://www.cs.rochester.edu/u/scott/papers/1996_PODC_queues.pdf).
+     * It maintains two pointer fields, "head", pointing to a
+     * (matched) node that in turn points to the first actual
+     * (unmatched) queue node (or null if empty); and "tail" that
+     * points to the last node on the queue (or again null if
+     * empty). For example, here is a possible queue with four data
+     * elements:
+     *
+     *  head                tail
+     *    |                   |
+     *    v                   v
+     *    M -> U -> U -> U -> U
+     *
+     * The M&S queue algorithm is known to be prone to scalability and
+     * overhead limitations when maintaining (via CAS) these head and
+     * tail pointers. This has led to the development of
+     * contention-reducing variants such as elimination arrays (see
+     * Moir et al http://portal.acm.org/citation.cfm?id=1074013) and
+     * optimistic back pointers (see Ladan-Mozes & Shavit
+     * http://people.csail.mit.edu/edya/publications/OptimisticFIFOQueue-journal.pdf).
+     * However, the nature of dual queues enables a simpler tactic for
+     * improving M&S-style implementations when dual-ness is needed.
+     *
+     * In a dual queue, each node must atomically maintain its match
+     * status. While there are other possible variants, we implement
+     * this here as: for a data-mode node, matching entails CASing an
+     * "item" field from a non-null data value to null upon match, and
+     * vice-versa for request nodes, CASing from null to a data
+     * value. (Note that the linearization properties of this style of
+     * queue are easy to verify -- elements are made available by
+     * linking, and unavailable by matching.) Compared to plain M&S
+     * queues, this property of dual queues requires one additional
+     * successful atomic operation per enq/deq pair. But it also
+     * enables lower cost variants of queue maintenance mechanics. (A
+     * variation of this idea applies even for non-dual queues that
+     * support deletion of interior elements, such as
+     * j.u.c.ConcurrentLinkedQueue.)
+     *
+     * Once a node is matched, its match status can never again
+     * change.  We may thus arrange that the linked list of them
+     * contain a prefix of zero or more matched nodes, followed by a
+     * suffix of zero or more unmatched nodes. (Note that we allow
+     * both the prefix and suffix to be zero length, which in turn
+     * means that we do not use a dummy header.)  If we were not
+     * concerned with either time or space efficiency, we could
+     * correctly perform enqueue and dequeue operations by traversing
+     * from a pointer to the initial node; CASing the item of the
+     * first unmatched node on match and CASing the next field of the
+     * trailing node on appends. (Plus some special-casing when
+     * initially empty).  While this would be a terrible idea in
+     * itself, it does have the benefit of not requiring ANY atomic
+     * updates on head/tail fields.
+     *
+     * We introduce here an approach that lies between the extremes of
+     * never versus always updating queue (head and tail) pointers.
+     * This offers a tradeoff between sometimes requiring extra
+     * traversal steps to locate the first and/or last unmatched
+     * nodes, versus the reduced overhead and contention of fewer
+     * updates to queue pointers. For example, a possible snapshot of
+     * a queue is:
+     *
+     *  head           tail
+     *    |              |
+     *    v              v
+     *    M -> M -> U -> U -> U -> U
+     *
+     * The best value for this "slack" (the targeted maximum distance
+     * between the value of "head" and the first unmatched node, and
+     * similarly for "tail") is an empirical matter. We have found
+     * that using very small constants in the range of 1-3 work best
+     * over a range of platforms. Larger values introduce increasing
+     * costs of cache misses and risks of long traversal chains, while
+     * smaller values increase CAS contention and overhead.
+     *
+     * Dual queues with slack differ from plain M&S dual queues by
+     * virtue of only sometimes updating head or tail pointers when
+     * matching, appending, or even traversing nodes; in order to
+     * maintain a targeted slack.  The idea of "sometimes" may be
+     * operationalized in several ways. The simplest is to use a
+     * per-operation counter incremented on each traversal step, and
+     * to try (via CAS) to update the associated queue pointer
+     * whenever the count exceeds a threshold. Another, that requires
+     * more overhead, is to use random number generators to update
+     * with a given probability per traversal step.
+     *
+     * In any strategy along these lines, because CASes updating
+     * fields may fail, the actual slack may exceed targeted
+     * slack. However, they may be retried at any time to maintain
+     * targets.  Even when using very small slack values, this
+     * approach works well for dual queues because it allows all
+     * operations up to the point of matching or appending an item
+     * (hence potentially allowing progress by another thread) to be
+     * read-only, thus not introducing any further contention. As
+     * described below, we implement this by performing slack
+     * maintenance retries only after these points.
+     *
+     * As an accompaniment to such techniques, traversal overhead can
+     * be further reduced without increasing contention of head
+     * pointer updates: Threads may sometimes shortcut the "next" link
+     * path from the current "head" node to be closer to the currently
+     * known first unmatched node, and similarly for tail. Again, this
+     * may be triggered with using thresholds or randomization.
+     *
+     * These ideas must be further extended to avoid unbounded amounts
+     * of costly-to-reclaim garbage caused by the sequential "next"
+     * links of nodes starting at old forgotten head nodes: As first
+     * described in detail by Boehm
+     * (http://portal.acm.org/citation.cfm?doid=503272.503282) if a GC
+     * delays noticing that any arbitrarily old node has become
+     * garbage, all newer dead nodes will also be unreclaimed.
+     * (Similar issues arise in non-GC environments.)  To cope with
+     * this in our implementation, upon CASing to advance the head
+     * pointer, we set the "next" link of the previous head to point
+     * only to itself; thus limiting the length of connected dead lists.
+     * (We also take similar care to wipe out possibly garbage
+     * retaining values held in other Node fields.)  However, doing so
+     * adds some further complexity to traversal: If any "next"
+     * pointer links to itself, it indicates that the current thread
+     * has lagged behind a head-update, and so the traversal must
+     * continue from the "head".  Traversals trying to find the
+     * current tail starting from "tail" may also encounter
+     * self-links, in which case they also continue at "head".
+     *
+     * It is tempting in slack-based scheme to not even use CAS for
+     * updates (similarly to Ladan-Mozes & Shavit). However, this
+     * cannot be done for head updates under the above link-forgetting
+     * mechanics because an update may leave head at a detached node.
+     * And while direct writes are possible for tail updates, they
+     * increase the risk of long retraversals, and hence long garbage
+     * chains, which can be much more costly than is worthwhile
+     * considering that the cost difference of performing a CAS vs
+     * write is smaller when they are not triggered on each operation
+     * (especially considering that writes and CASes equally require
+     * additional GC bookkeeping ("write barriers") that are sometimes
+     * more costly than the writes themselves because of contention).
+     *
+     * *** Overview of implementation ***
+     *
+     * We use a threshold-based approach to updates, with a slack
+     * threshold of two -- that is, we update head/tail when the
+     * current pointer appears to be two or more steps away from the
+     * first/last node. The slack value is hard-wired: a path greater
+     * than one is naturally implemented by checking equality of
+     * traversal pointers except when the list has only one element,
+     * in which case we keep slack threshold at one. Avoiding tracking
+     * explicit counts across method calls slightly simplifies an
+     * already-messy implementation. Using randomization would
+     * probably work better if there were a low-quality dirt-cheap
+     * per-thread one available, but even ThreadLocalRandom is too
+     * heavy for these purposes.
+     *
+     * With such a small slack threshold value, it is not worthwhile
+     * to augment this with path short-circuiting (i.e., unsplicing
+     * interior nodes) except in the case of cancellation/removal (see
+     * below).
+     *
+     * We allow both the head and tail fields to be null before any
+     * nodes are enqueued; initializing upon first append.  This
+     * simplifies some other logic, as well as providing more
+     * efficient explicit control paths instead of letting JVMs insert
+     * implicit NullPointerExceptions when they are null.  While not
+     * currently fully implemented, we also leave open the possibility
+     * of re-nulling these fields when empty (which is complicated to
+     * arrange, for little benefit.)
+     *
+     * All enqueue/dequeue operations are handled by the single method
+     * "xfer" with parameters indicating whether to act as some form
+     * of offer, put, poll, take, or transfer (each possibly with
+     * timeout). The relative complexity of using one monolithic
+     * method outweighs the code bulk and maintenance problems of
+     * using separate methods for each case.
+     *
+     * Operation consists of up to three phases. The first is
+     * implemented within method xfer, the second in tryAppend, and
+     * the third in method awaitMatch.
+     *
+     * 1. Try to match an existing node
+     *
+     *    Starting at head, skip already-matched nodes until finding
+     *    an unmatched node of opposite mode, if one exists, in which
+     *    case matching it and returning, also if necessary updating
+     *    head to one past the matched node (or the node itself if the
+     *    list has no other unmatched nodes). If the CAS misses, then
+     *    a loop retries advancing head by two steps until either
+     *    success or the slack is at most two. By requiring that each
+     *    attempt advances head by two (if applicable), we ensure that
+     *    the slack does not grow without bound. Traversals also check
+     *    if the initial head is now off-list, in which case they
+     *    start at the new head.
+     *
+     *    If no candidates are found and the call was untimed
+     *    poll/offer, (argument "how" is NOW) return.
+     *
+     * 2. Try to append a new node (method tryAppend)
+     *
+     *    Starting at current tail pointer, find the actual last node
+     *    and try to append a new node (or if head was null, establish
+     *    the first node). Nodes can be appended only if their
+     *    predecessors are either already matched or are of the same
+     *    mode. If we detect otherwise, then a new node with opposite
+     *    mode must have been appended during traversal, so we must
+     *    restart at phase 1. The traversal and update steps are
+     *    otherwise similar to phase 1: Retrying upon CAS misses and
+     *    checking for staleness.  In particular, if a self-link is
+     *    encountered, then we can safely jump to a node on the list
+     *    by continuing the traversal at current head.
+     *
+     *    On successful append, if the call was ASYNC, return.
+     *
+     * 3. Await match or cancellation (method awaitMatch)
+     *
+     *    Wait for another thread to match node; instead cancelling if
+     *    the current thread was interrupted or the wait timed out. On
+     *    multiprocessors, we use front-of-queue spinning: If a node
+     *    appears to be the first unmatched node in the queue, it
+     *    spins a bit before blocking. In either case, before blocking
+     *    it tries to unsplice any nodes between the current "head"
+     *    and the first unmatched node.
+     *
+     *    Front-of-queue spinning vastly improves performance of
+     *    heavily contended queues. And so long as it is relatively
+     *    brief and "quiet", spinning does not much impact performance
+     *    of less-contended queues.  During spins threads check their
+     *    interrupt status and generate a thread-local random number
+     *    to decide to occasionally perform a Thread.yield. While
+     *    yield has underdefined specs, we assume that it might help,
+     *    and will not hurt, in limiting impact of spinning on busy
+     *    systems.  We also use smaller (1/2) spins for nodes that are
+     *    not known to be front but whose predecessors have not
+     *    blocked -- these "chained" spins avoid artifacts of
+     *    front-of-queue rules which otherwise lead to alternating
+     *    nodes spinning vs blocking. Further, front threads that
+     *    represent phase changes (from data to request node or vice
+     *    versa) compared to their predecessors receive additional
+     *    chained spins, reflecting longer paths typically required to
+     *    unblock threads during phase changes.
+     *
+     *
+     * ** Unlinking removed interior nodes **
+     *
+     * In addition to minimizing garbage retention via self-linking
+     * described above, we also unlink removed interior nodes. These
+     * may arise due to timed out or interrupted waits, or calls to
+     * remove(x) or Iterator.remove.  Normally, given a node that was
+     * at one time known to be the predecessor of some node s that is
+     * to be removed, we can unsplice s by CASing the next field of
+     * its predecessor if it still points to s (otherwise s must
+     * already have been removed or is now offlist). But there are two
+     * situations in which we cannot guarantee to make node s
+     * unreachable in this way: (1) If s is the trailing node of list
+     * (i.e., with null next), then it is pinned as the target node
+     * for appends, so can only be removed later after other nodes are
+     * appended. (2) We cannot necessarily unlink s given a
+     * predecessor node that is matched (including the case of being
+     * cancelled): the predecessor may already be unspliced, in which
+     * case some previous reachable node may still point to s.
+     * (For further explanation see Herlihy & Shavit "The Art of
+     * Multiprocessor Programming" chapter 9).  Although, in both
+     * cases, we can rule out the need for further action if either s
+     * or its predecessor are (or can be made to be) at, or fall off
+     * from, the head of list.
+     *
+     * Without taking these into account, it would be possible for an
+     * unbounded number of supposedly removed nodes to remain
+     * reachable.  Situations leading to such buildup are uncommon but
+     * can occur in practice; for example when a series of short timed
+     * calls to poll repeatedly time out but never otherwise fall off
+     * the list because of an untimed call to take at the front of the
+     * queue.
+     *
+     * When these cases arise, rather than always retraversing the
+     * entire list to find an actual predecessor to unlink (which
+     * won't help for case (1) anyway), we record a conservative
+     * estimate of possible unsplice failures (in "sweepVotes").
+     * We trigger a full sweep when the estimate exceeds a threshold
+     * ("SWEEP_THRESHOLD") indicating the maximum number of estimated
+     * removal failures to tolerate before sweeping through, unlinking
+     * cancelled nodes that were not unlinked upon initial removal.
+     * We perform sweeps by the thread hitting threshold (rather than
+     * background threads or by spreading work to other threads)
+     * because in the main contexts in which removal occurs, the
+     * caller is already timed-out, cancelled, or performing a
+     * potentially O(n) operation (e.g. remove(x)), none of which are
+     * time-critical enough to warrant the overhead that alternatives
+     * would impose on other threads.
+     *
+     * Because the sweepVotes estimate is conservative, and because
+     * nodes become unlinked "naturally" as they fall off the head of
+     * the queue, and because we allow votes to accumulate even while
+     * sweeps are in progress, there are typically significantly fewer
+     * such nodes than estimated.  Choice of a threshold value
+     * balances the likelihood of wasted effort and contention, versus
+     * providing a worst-case bound on retention of interior nodes in
+     * quiescent queues. The value defined below was chosen
+     * empirically to balance these under various timeout scenarios.
+     *
+     * Note that we cannot self-link unlinked interior nodes during
+     * sweeps. However, the associated garbage chains terminate when
+     * some successor ultimately falls off the head of the list and is
+     * self-linked.
+     */
+
+    /** True if on multiprocessor */
+    private static final boolean MP =
+        Runtime.getRuntime().availableProcessors() > 1;
+
+    /**
+     * The number of times to spin (with randomly interspersed calls
+     * to Thread.yield) on multiprocessor before blocking when a node
+     * is apparently the first waiter in the queue.  See above for
+     * explanation. Must be a power of two. The value is empirically
+     * derived -- it works pretty well across a variety of processors,
+     * numbers of CPUs, and OSes.
+     */
+    private static final int FRONT_SPINS   = 1 << 7;
+
+    /**
+     * The number of times to spin before blocking when a node is
+     * preceded by another node that is apparently spinning.  Also
+     * serves as an increment to FRONT_SPINS on phase changes, and as
+     * base average frequency for yielding during spins. Must be a
+     * power of two.
+     */
+    private static final int CHAINED_SPINS = FRONT_SPINS >>> 1;
+
+    /**
+     * The maximum number of estimated removal failures (sweepVotes)
+     * to tolerate before sweeping through the queue unlinking
+     * cancelled nodes that were not unlinked upon initial
+     * removal. See above for explanation. The value must be at least
+     * two to avoid useless sweeps when removing trailing nodes.
+     */
+    static final int SWEEP_THRESHOLD = 32;
+
+    /**
+     * Queue nodes. Uses Object, not E, for items to allow forgetting
+     * them after use.  Relies heavily on Unsafe mechanics to minimize
+     * unnecessary ordering constraints: Writes that are intrinsically
+     * ordered wrt other accesses or CASes use simple relaxed forms.
+     */
+    static final class Node {
+        final boolean isData;   // false if this is a request node
+        volatile Object item;   // initially non-null if isData; CASed to match
+        volatile Node next;
+        volatile Thread waiter; // null until waiting
+
+        // CAS methods for fields
+        final boolean casNext(Node cmp, Node val) {
+            return UNSAFE.compareAndSwapObject(this, nextOffset, cmp, val);
+        }
+
+        final boolean casItem(Object cmp, Object val) {
+            // assert cmp == null || cmp.getClass() != Node.class;
+            return UNSAFE.compareAndSwapObject(this, itemOffset, cmp, val);
+        }
+
+        /**
+         * Constructs a new node.  Uses relaxed write because item can
+         * only be seen after publication via casNext.
+         */
+        Node(Object item, boolean isData) {
+            UNSAFE.putObject(this, itemOffset, item); // relaxed write
+            this.isData = isData;
+        }
+
+        /**
+         * Links node to itself to avoid garbage retention.  Called
+         * only after CASing head field, so uses relaxed write.
+         */
+        final void forgetNext() {
+            UNSAFE.putObject(this, nextOffset, this);
+        }
+
+        /**
+         * Sets item to self and waiter to null, to avoid garbage
+         * retention after matching or cancelling. Uses relaxed writes
+         * because order is already constrained in the only calling
+         * contexts: item is forgotten only after volatile/atomic
+         * mechanics that extract items.  Similarly, clearing waiter
+         * follows either CAS or return from park (if ever parked;
+         * else we don't care).
+         */
+        final void forgetContents() {
+            UNSAFE.putObject(this, itemOffset, this);
+            UNSAFE.putObject(this, waiterOffset, null);
+        }
+
+        /**
+         * Returns true if this node has been matched, including the
+         * case of artificial matches due to cancellation.
+         */
+        final boolean isMatched() {
+            Object x = item;
+            return (x == this) || ((x == null) == isData);
+        }
+
+        /**
+         * Returns true if this is an unmatched request node.
+         */
+        final boolean isUnmatchedRequest() {
+            return !isData && item == null;
+        }
+
+        /**
+         * Returns true if a node with the given mode cannot be
+         * appended to this node because this node is unmatched and
+         * has opposite data mode.
+         */
+        final boolean cannotPrecede(boolean haveData) {
+            boolean d = isData;
+            Object x;
+            return d != haveData && (x = item) != this && (x != null) == d;
+        }
+
+        /**
+         * Tries to artificially match a data node -- used by remove.
+         */
+        final boolean tryMatchData() {
+            // assert isData;
+            Object x = item;
+            if (x != null && x != this && casItem(x, null)) {
+                LockSupport.unpark(waiter);
+                return true;
+            }
+            return false;
+        }
+
+        private static final long serialVersionUID = -3375979862319811754L;
+
+        // Unsafe mechanics
+        private static final sun.misc.Unsafe UNSAFE;
+        private static final long itemOffset;
+        private static final long nextOffset;
+        private static final long waiterOffset;
+        static {
+            try {
+                UNSAFE = getUnsafe();
+                Class<?> k = Node.class;
+                itemOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("item"));
+                nextOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("next"));
+                waiterOffset = UNSAFE.objectFieldOffset
+                    (k.getDeclaredField("waiter"));
+            } catch (Exception e) {
+                throw new Error(e);
+            }
+        }
+    }
+
+    /** head of the queue; null until first enqueue */
+    transient volatile Node head;
+
+    /** tail of the queue; null until first append */
+    private transient volatile Node tail;
+
+    /** The number of apparent failures to unsplice removed nodes */
+    private transient volatile int sweepVotes;
+
+    // CAS methods for fields
+    private boolean casTail(Node cmp, Node val) {
+        return UNSAFE.compareAndSwapObject(this, tailOffset, cmp, val);
+    }
+
+    private boolean casHead(Node cmp, Node val) {
+        return UNSAFE.compareAndSwapObject(this, headOffset, cmp, val);
+    }
+
+    private boolean casSweepVotes(int cmp, int val) {
+        return UNSAFE.compareAndSwapInt(this, sweepVotesOffset, cmp, val);
+    }
+
+    /*
+     * Possible values for "how" argument in xfer method.
+     */
+    private static final int NOW   = 0; // for untimed poll, tryTransfer
+    private static final int ASYNC = 1; // for offer, put, add
+    private static final int SYNC  = 2; // for transfer, take
+    private static final int TIMED = 3; // for timed poll, tryTransfer
+
+    @SuppressWarnings("unchecked")
+    static <E> E cast(Object item) {
+        // assert item == null || item.getClass() != Node.class;
+        return (E) item;
+    }
+
+    /**
+     * Implements all queuing methods. See above for explanation.
+     *
+     * @param e the item or null for take
+     * @param haveData true if this is a put, else a take
+     * @param how NOW, ASYNC, SYNC, or TIMED
+     * @param nanos timeout in nanosecs, used only if mode is TIMED
+     * @return an item if matched, else e
+     * @throws NullPointerException if haveData mode but e is null
+     */
+    private E xfer(E e, boolean haveData, int how, long nanos) {
+        if (haveData && (e == null))
+            throw new NullPointerException();
+        Node s = null;                        // the node to append, if needed
+
+        retry:
+        for (;;) {                            // restart on append race
+
+            for (Node h = head, p = h; p != null;) { // find & match first node
+                boolean isData = p.isData;
+                Object item = p.item;
+                if (item != p && (item != null) == isData) { // unmatched
+                    if (isData == haveData)   // can't match
+                        break;
+                    if (p.casItem(item, e)) { // match
+                        for (Node q = p; q != h;) {
+                            Node n = q.next;  // update by 2 unless singleton
+                            if (head == h && casHead(h, n == null ? q : n)) {
+                                h.forgetNext();
+                                break;
+                            }                 // advance and retry
+                            if ((h = head)   == null ||
+                                (q = h.next) == null || !q.isMatched())
+                                break;        // unless slack < 2
+                        }
+                        LockSupport.unpark(p.waiter);
+                        return LinkedTransferQueue.<E>cast(item);
+                    }
+                }
+                Node n = p.next;
+                p = (p != n) ? n : (h = head); // Use head if p offlist
+            }
+
+            if (how != NOW) {                 // No matches available
+                if (s == null)
+                    s = new Node(e, haveData);
+                Node pred = tryAppend(s, haveData);
+                if (pred == null)
+                    continue retry;           // lost race vs opposite mode
+                if (how != ASYNC)
+                    return awaitMatch(s, pred, e, (how == TIMED), nanos);
+            }
+            return e; // not waiting
+        }
+    }
+
+    /**
+     * Tries to append node s as tail.
+     *
+     * @param s the node to append
+     * @param haveData true if appending in data mode
+     * @return null on failure due to losing race with append in
+     * different mode, else s's predecessor, or s itself if no
+     * predecessor
+     */
+    private Node tryAppend(Node s, boolean haveData) {
+        for (Node t = tail, p = t;;) {        // move p to last node and append
+            Node n, u;                        // temps for reads of next & tail
+            if (p == null && (p = head) == null) {
+                if (casHead(null, s))
+                    return s;                 // initialize
+            }
+            else if (p.cannotPrecede(haveData))
+                return null;                  // lost race vs opposite mode
+            else if ((n = p.next) != null)    // not last; keep traversing
+                p = p != t && t != (u = tail) ? (t = u) : // stale tail
+                    (p != n) ? n : null;      // restart if off list
+            else if (!p.casNext(null, s))
+                p = p.next;                   // re-read on CAS failure
+            else {
+                if (p != t) {                 // update if slack now >= 2
+                    while ((tail != t || !casTail(t, s)) &&
+                           (t = tail)   != null &&
+                           (s = t.next) != null && // advance and retry
+                           (s = s.next) != null && s != t);
+                }
+                return p;
+            }
+        }
+    }
+
+    /**
+     * Spins/yields/blocks until node s is matched or caller gives up.
+     *
+     * @param s the waiting node
+     * @param pred the predecessor of s, or s itself if it has no
+     * predecessor, or null if unknown (the null case does not occur
+     * in any current calls but may in possible future extensions)
+     * @param e the comparison value for checking match
+     * @param timed if true, wait only until timeout elapses
+     * @param nanos timeout in nanosecs, used only if timed is true
+     * @return matched item, or e if unmatched on interrupt or timeout
+     */
+    private E awaitMatch(Node s, Node pred, E e, boolean timed, long nanos) {
+        long lastTime = timed ? System.nanoTime() : 0L;
+        Thread w = Thread.currentThread();
+        int spins = -1; // initialized after first item and cancel checks
+        ThreadLocalRandom randomYields = null; // bound if needed
+
+        for (;;) {
+            Object item = s.item;
+            if (item != e) {                  // matched
+                // assert item != s;
+                s.forgetContents();           // avoid garbage
+                return LinkedTransferQueue.<E>cast(item);
+            }
+            if ((w.isInterrupted() || (timed && nanos <= 0)) &&
+                    s.casItem(e, s)) {        // cancel
+                unsplice(pred, s);
+                return e;
+            }
+
+            if (spins < 0) {                  // establish spins at/near front
+                if ((spins = spinsFor(pred, s.isData)) > 0)
+                    randomYields = ThreadLocalRandom.current();
+            }
+            else if (spins > 0) {             // spin
+                --spins;
+                if (randomYields.nextInt(CHAINED_SPINS) == 0)
+                    Thread.yield();           // occasionally yield
+            }
+            else if (s.waiter == null) {
+                s.waiter = w;                 // request unpark then recheck
+            }
+            else if (timed) {
+                long now = System.nanoTime();
+                if ((nanos -= now - lastTime) > 0)
+                    LockSupport.parkNanos(this, nanos);
+                lastTime = now;
+            }
+            else {
+                LockSupport.park(this);
+            }
+        }
+    }
+
+    /**
+     * Returns spin/yield value for a node with given predecessor and
+     * data mode. See above for explanation.
+     */
+    private static int spinsFor(Node pred, boolean haveData) {
+        if (MP && pred != null) {
+            if (pred.isData != haveData)      // phase change
+                return FRONT_SPINS + CHAINED_SPINS;
+            if (pred.isMatched())             // probably at front
+                return FRONT_SPINS;
+            if (pred.waiter == null)          // pred apparently spinning
+                return CHAINED_SPINS;
+        }
+        return 0;
+    }
+
+    /* -------------- Traversal methods -------------- */
+
+    /**
+     * Returns the successor of p, or the head node if p.next has been
+     * linked to self, which will only be true if traversing with a
+     * stale pointer that is now off the list.
+     */
+    final Node succ(Node p) {
+        Node next = p.next;
+        return (p == next) ? head : next;
+    }
+
+    /**
+     * Returns the first unmatched node of the given mode, or null if
+     * none.  Used by methods isEmpty, hasWaitingConsumer.
+     */
+    private Node firstOfMode(boolean isData) {
+        for (Node p = head; p != null; p = succ(p)) {
+            if (!p.isMatched())
+                return (p.isData == isData) ? p : null;
+        }
+        return null;
+    }
+
+    /**
+     * Returns the item in the first unmatched node with isData; or
+     * null if none.  Used by peek.
+     */
+    private E firstDataItem() {
+        for (Node p = head; p != null; p = succ(p)) {
+            Object item = p.item;
+            if (p.isData) {
+                if (item != null && item != p)
+                    return LinkedTransferQueue.<E>cast(item);
+            }
+            else if (item == null)
+                return null;
+        }
+        return null;
+    }
+
+    /**
+     * Traverses and counts unmatched nodes of the given mode.
+     * Used by methods size and getWaitingConsumerCount.
+     */
+    private int countOfMode(boolean data) {
+        int count = 0;
+        for (Node p = head; p != null; ) {
+            if (!p.isMatched()) {
+                if (p.isData != data)
+                    return 0;
+                if (++count == Integer.MAX_VALUE) // saturated
+                    break;
+            }
+            Node n = p.next;
+            if (n != p)
+                p = n;
+            else {
+                count = 0;
+                p = head;
+            }
+        }
+        return count;
+    }
+
+    final class Itr implements Iterator<E> {
+        private Node nextNode;   // next node to return item for
+        private E nextItem;      // the corresponding item
+        private Node lastRet;    // last returned node, to support remove
+        private Node lastPred;   // predecessor to unlink lastRet
+
+        /**
+         * Moves to next node after prev, or first node if prev null.
+         */
+        private void advance(Node prev) {
+            /*
+             * To track and avoid buildup of deleted nodes in the face
+             * of calls to both Queue.remove and Itr.remove, we must
+             * include variants of unsplice and sweep upon each
+             * advance: Upon Itr.remove, we may need to catch up links
+             * from lastPred, and upon other removes, we might need to
+             * skip ahead from stale nodes and unsplice deleted ones
+             * found while advancing.
+             */
+
+            Node r, b; // reset lastPred upon possible deletion of lastRet
+            if ((r = lastRet) != null && !r.isMatched())
+                lastPred = r;    // next lastPred is old lastRet
+            else if ((b = lastPred) == null || b.isMatched())
+                lastPred = null; // at start of list
+            else {
+                Node s, n;       // help with removal of lastPred.next
+                while ((s = b.next) != null &&
+                       s != b && s.isMatched() &&
+                       (n = s.next) != null && n != s)
+                    b.casNext(s, n);
+            }
+
+            this.lastRet = prev;
+
+            for (Node p = prev, s, n;;) {
+                s = (p == null) ? head : p.next;
+                if (s == null)
+                    break;
+                else if (s == p) {
+                    p = null;
+                    continue;
+                }
+                Object item = s.item;
+                if (s.isData) {
+                    if (item != null && item != s) {
+                        nextItem = LinkedTransferQueue.<E>cast(item);
+                        nextNode = s;
+                        return;
+                    }
+                }
+                else if (item == null)
+                    break;
+                // assert s.isMatched();
+                if (p == null)
+                    p = s;
+                else if ((n = s.next) == null)
+                    break;
+                else if (s == n)
+                    p = null;
+                else
+                    p.casNext(s, n);
+            }
+            nextNode = null;
+            nextItem = null;
+        }
+
+        Itr() {
+            advance(null);
+        }
+
+        public final boolean hasNext() {
+            return nextNode != null;
+        }
+
+        public final E next() {
+            Node p = nextNode;
+            if (p == null) throw new NoSuchElementException();
+            E e = nextItem;
+            advance(p);
+            return e;
+        }
+
+        public final void remove() {
+            final Node lastRet = this.lastRet;
+            if (lastRet == null)
+                throw new IllegalStateException();
+            this.lastRet = null;
+            if (lastRet.tryMatchData())
+                unsplice(lastPred, lastRet);
+        }
+    }
+
+    /* -------------- Removal methods -------------- */
+
+    /**
+     * Unsplices (now or later) the given deleted/cancelled node with
+     * the given predecessor.
+     *
+     * @param pred a node that was at one time known to be the
+     * predecessor of s, or null or s itself if s is/was at head
+     * @param s the node to be unspliced
+     */
+    final void unsplice(Node pred, Node s) {
+        s.forgetContents(); // forget unneeded fields
+        /*
+         * See above for rationale. Briefly: if pred still points to
+         * s, try to unlink s.  If s cannot be unlinked, because it is
+         * trailing node or pred might be unlinked, and neither pred
+         * nor s are head or offlist, add to sweepVotes, and if enough
+         * votes have accumulated, sweep.
+         */
+        if (pred != null && pred != s && pred.next == s) {
+            Node n = s.next;
+            if (n == null ||
+                (n != s && pred.casNext(s, n) && pred.isMatched())) {
+                for (;;) {               // check if at, or could be, head
+                    Node h = head;
+                    if (h == pred || h == s || h == null)
+                        return;          // at head or list empty
+                    if (!h.isMatched())
+                        break;
+                    Node hn = h.next;
+                    if (hn == null)
+                        return;          // now empty
+                    if (hn != h && casHead(h, hn))
+                        h.forgetNext();  // advance head
+                }
+                if (pred.next != pred && s.next != s) { // recheck if offlist
+                    for (;;) {           // sweep now if enough votes
+                        int v = sweepVotes;
+                        if (v < SWEEP_THRESHOLD) {
+                            if (casSweepVotes(v, v + 1))
+                                break;
+                        }
+                        else if (casSweepVotes(v, 0)) {
+                            sweep();
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Unlinks matched (typically cancelled) nodes encountered in a
+     * traversal from head.
+     */
+    private void sweep() {
+        for (Node p = head, s, n; p != null && (s = p.next) != null; ) {
+            if (!s.isMatched())
+                // Unmatched nodes are never self-linked
+                p = s;
+            else if ((n = s.next) == null) // trailing node is pinned
+                break;
+            else if (s == n)    // stale
+                // No need to also check for p == s, since that implies s == n
+                p = head;
+            else
+                p.casNext(s, n);
+        }
+    }
+
+    /**
+     * Main implementation of remove(Object)
+     */
+    private boolean findAndRemove(Object e) {
+        if (e != null) {
+            for (Node pred = null, p = head; p != null; ) {
+                Object item = p.item;
+                if (p.isData) {
+                    if (item != null && item != p && e.equals(item) &&
+                        p.tryMatchData()) {
+                        unsplice(pred, p);
+                        return true;
+                    }
+                }
+                else if (item == null)
+                    break;
+                pred = p;
+                if ((p = p.next) == pred) { // stale
+                    pred = null;
+                    p = head;
+                }
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Creates an initially empty {@code LinkedTransferQueue}.
+     */
+    public LinkedTransferQueue() {
+    }
+
+    /**
+     * Creates a {@code LinkedTransferQueue}
+     * initially containing the elements of the given collection,
+     * added in traversal order of the collection's iterator.
+     *
+     * @param c the collection of elements to initially contain
+     * @throws NullPointerException if the specified collection or any
+     *         of its elements are null
+     */
+    public LinkedTransferQueue(Collection<? extends E> c) {
+        this();
+        addAll(c);
+    }
+
+    /**
+     * Inserts the specified element at the tail of this queue.
+     * As the queue is unbounded, this method will never block.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public void put(E e) {
+        xfer(e, true, ASYNC, 0);
+    }
+
+    /**
+     * Inserts the specified element at the tail of this queue.
+     * As the queue is unbounded, this method will never block or
+     * return {@code false}.
+     *
+     * @return {@code true} (as specified by
+     *  {@link java.util.concurrent.BlockingQueue#offer(Object,long,TimeUnit)
+     *  BlockingQueue.offer})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean offer(E e, long timeout, TimeUnit unit) {
+        xfer(e, true, ASYNC, 0);
+        return true;
+    }
+
+    /**
+     * Inserts the specified element at the tail of this queue.
+     * As the queue is unbounded, this method will never return {@code false}.
+     *
+     * @return {@code true} (as specified by {@link Queue#offer})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean offer(E e) {
+        xfer(e, true, ASYNC, 0);
+        return true;
+    }
+
+    /**
+     * Inserts the specified element at the tail of this queue.
+     * As the queue is unbounded, this method will never throw
+     * {@link IllegalStateException} or return {@code false}.
+     *
+     * @return {@code true} (as specified by {@link Collection#add})
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean add(E e) {
+        xfer(e, true, ASYNC, 0);
+        return true;
+    }
+
+    /**
+     * Transfers the element to a waiting consumer immediately, if possible.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * otherwise returning {@code false} without enqueuing the element.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean tryTransfer(E e) {
+        return xfer(e, true, NOW, 0) == null;
+    }
+
+    /**
+     * Transfers the element to a consumer, waiting if necessary to do so.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * else inserts the specified element at the tail of this queue
+     * and waits until the element is received by a consumer.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public void transfer(E e) throws InterruptedException {
+        if (xfer(e, true, SYNC, 0) != null) {
+            Thread.interrupted(); // failure possible only due to interrupt
+            throw new InterruptedException();
+        }
+    }
+
+    /**
+     * Transfers the element to a consumer if it is possible to do so
+     * before the timeout elapses.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * else inserts the specified element at the tail of this queue
+     * and waits until the element is received by a consumer,
+     * returning {@code false} if the specified wait time elapses
+     * before the element can be transferred.
+     *
+     * @throws NullPointerException if the specified element is null
+     */
+    public boolean tryTransfer(E e, long timeout, TimeUnit unit)
+        throws InterruptedException {
+        if (xfer(e, true, TIMED, unit.toNanos(timeout)) == null)
+            return true;
+        if (!Thread.interrupted())
+            return false;
+        throw new InterruptedException();
+    }
+
+    public E take() throws InterruptedException {
+        E e = xfer(null, false, SYNC, 0);
+        if (e != null)
+            return e;
+        Thread.interrupted();
+        throw new InterruptedException();
+    }
+
+    public E poll(long timeout, TimeUnit unit) throws InterruptedException {
+        E e = xfer(null, false, TIMED, unit.toNanos(timeout));
+        if (e != null || !Thread.interrupted())
+            return e;
+        throw new InterruptedException();
+    }
+
+    public E poll() {
+        return xfer(null, false, NOW, 0);
+    }
+
+    /**
+     * @throws NullPointerException     {@inheritDoc}
+     * @throws IllegalArgumentException {@inheritDoc}
+     */
+    public int drainTo(Collection<? super E> c) {
+        if (c == null)
+            throw new NullPointerException();
+        if (c == this)
+            throw new IllegalArgumentException();
+        int n = 0;
+        for (E e; (e = poll()) != null;) {
+            c.add(e);
+            ++n;
+        }
+        return n;
+    }
+
+    /**
+     * @throws NullPointerException     {@inheritDoc}
+     * @throws IllegalArgumentException {@inheritDoc}
+     */
+    public int drainTo(Collection<? super E> c, int maxElements) {
+        if (c == null)
+            throw new NullPointerException();
+        if (c == this)
+            throw new IllegalArgumentException();
+        int n = 0;
+        for (E e; n < maxElements && (e = poll()) != null;) {
+            c.add(e);
+            ++n;
+        }
+        return n;
+    }
+
+    /**
+     * Returns an iterator over the elements in this queue in proper sequence.
+     * The elements will be returned in order from first (head) to last (tail).
+     *
+     * <p>The returned iterator is a "weakly consistent" iterator that
+     * will never throw {@link java.util.ConcurrentModificationException
+     * ConcurrentModificationException}, and guarantees to traverse
+     * elements as they existed upon construction of the iterator, and
+     * may (but is not guaranteed to) reflect any modifications
+     * subsequent to construction.
+     *
+     * @return an iterator over the elements in this queue in proper sequence
+     */
+    public Iterator<E> iterator() {
+        return new Itr();
+    }
+
+    public E peek() {
+        return firstDataItem();
+    }
+
+    /**
+     * Returns {@code true} if this queue contains no elements.
+     *
+     * @return {@code true} if this queue contains no elements
+     */
+    public boolean isEmpty() {
+        for (Node p = head; p != null; p = succ(p)) {
+            if (!p.isMatched())
+                return !p.isData;
+        }
+        return true;
+    }
+
+    public boolean hasWaitingConsumer() {
+        return firstOfMode(false) != null;
+    }
+
+    /**
+     * Returns the number of elements in this queue.  If this queue
+     * contains more than {@code Integer.MAX_VALUE} elements, returns
+     * {@code Integer.MAX_VALUE}.
+     *
+     * <p>Beware that, unlike in most collections, this method is
+     * <em>NOT</em> a constant-time operation. Because of the
+     * asynchronous nature of these queues, determining the current
+     * number of elements requires an O(n) traversal.
+     *
+     * @return the number of elements in this queue
+     */
+    public int size() {
+        return countOfMode(true);
+    }
+
+    public int getWaitingConsumerCount() {
+        return countOfMode(false);
+    }
+
+    /**
+     * Removes a single instance of the specified element from this queue,
+     * if it is present.  More formally, removes an element {@code e} such
+     * that {@code o.equals(e)}, if this queue contains one or more such
+     * elements.
+     * Returns {@code true} if this queue contained the specified element
+     * (or equivalently, if this queue changed as a result of the call).
+     *
+     * @param o element to be removed from this queue, if present
+     * @return {@code true} if this queue changed as a result of the call
+     */
+    public boolean remove(Object o) {
+        return findAndRemove(o);
+    }
+
+    /**
+     * Returns {@code true} if this queue contains the specified element.
+     * More formally, returns {@code true} if and only if this queue contains
+     * at least one element {@code e} such that {@code o.equals(e)}.
+     *
+     * @param o object to be checked for containment in this queue
+     * @return {@code true} if this queue contains the specified element
+     */
+    public boolean contains(Object o) {
+        if (o == null) return false;
+        for (Node p = head; p != null; p = succ(p)) {
+            Object item = p.item;
+            if (p.isData) {
+                if (item != null && item != p && o.equals(item))
+                    return true;
+            }
+            else if (item == null)
+                break;
+        }
+        return false;
+    }
+
+    /**
+     * Always returns {@code Integer.MAX_VALUE} because a
+     * {@code LinkedTransferQueue} is not capacity constrained.
+     *
+     * @return {@code Integer.MAX_VALUE} (as specified by
+     *         {@link java.util.concurrent.BlockingQueue#remainingCapacity()
+     *         BlockingQueue.remainingCapacity})
+     */
+    public int remainingCapacity() {
+        return Integer.MAX_VALUE;
+    }
+
+    /**
+     * Saves the state to a stream (that is, serializes it).
+     *
+     * @serialData All of the elements (each an {@code E}) in
+     * the proper order, followed by a null
+     * @param s the stream
+     */
+    private void writeObject(java.io.ObjectOutputStream s)
+        throws java.io.IOException {
+        s.defaultWriteObject();
+        for (E e : this)
+            s.writeObject(e);
+        // Use trailing null as sentinel
+        s.writeObject(null);
+    }
+
+    /**
+     * Reconstitutes the Queue instance from a stream (that is,
+     * deserializes it).
+     *
+     * @param s the stream
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+        for (;;) {
+            @SuppressWarnings("unchecked")
+            E item = (E) s.readObject();
+            if (item == null)
+                break;
+            else
+                offer(item);
+        }
+    }
+
+    // Unsafe mechanics
+
+    private static final sun.misc.Unsafe UNSAFE;
+    private static final long headOffset;
+    private static final long tailOffset;
+    private static final long sweepVotesOffset;
+    static {
+        try {
+            UNSAFE = getUnsafe();
+            Class<?> k = LinkedTransferQueue.class;
+            headOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("head"));
+            tailOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("tail"));
+            sweepVotesOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("sweepVotes"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/jsr166y/Phaser.java b/src/main/java/jsr166y/Phaser.java
new file mode 100644
index 00000000000..b348286ca32
--- /dev/null
+++ b/src/main/java/jsr166y/Phaser.java
@@ -0,0 +1,1162 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.LockSupport;
+
+/**
+ * A reusable synchronization barrier, similar in functionality to
+ * {@link java.util.concurrent.CyclicBarrier CyclicBarrier} and
+ * {@link java.util.concurrent.CountDownLatch CountDownLatch}
+ * but supporting more flexible usage.
+ *
+ * <p> <b>Registration.</b> Unlike the case for other barriers, the
+ * number of parties <em>registered</em> to synchronize on a phaser
+ * may vary over time.  Tasks may be registered at any time (using
+ * methods {@link #register}, {@link #bulkRegister}, or forms of
+ * constructors establishing initial numbers of parties), and
+ * optionally deregistered upon any arrival (using {@link
+ * #arriveAndDeregister}).  As is the case with most basic
+ * synchronization constructs, registration and deregistration affect
+ * only internal counts; they do not establish any further internal
+ * bookkeeping, so tasks cannot query whether they are registered.
+ * (However, you can introduce such bookkeeping by subclassing this
+ * class.)
+ *
+ * <p> <b>Synchronization.</b> Like a {@code CyclicBarrier}, a {@code
+ * Phaser} may be repeatedly awaited.  Method {@link
+ * #arriveAndAwaitAdvance} has effect analogous to {@link
+ * java.util.concurrent.CyclicBarrier#await CyclicBarrier.await}. Each
+ * generation of a phaser has an associated phase number. The phase
+ * number starts at zero, and advances when all parties arrive at the
+ * phaser, wrapping around to zero after reaching {@code
+ * Integer.MAX_VALUE}. The use of phase numbers enables independent
+ * control of actions upon arrival at a phaser and upon awaiting
+ * others, via two kinds of methods that may be invoked by any
+ * registered party:
+ *
+ * <ul>
+ *
+ *   <li> <b>Arrival.</b> Methods {@link #arrive} and
+ *       {@link #arriveAndDeregister} record arrival.  These methods
+ *       do not block, but return an associated <em>arrival phase
+ *       number</em>; that is, the phase number of the phaser to which
+ *       the arrival applied. When the final party for a given phase
+ *       arrives, an optional action is performed and the phase
+ *       advances.  These actions are performed by the party
+ *       triggering a phase advance, and are arranged by overriding
+ *       method {@link #onAdvance(int, int)}, which also controls
+ *       termination. Overriding this method is similar to, but more
+ *       flexible than, providing a barrier action to a {@code
+ *       CyclicBarrier}.
+ *
+ *   <li> <b>Waiting.</b> Method {@link #awaitAdvance} requires an
+ *       argument indicating an arrival phase number, and returns when
+ *       the phaser advances to (or is already at) a different phase.
+ *       Unlike similar constructions using {@code CyclicBarrier},
+ *       method {@code awaitAdvance} continues to wait even if the
+ *       waiting thread is interrupted. Interruptible and timeout
+ *       versions are also available, but exceptions encountered while
+ *       tasks wait interruptibly or with timeout do not change the
+ *       state of the phaser. If necessary, you can perform any
+ *       associated recovery within handlers of those exceptions,
+ *       often after invoking {@code forceTermination}.  Phasers may
+ *       also be used by tasks executing in a {@link ForkJoinPool},
+ *       which will ensure sufficient parallelism to execute tasks
+ *       when others are blocked waiting for a phase to advance.
+ *
+ * </ul>
+ *
+ * <p> <b>Termination.</b> A phaser may enter a <em>termination</em>
+ * state, that may be checked using method {@link #isTerminated}. Upon
+ * termination, all synchronization methods immediately return without
+ * waiting for advance, as indicated by a negative return value.
+ * Similarly, attempts to register upon termination have no effect.
+ * Termination is triggered when an invocation of {@code onAdvance}
+ * returns {@code true}. The default implementation returns {@code
+ * true} if a deregistration has caused the number of registered
+ * parties to become zero.  As illustrated below, when phasers control
+ * actions with a fixed number of iterations, it is often convenient
+ * to override this method to cause termination when the current phase
+ * number reaches a threshold. Method {@link #forceTermination} is
+ * also available to abruptly release waiting threads and allow them
+ * to terminate.
+ *
+ * <p> <b>Tiering.</b> Phasers may be <em>tiered</em> (i.e.,
+ * constructed in tree structures) to reduce contention. Phasers with
+ * large numbers of parties that would otherwise experience heavy
+ * synchronization contention costs may instead be set up so that
+ * groups of sub-phasers share a common parent.  This may greatly
+ * increase throughput even though it incurs greater per-operation
+ * overhead.
+ *
+ * <p>In a tree of tiered phasers, registration and deregistration of
+ * child phasers with their parent are managed automatically.
+ * Whenever the number of registered parties of a child phaser becomes
+ * non-zero (as established in the {@link #Phaser(Phaser,int)}
+ * constructor, {@link #register}, or {@link #bulkRegister}), the
+ * child phaser is registered with its parent.  Whenever the number of
+ * registered parties becomes zero as the result of an invocation of
+ * {@link #arriveAndDeregister}, the child phaser is deregistered
+ * from its parent.
+ *
+ * <p><b>Monitoring.</b> While synchronization methods may be invoked
+ * only by registered parties, the current state of a phaser may be
+ * monitored by any caller.  At any given moment there are {@link
+ * #getRegisteredParties} parties in total, of which {@link
+ * #getArrivedParties} have arrived at the current phase ({@link
+ * #getPhase}).  When the remaining ({@link #getUnarrivedParties})
+ * parties arrive, the phase advances.  The values returned by these
+ * methods may reflect transient states and so are not in general
+ * useful for synchronization control.  Method {@link #toString}
+ * returns snapshots of these state queries in a form convenient for
+ * informal monitoring.
+ *
+ * <p><b>Sample usages:</b>
+ *
+ * <p>A {@code Phaser} may be used instead of a {@code CountDownLatch}
+ * to control a one-shot action serving a variable number of parties.
+ * The typical idiom is for the method setting this up to first
+ * register, then start the actions, then deregister, as in:
+ *
+ *  <pre> {@code
+ * void runTasks(List<Runnable> tasks) {
+ *   final Phaser phaser = new Phaser(1); // "1" to register self
+ *   // create and start threads
+ *   for (final Runnable task : tasks) {
+ *     phaser.register();
+ *     new Thread() {
+ *       public void run() {
+ *         phaser.arriveAndAwaitAdvance(); // await all creation
+ *         task.run();
+ *       }
+ *     }.start();
+ *   }
+ *
+ *   // allow threads to start and deregister self
+ *   phaser.arriveAndDeregister();
+ * }}</pre>
+ *
+ * <p>One way to cause a set of threads to repeatedly perform actions
+ * for a given number of iterations is to override {@code onAdvance}:
+ *
+ *  <pre> {@code
+ * void startTasks(List<Runnable> tasks, final int iterations) {
+ *   final Phaser phaser = new Phaser() {
+ *     protected boolean onAdvance(int phase, int registeredParties) {
+ *       return phase >= iterations || registeredParties == 0;
+ *     }
+ *   };
+ *   phaser.register();
+ *   for (final Runnable task : tasks) {
+ *     phaser.register();
+ *     new Thread() {
+ *       public void run() {
+ *         do {
+ *           task.run();
+ *           phaser.arriveAndAwaitAdvance();
+ *         } while (!phaser.isTerminated());
+ *       }
+ *     }.start();
+ *   }
+ *   phaser.arriveAndDeregister(); // deregister self, don't wait
+ * }}</pre>
+ *
+ * If the main task must later await termination, it
+ * may re-register and then execute a similar loop:
+ *  <pre> {@code
+ *   // ...
+ *   phaser.register();
+ *   while (!phaser.isTerminated())
+ *     phaser.arriveAndAwaitAdvance();}</pre>
+ *
+ * <p>Related constructions may be used to await particular phase numbers
+ * in contexts where you are sure that the phase will never wrap around
+ * {@code Integer.MAX_VALUE}. For example:
+ *
+ *  <pre> {@code
+ * void awaitPhase(Phaser phaser, int phase) {
+ *   int p = phaser.register(); // assumes caller not already registered
+ *   while (p < phase) {
+ *     if (phaser.isTerminated())
+ *       // ... deal with unexpected termination
+ *     else
+ *       p = phaser.arriveAndAwaitAdvance();
+ *   }
+ *   phaser.arriveAndDeregister();
+ * }}</pre>
+ *
+ *
+ * <p>To create a set of {@code n} tasks using a tree of phasers, you
+ * could use code of the following form, assuming a Task class with a
+ * constructor accepting a {@code Phaser} that it registers with upon
+ * construction. After invocation of {@code build(new Task[n], 0, n,
+ * new Phaser())}, these tasks could then be started, for example by
+ * submitting to a pool:
+ *
+ *  <pre> {@code
+ * void build(Task[] tasks, int lo, int hi, Phaser ph) {
+ *   if (hi - lo > TASKS_PER_PHASER) {
+ *     for (int i = lo; i < hi; i += TASKS_PER_PHASER) {
+ *       int j = Math.min(i + TASKS_PER_PHASER, hi);
+ *       build(tasks, i, j, new Phaser(ph));
+ *     }
+ *   } else {
+ *     for (int i = lo; i < hi; ++i)
+ *       tasks[i] = new Task(ph);
+ *       // assumes new Task(ph) performs ph.register()
+ *   }
+ * }}</pre>
+ *
+ * The best value of {@code TASKS_PER_PHASER} depends mainly on
+ * expected synchronization rates. A value as low as four may
+ * be appropriate for extremely small per-phase task bodies (thus
+ * high rates), or up to hundreds for extremely large ones.
+ *
+ * <p><b>Implementation notes</b>: This implementation restricts the
+ * maximum number of parties to 65535. Attempts to register additional
+ * parties result in {@code IllegalStateException}. However, you can and
+ * should create tiered phasers to accommodate arbitrarily large sets
+ * of participants.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public class Phaser {
+    /*
+     * This class implements an extension of X10 "clocks".  Thanks to
+     * Vijay Saraswat for the idea, and to Vivek Sarkar for
+     * enhancements to extend functionality.
+     */
+
+    /**
+     * Primary state representation, holding four bit-fields:
+     *
+     * unarrived  -- the number of parties yet to hit barrier (bits  0-15)
+     * parties    -- the number of parties to wait            (bits 16-31)
+     * phase      -- the generation of the barrier            (bits 32-62)
+     * terminated -- set if barrier is terminated             (bit  63 / sign)
+     *
+     * Except that a phaser with no registered parties is
+     * distinguished by the otherwise illegal state of having zero
+     * parties and one unarrived parties (encoded as EMPTY below).
+     *
+     * To efficiently maintain atomicity, these values are packed into
+     * a single (atomic) long. Good performance relies on keeping
+     * state decoding and encoding simple, and keeping race windows
+     * short.
+     *
+     * All state updates are performed via CAS except initial
+     * registration of a sub-phaser (i.e., one with a non-null
+     * parent).  In this (relatively rare) case, we use built-in
+     * synchronization to lock while first registering with its
+     * parent.
+     *
+     * The phase of a subphaser is allowed to lag that of its
+     * ancestors until it is actually accessed -- see method
+     * reconcileState.
+     */
+    private volatile long state;
+
+    private static final int  MAX_PARTIES     = 0xffff;
+    private static final int  MAX_PHASE       = Integer.MAX_VALUE;
+    private static final int  PARTIES_SHIFT   = 16;
+    private static final int  PHASE_SHIFT     = 32;
+    private static final int  UNARRIVED_MASK  = 0xffff;      // to mask ints
+    private static final long PARTIES_MASK    = 0xffff0000L; // to mask longs
+    private static final long COUNTS_MASK     = 0xffffffffL;
+    private static final long TERMINATION_BIT = 1L << 63;
+
+    // some special values
+    private static final int  ONE_ARRIVAL     = 1;
+    private static final int  ONE_PARTY       = 1 << PARTIES_SHIFT;
+    private static final int  ONE_DEREGISTER  = ONE_ARRIVAL|ONE_PARTY;
+    private static final int  EMPTY           = 1;
+
+    // The following unpacking methods are usually manually inlined
+
+    private static int unarrivedOf(long s) {
+        int counts = (int)s;
+        return (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
+    }
+
+    private static int partiesOf(long s) {
+        return (int)s >>> PARTIES_SHIFT;
+    }
+
+    private static int phaseOf(long s) {
+        return (int)(s >>> PHASE_SHIFT);
+    }
+
+    private static int arrivedOf(long s) {
+        int counts = (int)s;
+        return (counts == EMPTY) ? 0 :
+            (counts >>> PARTIES_SHIFT) - (counts & UNARRIVED_MASK);
+    }
+
+    /**
+     * The parent of this phaser, or null if none
+     */
+    private final Phaser parent;
+
+    /**
+     * The root of phaser tree. Equals this if not in a tree.
+     */
+    private final Phaser root;
+
+    /**
+     * Heads of Treiber stacks for waiting threads. To eliminate
+     * contention when releasing some threads while adding others, we
+     * use two of them, alternating across even and odd phases.
+     * Subphasers share queues with root to speed up releases.
+     */
+    private final AtomicReference<QNode> evenQ;
+    private final AtomicReference<QNode> oddQ;
+
+    private AtomicReference<QNode> queueFor(int phase) {
+        return ((phase & 1) == 0) ? evenQ : oddQ;
+    }
+
+    /**
+     * Returns message string for bounds exceptions on arrival.
+     */
+    private String badArrive(long s) {
+        return "Attempted arrival of unregistered party for " +
+            stateToString(s);
+    }
+
+    /**
+     * Returns message string for bounds exceptions on registration.
+     */
+    private String badRegister(long s) {
+        return "Attempt to register more than " +
+            MAX_PARTIES + " parties for " + stateToString(s);
+    }
+
+    /**
+     * Main implementation for methods arrive and arriveAndDeregister.
+     * Manually tuned to speed up and minimize race windows for the
+     * common case of just decrementing unarrived field.
+     *
+     * @param adjust value to subtract from state;
+     *               ONE_ARRIVAL for arrive,
+     *               ONE_DEREGISTER for arriveAndDeregister
+     */
+    private int doArrive(int adjust) {
+        final Phaser root = this.root;
+        for (;;) {
+            long s = (root == this) ? state : reconcileState();
+            int phase = (int)(s >>> PHASE_SHIFT);
+            if (phase < 0)
+                return phase;
+            int counts = (int)s;
+            int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
+            if (unarrived <= 0)
+                throw new IllegalStateException(badArrive(s));
+            if (UNSAFE.compareAndSwapLong(this, stateOffset, s, s-=adjust)) {
+                if (unarrived == 1) {
+                    long n = s & PARTIES_MASK;  // base of next state
+                    int nextUnarrived = (int)n >>> PARTIES_SHIFT;
+                    if (root == this) {
+                        if (onAdvance(phase, nextUnarrived))
+                            n |= TERMINATION_BIT;
+                        else if (nextUnarrived == 0)
+                            n |= EMPTY;
+                        else
+                            n |= nextUnarrived;
+                        int nextPhase = (phase + 1) & MAX_PHASE;
+                        n |= (long)nextPhase << PHASE_SHIFT;
+                        UNSAFE.compareAndSwapLong(this, stateOffset, s, n);
+                        releaseWaiters(phase);
+                    }
+                    else if (nextUnarrived == 0) { // propagate deregistration
+                        phase = parent.doArrive(ONE_DEREGISTER);
+                        UNSAFE.compareAndSwapLong(this, stateOffset,
+                                                  s, s | EMPTY);
+                    }
+                    else
+                        phase = parent.doArrive(ONE_ARRIVAL);
+                }
+                return phase;
+            }
+        }
+    }
+
+    /**
+     * Implementation of register, bulkRegister
+     *
+     * @param registrations number to add to both parties and
+     * unarrived fields. Must be greater than zero.
+     */
+    private int doRegister(int registrations) {
+        // adjustment to state
+        long adjust = ((long)registrations << PARTIES_SHIFT) | registrations;
+        final Phaser parent = this.parent;
+        int phase;
+        for (;;) {
+            long s = (parent == null) ? state : reconcileState();
+            int counts = (int)s;
+            int parties = counts >>> PARTIES_SHIFT;
+            int unarrived = counts & UNARRIVED_MASK;
+            if (registrations > MAX_PARTIES - parties)
+                throw new IllegalStateException(badRegister(s));
+            phase = (int)(s >>> PHASE_SHIFT);
+            if (phase < 0)
+                break;
+            if (counts != EMPTY) {                  // not 1st registration
+                if (parent == null || reconcileState() == s) {
+                    if (unarrived == 0)             // wait out advance
+                        root.internalAwaitAdvance(phase, null);
+                    else if (UNSAFE.compareAndSwapLong(this, stateOffset,
+                                                       s, s + adjust))
+                        break;
+                }
+            }
+            else if (parent == null) {              // 1st root registration
+                long next = ((long)phase << PHASE_SHIFT) | adjust;
+                if (UNSAFE.compareAndSwapLong(this, stateOffset, s, next))
+                    break;
+            }
+            else {
+                synchronized (this) {               // 1st sub registration
+                    if (state == s) {               // recheck under lock
+                        phase = parent.doRegister(1);
+                        if (phase < 0)
+                            break;
+                        // finish registration whenever parent registration
+                        // succeeded, even when racing with termination,
+                        // since these are part of the same "transaction".
+                        while (!UNSAFE.compareAndSwapLong
+                               (this, stateOffset, s,
+                                ((long)phase << PHASE_SHIFT) | adjust)) {
+                            s = state;
+                            phase = (int)(root.state >>> PHASE_SHIFT);
+                            // assert (int)s == EMPTY;
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+        return phase;
+    }
+
+    /**
+     * Resolves lagged phase propagation from root if necessary.
+     * Reconciliation normally occurs when root has advanced but
+     * subphasers have not yet done so, in which case they must finish
+     * their own advance by setting unarrived to parties (or if
+     * parties is zero, resetting to unregistered EMPTY state).
+     *
+     * @return reconciled state
+     */
+    private long reconcileState() {
+        final Phaser root = this.root;
+        long s = state;
+        if (root != this) {
+            int phase, p;
+            // CAS to root phase with current parties, tripping unarrived
+            while ((phase = (int)(root.state >>> PHASE_SHIFT)) !=
+                   (int)(s >>> PHASE_SHIFT) &&
+                   !UNSAFE.compareAndSwapLong
+                   (this, stateOffset, s,
+                    s = (((long)phase << PHASE_SHIFT) |
+                         ((phase < 0) ? (s & COUNTS_MASK) :
+                          (((p = (int)s >>> PARTIES_SHIFT) == 0) ? EMPTY :
+                           ((s & PARTIES_MASK) | p))))))
+                s = state;
+        }
+        return s;
+    }
+
+    /**
+     * Creates a new phaser with no initially registered parties, no
+     * parent, and initial phase number 0. Any thread using this
+     * phaser will need to first register for it.
+     */
+    public Phaser() {
+        this(null, 0);
+    }
+
+    /**
+     * Creates a new phaser with the given number of registered
+     * unarrived parties, no parent, and initial phase number 0.
+     *
+     * @param parties the number of parties required to advance to the
+     * next phase
+     * @throws IllegalArgumentException if parties less than zero
+     * or greater than the maximum number of parties supported
+     */
+    public Phaser(int parties) {
+        this(null, parties);
+    }
+
+    /**
+     * Equivalent to {@link #Phaser(Phaser, int) Phaser(parent, 0)}.
+     *
+     * @param parent the parent phaser
+     */
+    public Phaser(Phaser parent) {
+        this(parent, 0);
+    }
+
+    /**
+     * Creates a new phaser with the given parent and number of
+     * registered unarrived parties.  When the given parent is non-null
+     * and the given number of parties is greater than zero, this
+     * child phaser is registered with its parent.
+     *
+     * @param parent the parent phaser
+     * @param parties the number of parties required to advance to the
+     * next phase
+     * @throws IllegalArgumentException if parties less than zero
+     * or greater than the maximum number of parties supported
+     */
+    public Phaser(Phaser parent, int parties) {
+        if (parties >>> PARTIES_SHIFT != 0)
+            throw new IllegalArgumentException("Illegal number of parties");
+        int phase = 0;
+        this.parent = parent;
+        if (parent != null) {
+            final Phaser root = parent.root;
+            this.root = root;
+            this.evenQ = root.evenQ;
+            this.oddQ = root.oddQ;
+            if (parties != 0)
+                phase = parent.doRegister(1);
+        }
+        else {
+            this.root = this;
+            this.evenQ = new AtomicReference<QNode>();
+            this.oddQ = new AtomicReference<QNode>();
+        }
+        this.state = (parties == 0) ? (long)EMPTY :
+            ((long)phase << PHASE_SHIFT) |
+            ((long)parties << PARTIES_SHIFT) |
+            ((long)parties);
+    }
+
+    /**
+     * Adds a new unarrived party to this phaser.  If an ongoing
+     * invocation of {@link #onAdvance} is in progress, this method
+     * may await its completion before returning.  If this phaser has
+     * a parent, and this phaser previously had no registered parties,
+     * this child phaser is also registered with its parent. If
+     * this phaser is terminated, the attempt to register has
+     * no effect, and a negative value is returned.
+     *
+     * @return the arrival phase number to which this registration
+     * applied.  If this value is negative, then this phaser has
+     * terminated, in which case registration has no effect.
+     * @throws IllegalStateException if attempting to register more
+     * than the maximum supported number of parties
+     */
+    public int register() {
+        return doRegister(1);
+    }
+
+    /**
+     * Adds the given number of new unarrived parties to this phaser.
+     * If an ongoing invocation of {@link #onAdvance} is in progress,
+     * this method may await its completion before returning.  If this
+     * phaser has a parent, and the given number of parties is greater
+     * than zero, and this phaser previously had no registered
+     * parties, this child phaser is also registered with its parent.
+     * If this phaser is terminated, the attempt to register has no
+     * effect, and a negative value is returned.
+     *
+     * @param parties the number of additional parties required to
+     * advance to the next phase
+     * @return the arrival phase number to which this registration
+     * applied.  If this value is negative, then this phaser has
+     * terminated, in which case registration has no effect.
+     * @throws IllegalStateException if attempting to register more
+     * than the maximum supported number of parties
+     * @throws IllegalArgumentException if {@code parties < 0}
+     */
+    public int bulkRegister(int parties) {
+        if (parties < 0)
+            throw new IllegalArgumentException();
+        if (parties == 0)
+            return getPhase();
+        return doRegister(parties);
+    }
+
+    /**
+     * Arrives at this phaser, without waiting for others to arrive.
+     *
+     * <p>It is a usage error for an unregistered party to invoke this
+     * method.  However, this error may result in an {@code
+     * IllegalStateException} only upon some subsequent operation on
+     * this phaser, if ever.
+     *
+     * @return the arrival phase number, or a negative value if terminated
+     * @throws IllegalStateException if not terminated and the number
+     * of unarrived parties would become negative
+     */
+    public int arrive() {
+        return doArrive(ONE_ARRIVAL);
+    }
+
+    /**
+     * Arrives at this phaser and deregisters from it without waiting
+     * for others to arrive. Deregistration reduces the number of
+     * parties required to advance in future phases.  If this phaser
+     * has a parent, and deregistration causes this phaser to have
+     * zero parties, this phaser is also deregistered from its parent.
+     *
+     * <p>It is a usage error for an unregistered party to invoke this
+     * method.  However, this error may result in an {@code
+     * IllegalStateException} only upon some subsequent operation on
+     * this phaser, if ever.
+     *
+     * @return the arrival phase number, or a negative value if terminated
+     * @throws IllegalStateException if not terminated and the number
+     * of registered or unarrived parties would become negative
+     */
+    public int arriveAndDeregister() {
+        return doArrive(ONE_DEREGISTER);
+    }
+
+    /**
+     * Arrives at this phaser and awaits others. Equivalent in effect
+     * to {@code awaitAdvance(arrive())}.  If you need to await with
+     * interruption or timeout, you can arrange this with an analogous
+     * construction using one of the other forms of the {@code
+     * awaitAdvance} method.  If instead you need to deregister upon
+     * arrival, use {@code awaitAdvance(arriveAndDeregister())}.
+     *
+     * <p>It is a usage error for an unregistered party to invoke this
+     * method.  However, this error may result in an {@code
+     * IllegalStateException} only upon some subsequent operation on
+     * this phaser, if ever.
+     *
+     * @return the arrival phase number, or the (negative)
+     * {@linkplain #getPhase() current phase} if terminated
+     * @throws IllegalStateException if not terminated and the number
+     * of unarrived parties would become negative
+     */
+    public int arriveAndAwaitAdvance() {
+        // Specialization of doArrive+awaitAdvance eliminating some reads/paths
+        final Phaser root = this.root;
+        for (;;) {
+            long s = (root == this) ? state : reconcileState();
+            int phase = (int)(s >>> PHASE_SHIFT);
+            if (phase < 0)
+                return phase;
+            int counts = (int)s;
+            int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
+            if (unarrived <= 0)
+                throw new IllegalStateException(badArrive(s));
+            if (UNSAFE.compareAndSwapLong(this, stateOffset, s,
+                                          s -= ONE_ARRIVAL)) {
+                if (unarrived > 1)
+                    return root.internalAwaitAdvance(phase, null);
+                if (root != this)
+                    return parent.arriveAndAwaitAdvance();
+                long n = s & PARTIES_MASK;  // base of next state
+                int nextUnarrived = (int)n >>> PARTIES_SHIFT;
+                if (onAdvance(phase, nextUnarrived))
+                    n |= TERMINATION_BIT;
+                else if (nextUnarrived == 0)
+                    n |= EMPTY;
+                else
+                    n |= nextUnarrived;
+                int nextPhase = (phase + 1) & MAX_PHASE;
+                n |= (long)nextPhase << PHASE_SHIFT;
+                if (!UNSAFE.compareAndSwapLong(this, stateOffset, s, n))
+                    return (int)(state >>> PHASE_SHIFT); // terminated
+                releaseWaiters(phase);
+                return nextPhase;
+            }
+        }
+    }
+
+    /**
+     * Awaits the phase of this phaser to advance from the given phase
+     * value, returning immediately if the current phase is not equal
+     * to the given phase value or this phaser is terminated.
+     *
+     * @param phase an arrival phase number, or negative value if
+     * terminated; this argument is normally the value returned by a
+     * previous call to {@code arrive} or {@code arriveAndDeregister}.
+     * @return the next arrival phase number, or the argument if it is
+     * negative, or the (negative) {@linkplain #getPhase() current phase}
+     * if terminated
+     */
+    public int awaitAdvance(int phase) {
+        final Phaser root = this.root;
+        long s = (root == this) ? state : reconcileState();
+        int p = (int)(s >>> PHASE_SHIFT);
+        if (phase < 0)
+            return phase;
+        if (p == phase)
+            return root.internalAwaitAdvance(phase, null);
+        return p;
+    }
+
+    /**
+     * Awaits the phase of this phaser to advance from the given phase
+     * value, throwing {@code InterruptedException} if interrupted
+     * while waiting, or returning immediately if the current phase is
+     * not equal to the given phase value or this phaser is
+     * terminated.
+     *
+     * @param phase an arrival phase number, or negative value if
+     * terminated; this argument is normally the value returned by a
+     * previous call to {@code arrive} or {@code arriveAndDeregister}.
+     * @return the next arrival phase number, or the argument if it is
+     * negative, or the (negative) {@linkplain #getPhase() current phase}
+     * if terminated
+     * @throws InterruptedException if thread interrupted while waiting
+     */
+    public int awaitAdvanceInterruptibly(int phase)
+        throws InterruptedException {
+        final Phaser root = this.root;
+        long s = (root == this) ? state : reconcileState();
+        int p = (int)(s >>> PHASE_SHIFT);
+        if (phase < 0)
+            return phase;
+        if (p == phase) {
+            QNode node = new QNode(this, phase, true, false, 0L);
+            p = root.internalAwaitAdvance(phase, node);
+            if (node.wasInterrupted)
+                throw new InterruptedException();
+        }
+        return p;
+    }
+
+    /**
+     * Awaits the phase of this phaser to advance from the given phase
+     * value or the given timeout to elapse, throwing {@code
+     * InterruptedException} if interrupted while waiting, or
+     * returning immediately if the current phase is not equal to the
+     * given phase value or this phaser is terminated.
+     *
+     * @param phase an arrival phase number, or negative value if
+     * terminated; this argument is normally the value returned by a
+     * previous call to {@code arrive} or {@code arriveAndDeregister}.
+     * @param timeout how long to wait before giving up, in units of
+     *        {@code unit}
+     * @param unit a {@code TimeUnit} determining how to interpret the
+     *        {@code timeout} parameter
+     * @return the next arrival phase number, or the argument if it is
+     * negative, or the (negative) {@linkplain #getPhase() current phase}
+     * if terminated
+     * @throws InterruptedException if thread interrupted while waiting
+     * @throws TimeoutException if timed out while waiting
+     */
+    public int awaitAdvanceInterruptibly(int phase,
+                                         long timeout, TimeUnit unit)
+        throws InterruptedException, TimeoutException {
+        long nanos = unit.toNanos(timeout);
+        final Phaser root = this.root;
+        long s = (root == this) ? state : reconcileState();
+        int p = (int)(s >>> PHASE_SHIFT);
+        if (phase < 0)
+            return phase;
+        if (p == phase) {
+            QNode node = new QNode(this, phase, true, true, nanos);
+            p = root.internalAwaitAdvance(phase, node);
+            if (node.wasInterrupted)
+                throw new InterruptedException();
+            else if (p == phase)
+                throw new TimeoutException();
+        }
+        return p;
+    }
+
+    /**
+     * Forces this phaser to enter termination state.  Counts of
+     * registered parties are unaffected.  If this phaser is a member
+     * of a tiered set of phasers, then all of the phasers in the set
+     * are terminated.  If this phaser is already terminated, this
+     * method has no effect.  This method may be useful for
+     * coordinating recovery after one or more tasks encounter
+     * unexpected exceptions.
+     */
+    public void forceTermination() {
+        // Only need to change root state
+        final Phaser root = this.root;
+        long s;
+        while ((s = root.state) >= 0) {
+            if (UNSAFE.compareAndSwapLong(root, stateOffset,
+                                          s, s | TERMINATION_BIT)) {
+                // signal all threads
+                releaseWaiters(0); // Waiters on evenQ
+                releaseWaiters(1); // Waiters on oddQ
+                return;
+            }
+        }
+    }
+
+    /**
+     * Returns the current phase number. The maximum phase number is
+     * {@code Integer.MAX_VALUE}, after which it restarts at
+     * zero. Upon termination, the phase number is negative,
+     * in which case the prevailing phase prior to termination
+     * may be obtained via {@code getPhase() + Integer.MIN_VALUE}.
+     *
+     * @return the phase number, or a negative value if terminated
+     */
+    public final int getPhase() {
+        return (int)(root.state >>> PHASE_SHIFT);
+    }
+
+    /**
+     * Returns the number of parties registered at this phaser.
+     *
+     * @return the number of parties
+     */
+    public int getRegisteredParties() {
+        return partiesOf(state);
+    }
+
+    /**
+     * Returns the number of registered parties that have arrived at
+     * the current phase of this phaser. If this phaser has terminated,
+     * the returned value is meaningless and arbitrary.
+     *
+     * @return the number of arrived parties
+     */
+    public int getArrivedParties() {
+        return arrivedOf(reconcileState());
+    }
+
+    /**
+     * Returns the number of registered parties that have not yet
+     * arrived at the current phase of this phaser. If this phaser has
+     * terminated, the returned value is meaningless and arbitrary.
+     *
+     * @return the number of unarrived parties
+     */
+    public int getUnarrivedParties() {
+        return unarrivedOf(reconcileState());
+    }
+
+    /**
+     * Returns the parent of this phaser, or {@code null} if none.
+     *
+     * @return the parent of this phaser, or {@code null} if none
+     */
+    public Phaser getParent() {
+        return parent;
+    }
+
+    /**
+     * Returns the root ancestor of this phaser, which is the same as
+     * this phaser if it has no parent.
+     *
+     * @return the root ancestor of this phaser
+     */
+    public Phaser getRoot() {
+        return root;
+    }
+
+    /**
+     * Returns {@code true} if this phaser has been terminated.
+     *
+     * @return {@code true} if this phaser has been terminated
+     */
+    public boolean isTerminated() {
+        return root.state < 0L;
+    }
+
+    /**
+     * Overridable method to perform an action upon impending phase
+     * advance, and to control termination. This method is invoked
+     * upon arrival of the party advancing this phaser (when all other
+     * waiting parties are dormant).  If this method returns {@code
+     * true}, this phaser will be set to a final termination state
+     * upon advance, and subsequent calls to {@link #isTerminated}
+     * will return true. Any (unchecked) Exception or Error thrown by
+     * an invocation of this method is propagated to the party
+     * attempting to advance this phaser, in which case no advance
+     * occurs.
+     *
+     * <p>The arguments to this method provide the state of the phaser
+     * prevailing for the current transition.  The effects of invoking
+     * arrival, registration, and waiting methods on this phaser from
+     * within {@code onAdvance} are unspecified and should not be
+     * relied on.
+     *
+     * <p>If this phaser is a member of a tiered set of phasers, then
+     * {@code onAdvance} is invoked only for its root phaser on each
+     * advance.
+     *
+     * <p>To support the most common use cases, the default
+     * implementation of this method returns {@code true} when the
+     * number of registered parties has become zero as the result of a
+     * party invoking {@code arriveAndDeregister}.  You can disable
+     * this behavior, thus enabling continuation upon future
+     * registrations, by overriding this method to always return
+     * {@code false}:
+     *
+     * <pre> {@code
+     * Phaser phaser = new Phaser() {
+     *   protected boolean onAdvance(int phase, int parties) { return false; }
+     * }}</pre>
+     *
+     * @param phase the current phase number on entry to this method,
+     * before this phaser is advanced
+     * @param registeredParties the current number of registered parties
+     * @return {@code true} if this phaser should terminate
+     */
+    protected boolean onAdvance(int phase, int registeredParties) {
+        return registeredParties == 0;
+    }
+
+    /**
+     * Returns a string identifying this phaser, as well as its
+     * state.  The state, in brackets, includes the String {@code
+     * "phase = "} followed by the phase number, {@code "parties = "}
+     * followed by the number of registered parties, and {@code
+     * "arrived = "} followed by the number of arrived parties.
+     *
+     * @return a string identifying this phaser, as well as its state
+     */
+    public String toString() {
+        return stateToString(reconcileState());
+    }
+
+    /**
+     * Implementation of toString and string-based error messages
+     */
+    private String stateToString(long s) {
+        return super.toString() +
+            "[phase = " + phaseOf(s) +
+            " parties = " + partiesOf(s) +
+            " arrived = " + arrivedOf(s) + "]";
+    }
+
+    // Waiting mechanics
+
+    /**
+     * Removes and signals threads from queue for phase.
+     */
+    private void releaseWaiters(int phase) {
+        QNode q;   // first element of queue
+        Thread t;  // its thread
+        AtomicReference<QNode> head = (phase & 1) == 0 ? evenQ : oddQ;
+        while ((q = head.get()) != null &&
+               q.phase != (int)(root.state >>> PHASE_SHIFT)) {
+            if (head.compareAndSet(q, q.next) &&
+                (t = q.thread) != null) {
+                q.thread = null;
+                LockSupport.unpark(t);
+            }
+        }
+    }
+
+    /**
+     * Variant of releaseWaiters that additionally tries to remove any
+     * nodes no longer waiting for advance due to timeout or
+     * interrupt. Currently, nodes are removed only if they are at
+     * head of queue, which suffices to reduce memory footprint in
+     * most usages.
+     *
+     * @return current phase on exit
+     */
+    private int abortWait(int phase) {
+        AtomicReference<QNode> head = (phase & 1) == 0 ? evenQ : oddQ;
+        for (;;) {
+            Thread t;
+            QNode q = head.get();
+            int p = (int)(root.state >>> PHASE_SHIFT);
+            if (q == null || ((t = q.thread) != null && q.phase == p))
+                return p;
+            if (head.compareAndSet(q, q.next) && t != null) {
+                q.thread = null;
+                LockSupport.unpark(t);
+            }
+        }
+    }
+
+    /** The number of CPUs, for spin control */
+    private static final int NCPU = Runtime.getRuntime().availableProcessors();
+
+    /**
+     * The number of times to spin before blocking while waiting for
+     * advance, per arrival while waiting. On multiprocessors, fully
+     * blocking and waking up a large number of threads all at once is
+     * usually a very slow process, so we use rechargeable spins to
+     * avoid it when threads regularly arrive: When a thread in
+     * internalAwaitAdvance notices another arrival before blocking,
+     * and there appear to be enough CPUs available, it spins
+     * SPINS_PER_ARRIVAL more times before blocking. The value trades
+     * off good-citizenship vs big unnecessary slowdowns.
+     */
+    static final int SPINS_PER_ARRIVAL = (NCPU < 2) ? 1 : 1 << 8;
+
+    /**
+     * Possibly blocks and waits for phase to advance unless aborted.
+     * Call only on root phaser.
+     *
+     * @param phase current phase
+     * @param node if non-null, the wait node to track interrupt and timeout;
+     * if null, denotes noninterruptible wait
+     * @return current phase
+     */
+    private int internalAwaitAdvance(int phase, QNode node) {
+        // assert root == this;
+        releaseWaiters(phase-1);          // ensure old queue clean
+        boolean queued = false;           // true when node is enqueued
+        int lastUnarrived = 0;            // to increase spins upon change
+        int spins = SPINS_PER_ARRIVAL;
+        long s;
+        int p;
+        while ((p = (int)((s = state) >>> PHASE_SHIFT)) == phase) {
+            if (node == null) {           // spinning in noninterruptible mode
+                int unarrived = (int)s & UNARRIVED_MASK;
+                if (unarrived != lastUnarrived &&
+                    (lastUnarrived = unarrived) < NCPU)
+                    spins += SPINS_PER_ARRIVAL;
+                boolean interrupted = Thread.interrupted();
+                if (interrupted || --spins < 0) { // need node to record intr
+                    node = new QNode(this, phase, false, false, 0L);
+                    node.wasInterrupted = interrupted;
+                }
+            }
+            else if (node.isReleasable()) // done or aborted
+                break;
+            else if (!queued) {           // push onto queue
+                AtomicReference<QNode> head = (phase & 1) == 0 ? evenQ : oddQ;
+                QNode q = node.next = head.get();
+                if ((q == null || q.phase == phase) &&
+                    (int)(state >>> PHASE_SHIFT) == phase) // avoid stale enq
+                    queued = head.compareAndSet(q, node);
+            }
+            else {
+                try {
+                    ForkJoinPool.managedBlock(node);
+                } catch (InterruptedException ie) {
+                    node.wasInterrupted = true;
+                }
+            }
+        }
+
+        if (node != null) {
+            if (node.thread != null)
+                node.thread = null;       // avoid need for unpark()
+            if (node.wasInterrupted && !node.interruptible)
+                Thread.currentThread().interrupt();
+            if (p == phase && (p = (int)(state >>> PHASE_SHIFT)) == phase)
+                return abortWait(phase); // possibly clean up on abort
+        }
+        releaseWaiters(phase);
+        return p;
+    }
+
+    /**
+     * Wait nodes for Treiber stack representing wait queue
+     */
+    static final class QNode implements ForkJoinPool.ManagedBlocker {
+        final Phaser phaser;
+        final int phase;
+        final boolean interruptible;
+        final boolean timed;
+        boolean wasInterrupted;
+        long nanos;
+        long lastTime;
+        volatile Thread thread; // nulled to cancel wait
+        QNode next;
+
+        QNode(Phaser phaser, int phase, boolean interruptible,
+              boolean timed, long nanos) {
+            this.phaser = phaser;
+            this.phase = phase;
+            this.interruptible = interruptible;
+            this.nanos = nanos;
+            this.timed = timed;
+            this.lastTime = timed ? System.nanoTime() : 0L;
+            thread = Thread.currentThread();
+        }
+
+        public boolean isReleasable() {
+            if (thread == null)
+                return true;
+            if (phaser.getPhase() != phase) {
+                thread = null;
+                return true;
+            }
+            if (Thread.interrupted())
+                wasInterrupted = true;
+            if (wasInterrupted && interruptible) {
+                thread = null;
+                return true;
+            }
+            if (timed) {
+                if (nanos > 0L) {
+                    long now = System.nanoTime();
+                    nanos -= now - lastTime;
+                    lastTime = now;
+                }
+                if (nanos <= 0L) {
+                    thread = null;
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        public boolean block() {
+            if (isReleasable())
+                return true;
+            else if (!timed)
+                LockSupport.park(this);
+            else if (nanos > 0)
+                LockSupport.parkNanos(this, nanos);
+            return isReleasable();
+        }
+    }
+
+    // Unsafe mechanics
+
+    private static final sun.misc.Unsafe UNSAFE;
+    private static final long stateOffset;
+    static {
+        try {
+            UNSAFE = getUnsafe();
+            Class<?> k = Phaser.class;
+            stateOffset = UNSAFE.objectFieldOffset
+                (k.getDeclaredField("state"));
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
+     * Replace with a simple call to Unsafe.getUnsafe when integrating
+     * into a jdk.
+     *
+     * @return a sun.misc.Unsafe
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+        try {
+            return sun.misc.Unsafe.getUnsafe();
+        } catch (SecurityException se) {
+            try {
+                return java.security.AccessController.doPrivileged
+                    (new java.security
+                     .PrivilegedExceptionAction<sun.misc.Unsafe>() {
+                        public sun.misc.Unsafe run() throws Exception {
+                            java.lang.reflect.Field f = sun.misc
+                                .Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return (sun.misc.Unsafe) f.get(null);
+                        }});
+            } catch (java.security.PrivilegedActionException e) {
+                throw new RuntimeException("Could not initialize intrinsics",
+                                           e.getCause());
+            }
+        }
+    }
+}
diff --git a/src/main/java/jsr166y/RecursiveAction.java b/src/main/java/jsr166y/RecursiveAction.java
new file mode 100644
index 00000000000..4ecc73c7d75
--- /dev/null
+++ b/src/main/java/jsr166y/RecursiveAction.java
@@ -0,0 +1,164 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+/**
+ * A recursive resultless {@link ForkJoinTask}.  This class
+ * establishes conventions to parameterize resultless actions as
+ * {@code Void} {@code ForkJoinTask}s. Because {@code null} is the
+ * only valid value of type {@code Void}, methods such as {@code join}
+ * always return {@code null} upon completion.
+ *
+ * <p><b>Sample Usages.</b> Here is a simple but complete ForkJoin
+ * sort that sorts a given {@code long[]} array:
+ *
+ *  <pre> {@code
+ * static class SortTask extends RecursiveAction {
+ *   final long[] array; final int lo, hi;
+ *   SortTask(long[] array, int lo, int hi) {
+ *     this.array = array; this.lo = lo; this.hi = hi;
+ *   }
+ *   SortTask(long[] array) { this(array, 0, array.length); }
+ *   protected void compute() {
+ *     if (hi - lo < THRESHOLD)
+ *       sortSequentially(lo, hi);
+ *     else {
+ *       int mid = (lo + hi) >>> 1;
+ *       invokeAll(new SortTask(array, lo, mid),
+ *                 new SortTask(array, mid, hi));
+ *       merge(lo, mid, hi);
+ *     }
+ *   }
+ *   // implementation details follow:
+ *   final static int THRESHOLD = 1000;
+ *   void sortSequentially(int lo, int hi) {
+ *     Arrays.sort(array, lo, hi);
+ *   }
+ *   void merge(int lo, int mid, int hi) {
+ *     long[] buf = Arrays.copyOfRange(array, lo, mid);
+ *     for (int i = 0, j = lo, k = mid; i < buf.length; j++)
+ *       array[j] = (k == hi || buf[i] < array[k]) ?
+ *         buf[i++] : array[k++];
+ *   }
+ * }}</pre>
+ *
+ * You could then sort {@code anArray} by creating {@code new
+ * SortTask(anArray)} and invoking it in a ForkJoinPool.  As a more
+ * concrete simple example, the following task increments each element
+ * of an array:
+ *  <pre> {@code
+ * class IncrementTask extends RecursiveAction {
+ *   final long[] array; final int lo, hi;
+ *   IncrementTask(long[] array, int lo, int hi) {
+ *     this.array = array; this.lo = lo; this.hi = hi;
+ *   }
+ *   protected void compute() {
+ *     if (hi - lo < THRESHOLD) {
+ *       for (int i = lo; i < hi; ++i)
+ *         array[i]++;
+ *     }
+ *     else {
+ *       int mid = (lo + hi) >>> 1;
+ *       invokeAll(new IncrementTask(array, lo, mid),
+ *                 new IncrementTask(array, mid, hi));
+ *     }
+ *   }
+ * }}</pre>
+ *
+ * <p>The following example illustrates some refinements and idioms
+ * that may lead to better performance: RecursiveActions need not be
+ * fully recursive, so long as they maintain the basic
+ * divide-and-conquer approach. Here is a class that sums the squares
+ * of each element of a double array, by subdividing out only the
+ * right-hand-sides of repeated divisions by two, and keeping track of
+ * them with a chain of {@code next} references. It uses a dynamic
+ * threshold based on method {@code getSurplusQueuedTaskCount}, but
+ * counterbalances potential excess partitioning by directly
+ * performing leaf actions on unstolen tasks rather than further
+ * subdividing.
+ *
+ *  <pre> {@code
+ * double sumOfSquares(ForkJoinPool pool, double[] array) {
+ *   int n = array.length;
+ *   Applyer a = new Applyer(array, 0, n, null);
+ *   pool.invoke(a);
+ *   return a.result;
+ * }
+ *
+ * class Applyer extends RecursiveAction {
+ *   final double[] array;
+ *   final int lo, hi;
+ *   double result;
+ *   Applyer next; // keeps track of right-hand-side tasks
+ *   Applyer(double[] array, int lo, int hi, Applyer next) {
+ *     this.array = array; this.lo = lo; this.hi = hi;
+ *     this.next = next;
+ *   }
+ *
+ *   double atLeaf(int l, int h) {
+ *     double sum = 0;
+ *     for (int i = l; i < h; ++i) // perform leftmost base step
+ *       sum += array[i] * array[i];
+ *     return sum;
+ *   }
+ *
+ *   protected void compute() {
+ *     int l = lo;
+ *     int h = hi;
+ *     Applyer right = null;
+ *     while (h - l > 1 && getSurplusQueuedTaskCount() <= 3) {
+ *        int mid = (l + h) >>> 1;
+ *        right = new Applyer(array, mid, h, right);
+ *        right.fork();
+ *        h = mid;
+ *     }
+ *     double sum = atLeaf(l, h);
+ *     while (right != null) {
+ *        if (right.tryUnfork()) // directly calculate if not stolen
+ *          sum += right.atLeaf(right.lo, right.hi);
+ *       else {
+ *          right.join();
+ *          sum += right.result;
+ *        }
+ *        right = right.next;
+ *      }
+ *     result = sum;
+ *   }
+ * }}</pre>
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public abstract class RecursiveAction extends ForkJoinTask<Void> {
+    private static final long serialVersionUID = 5232453952276485070L;
+
+    /**
+     * The main computation performed by this task.
+     */
+    protected abstract void compute();
+
+    /**
+     * Always returns {@code null}.
+     *
+     * @return {@code null} always
+     */
+    public final Void getRawResult() { return null; }
+
+    /**
+     * Requires null completion value.
+     */
+    protected final void setRawResult(Void mustBeNull) { }
+
+    /**
+     * Implements execution conventions for RecursiveActions.
+     */
+    protected final boolean exec() {
+        compute();
+        return true;
+    }
+
+}
diff --git a/src/main/java/jsr166y/RecursiveTask.java b/src/main/java/jsr166y/RecursiveTask.java
new file mode 100644
index 00000000000..8a57e547881
--- /dev/null
+++ b/src/main/java/jsr166y/RecursiveTask.java
@@ -0,0 +1,68 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+/**
+ * A recursive result-bearing {@link ForkJoinTask}.
+ *
+ * <p>For a classic example, here is a task computing Fibonacci numbers:
+ *
+ *  <pre> {@code
+ * class Fibonacci extends RecursiveTask<Integer> {
+ *   final int n;
+ *   Fibonacci(int n) { this.n = n; }
+ *   Integer compute() {
+ *     if (n <= 1)
+ *        return n;
+ *     Fibonacci f1 = new Fibonacci(n - 1);
+ *     f1.fork();
+ *     Fibonacci f2 = new Fibonacci(n - 2);
+ *     return f2.compute() + f1.join();
+ *   }
+ * }}</pre>
+ *
+ * However, besides being a dumb way to compute Fibonacci functions
+ * (there is a simple fast linear algorithm that you'd use in
+ * practice), this is likely to perform poorly because the smallest
+ * subtasks are too small to be worthwhile splitting up. Instead, as
+ * is the case for nearly all fork/join applications, you'd pick some
+ * minimum granularity size (for example 10 here) for which you always
+ * sequentially solve rather than subdividing.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public abstract class RecursiveTask<V> extends ForkJoinTask<V> {
+    private static final long serialVersionUID = 5232453952276485270L;
+
+    /**
+     * The result of the computation.
+     */
+    V result;
+
+    /**
+     * The main computation performed by this task.
+     */
+    protected abstract V compute();
+
+    public final V getRawResult() {
+        return result;
+    }
+
+    protected final void setRawResult(V value) {
+        result = value;
+    }
+
+    /**
+     * Implements execution conventions for RecursiveTask.
+     */
+    protected final boolean exec() {
+        result = compute();
+        return true;
+    }
+
+}
diff --git a/src/main/java/jsr166y/ThreadLocalRandom.java b/src/main/java/jsr166y/ThreadLocalRandom.java
new file mode 100644
index 00000000000..032f77a04bc
--- /dev/null
+++ b/src/main/java/jsr166y/ThreadLocalRandom.java
@@ -0,0 +1,197 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+
+import java.util.Random;
+
+/**
+ * A random number generator isolated to the current thread.  Like the
+ * global {@link java.util.Random} generator used by the {@link
+ * java.lang.Math} class, a {@code ThreadLocalRandom} is initialized
+ * with an internally generated seed that may not otherwise be
+ * modified. When applicable, use of {@code ThreadLocalRandom} rather
+ * than shared {@code Random} objects in concurrent programs will
+ * typically encounter much less overhead and contention.  Use of
+ * {@code ThreadLocalRandom} is particularly appropriate when multiple
+ * tasks (for example, each a {@link ForkJoinTask}) use random numbers
+ * in parallel in thread pools.
+ *
+ * <p>Usages of this class should typically be of the form:
+ * {@code ThreadLocalRandom.current().nextX(...)} (where
+ * {@code X} is {@code Int}, {@code Long}, etc).
+ * When all usages are of this form, it is never possible to
+ * accidently share a {@code ThreadLocalRandom} across multiple threads.
+ *
+ * <p>This class also provides additional commonly used bounded random
+ * generation methods.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ */
+public class ThreadLocalRandom extends Random {
+    // same constants as Random, but must be redeclared because private
+    private static final long multiplier = 0x5DEECE66DL;
+    private static final long addend = 0xBL;
+    private static final long mask = (1L << 48) - 1;
+
+    /**
+     * The random seed. We can't use super.seed.
+     */
+    private long rnd;
+
+    /**
+     * Initialization flag to permit calls to setSeed to succeed only
+     * while executing the Random constructor.  We can't allow others
+     * since it would cause setting seed in one part of a program to
+     * unintentionally impact other usages by the thread.
+     */
+    boolean initialized;
+
+    // Padding to help avoid memory contention among seed updates in
+    // different TLRs in the common case that they are located near
+    // each other.
+    private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
+
+    /**
+     * The actual ThreadLocal
+     */
+    private static final ThreadLocal<ThreadLocalRandom> localRandom =
+        new ThreadLocal<ThreadLocalRandom>() {
+            protected ThreadLocalRandom initialValue() {
+                return new ThreadLocalRandom();
+            }
+    };
+
+
+    /**
+     * Constructor called only by localRandom.initialValue.
+     */
+    ThreadLocalRandom() {
+        super();
+        initialized = true;
+    }
+
+    /**
+     * Returns the current thread's {@code ThreadLocalRandom}.
+     *
+     * @return the current thread's {@code ThreadLocalRandom}
+     */
+    public static ThreadLocalRandom current() {
+        return localRandom.get();
+    }
+
+    /**
+     * Throws {@code UnsupportedOperationException}.  Setting seeds in
+     * this generator is not supported.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    public void setSeed(long seed) {
+        if (initialized)
+            throw new UnsupportedOperationException();
+        rnd = (seed ^ multiplier) & mask;
+    }
+
+    protected int next(int bits) {
+        rnd = (rnd * multiplier + addend) & mask;
+        return (int) (rnd >>> (48-bits));
+    }
+
+    /**
+     * Returns a pseudorandom, uniformly distributed value between the
+     * given least value (inclusive) and bound (exclusive).
+     *
+     * @param least the least value returned
+     * @param bound the upper bound (exclusive)
+     * @throws IllegalArgumentException if least greater than or equal
+     * to bound
+     * @return the next value
+     */
+    public int nextInt(int least, int bound) {
+        if (least >= bound)
+            throw new IllegalArgumentException();
+        return nextInt(bound - least) + least;
+    }
+
+    /**
+     * Returns a pseudorandom, uniformly distributed value
+     * between 0 (inclusive) and the specified value (exclusive).
+     *
+     * @param n the bound on the random number to be returned.  Must be
+     *        positive.
+     * @return the next value
+     * @throws IllegalArgumentException if n is not positive
+     */
+    public long nextLong(long n) {
+        if (n <= 0)
+            throw new IllegalArgumentException("n must be positive");
+        // Divide n by two until small enough for nextInt. On each
+        // iteration (at most 31 of them but usually much less),
+        // randomly choose both whether to include high bit in result
+        // (offset) and whether to continue with the lower vs upper
+        // half (which makes a difference only if odd).
+        long offset = 0;
+        while (n >= Integer.MAX_VALUE) {
+            int bits = next(2);
+            long half = n >>> 1;
+            long nextn = ((bits & 2) == 0) ? half : n - half;
+            if ((bits & 1) == 0)
+                offset += n - nextn;
+            n = nextn;
+        }
+        return offset + nextInt((int) n);
+    }
+
+    /**
+     * Returns a pseudorandom, uniformly distributed value between the
+     * given least value (inclusive) and bound (exclusive).
+     *
+     * @param least the least value returned
+     * @param bound the upper bound (exclusive)
+     * @return the next value
+     * @throws IllegalArgumentException if least greater than or equal
+     * to bound
+     */
+    public long nextLong(long least, long bound) {
+        if (least >= bound)
+            throw new IllegalArgumentException();
+        return nextLong(bound - least) + least;
+    }
+
+    /**
+     * Returns a pseudorandom, uniformly distributed {@code double} value
+     * between 0 (inclusive) and the specified value (exclusive).
+     *
+     * @param n the bound on the random number to be returned.  Must be
+     *        positive.
+     * @return the next value
+     * @throws IllegalArgumentException if n is not positive
+     */
+    public double nextDouble(double n) {
+        if (n <= 0)
+            throw new IllegalArgumentException("n must be positive");
+        return nextDouble() * n;
+    }
+
+    /**
+     * Returns a pseudorandom, uniformly distributed value between the
+     * given least value (inclusive) and bound (exclusive).
+     *
+     * @param least the least value returned
+     * @param bound the upper bound (exclusive)
+     * @return the next value
+     * @throws IllegalArgumentException if least greater than or equal
+     * to bound
+     */
+    public double nextDouble(double least, double bound) {
+        if (least >= bound)
+            throw new IllegalArgumentException();
+        return nextDouble() * (bound - least) + least;
+    }
+
+    private static final long serialVersionUID = -5851777807851030925L;
+}
diff --git a/src/main/java/jsr166y/TransferQueue.java b/src/main/java/jsr166y/TransferQueue.java
new file mode 100644
index 00000000000..3aeb69ea12c
--- /dev/null
+++ b/src/main/java/jsr166y/TransferQueue.java
@@ -0,0 +1,133 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package jsr166y;
+import java.util.concurrent.*;
+
+/**
+ * A {@link BlockingQueue} in which producers may wait for consumers
+ * to receive elements.  A {@code TransferQueue} may be useful for
+ * example in message passing applications in which producers
+ * sometimes (using method {@link #transfer}) await receipt of
+ * elements by consumers invoking {@code take} or {@code poll}, while
+ * at other times enqueue elements (via method {@code put}) without
+ * waiting for receipt.
+ * {@linkplain #tryTransfer(Object) Non-blocking} and
+ * {@linkplain #tryTransfer(Object,long,TimeUnit) time-out} versions of
+ * {@code tryTransfer} are also available.
+ * A {@code TransferQueue} may also be queried, via {@link
+ * #hasWaitingConsumer}, whether there are any threads waiting for
+ * items, which is a converse analogy to a {@code peek} operation.
+ *
+ * <p>Like other blocking queues, a {@code TransferQueue} may be
+ * capacity bounded.  If so, an attempted transfer operation may
+ * initially block waiting for available space, and/or subsequently
+ * block waiting for reception by a consumer.  Note that in a queue
+ * with zero capacity, such as {@link SynchronousQueue}, {@code put}
+ * and {@code transfer} are effectively synonymous.
+ *
+ * <p>This interface is a member of the
+ * <a href="{@docRoot}/../technotes/guides/collections/index.html">
+ * Java Collections Framework</a>.
+ *
+ * @since 1.7
+ * @author Doug Lea
+ * @param <E> the type of elements held in this collection
+ */
+public interface TransferQueue<E> extends BlockingQueue<E> {
+    /**
+     * Transfers the element to a waiting consumer immediately, if possible.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * otherwise returning {@code false} without enqueuing the element.
+     *
+     * @param e the element to transfer
+     * @return {@code true} if the element was transferred, else
+     *         {@code false}
+     * @throws ClassCastException if the class of the specified element
+     *         prevents it from being added to this queue
+     * @throws NullPointerException if the specified element is null
+     * @throws IllegalArgumentException if some property of the specified
+     *         element prevents it from being added to this queue
+     */
+    boolean tryTransfer(E e);
+
+    /**
+     * Transfers the element to a consumer, waiting if necessary to do so.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * else waits until the element is received by a consumer.
+     *
+     * @param e the element to transfer
+     * @throws InterruptedException if interrupted while waiting,
+     *         in which case the element is not left enqueued
+     * @throws ClassCastException if the class of the specified element
+     *         prevents it from being added to this queue
+     * @throws NullPointerException if the specified element is null
+     * @throws IllegalArgumentException if some property of the specified
+     *         element prevents it from being added to this queue
+     */
+    void transfer(E e) throws InterruptedException;
+
+    /**
+     * Transfers the element to a consumer if it is possible to do so
+     * before the timeout elapses.
+     *
+     * <p>More precisely, transfers the specified element immediately
+     * if there exists a consumer already waiting to receive it (in
+     * {@link #take} or timed {@link #poll(long,TimeUnit) poll}),
+     * else waits until the element is received by a consumer,
+     * returning {@code false} if the specified wait time elapses
+     * before the element can be transferred.
+     *
+     * @param e the element to transfer
+     * @param timeout how long to wait before giving up, in units of
+     *        {@code unit}
+     * @param unit a {@code TimeUnit} determining how to interpret the
+     *        {@code timeout} parameter
+     * @return {@code true} if successful, or {@code false} if
+     *         the specified waiting time elapses before completion,
+     *         in which case the element is not left enqueued
+     * @throws InterruptedException if interrupted while waiting,
+     *         in which case the element is not left enqueued
+     * @throws ClassCastException if the class of the specified element
+     *         prevents it from being added to this queue
+     * @throws NullPointerException if the specified element is null
+     * @throws IllegalArgumentException if some property of the specified
+     *         element prevents it from being added to this queue
+     */
+    boolean tryTransfer(E e, long timeout, TimeUnit unit)
+        throws InterruptedException;
+
+    /**
+     * Returns {@code true} if there is at least one consumer waiting
+     * to receive an element via {@link #take} or
+     * timed {@link #poll(long,TimeUnit) poll}.
+     * The return value represents a momentary state of affairs.
+     *
+     * @return {@code true} if there is at least one waiting consumer
+     */
+    boolean hasWaitingConsumer();
+
+    /**
+     * Returns an estimate of the number of consumers waiting to
+     * receive elements via {@link #take} or timed
+     * {@link #poll(long,TimeUnit) poll}.  The return value is an
+     * approximation of a momentary state of affairs, that may be
+     * inaccurate if consumers have completed or given up waiting.
+     * The value may be useful for monitoring and heuristics, but
+     * not for synchronization control.  Implementations of this
+     * method are likely to be noticeably slower than those for
+     * {@link #hasWaitingConsumer}.
+     *
+     * @return the number of consumers waiting to receive elements
+     */
+    int getWaitingConsumerCount();
+}
diff --git a/src/main/java/jsr166y/package-info.java b/src/main/java/jsr166y/package-info.java
new file mode 100644
index 00000000000..b42852eb0fe
--- /dev/null
+++ b/src/main/java/jsr166y/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+
+/**
+ * Preview versions of classes targeted for Java 7.  Includes a
+ * fine-grained parallel computation framework: ForkJoinTasks and
+ * their related support classes provide a very efficient basis for
+ * obtaining platform-independent parallel speed-ups of
+ * computation-intensive operations.  They are not a full substitute
+ * for the kinds of arbitrary processing supported by Executors or
+ * Threads. However, when applicable, they typically provide
+ * significantly greater performance on multiprocessor platforms.
+ *
+ * <p>Candidates for fork/join processing mainly include those that
+ * can be expressed using parallel divide-and-conquer techniques: To
+ * solve a problem, break it in two (or more) parts, and then solve
+ * those parts in parallel, continuing on in this way until the
+ * problem is too small to be broken up, so is solved directly.  The
+ * underlying <em>work-stealing</em> framework makes subtasks
+ * available to other threads (normally one per CPU), that help
+ * complete the tasks.  In general, the most efficient ForkJoinTasks
+ * are those that directly implement this algorithmic design pattern.
+ */
+
+// Built on 2012-06-09
+package jsr166y;

	Call from non-fork/join clients	Call from within fork/join computations
Arrange async execution	{@link #execute(ForkJoinTask)}	{@link ForkJoinTask#fork}
Await and obtain result	{@link #invoke(ForkJoinTask)}	{@link ForkJoinTask#invoke}
Arrange exec and obtain Future	{@link #submit(ForkJoinTask)}	{@link ForkJoinTask#fork} (ForkJoinTasks are Futures)