JDK 1.7之 ConcurrentHashMap 源码分析

xiaoxiao2021-02-28 114

转载请注明出处：http://blog.csdn.net/crazy1235/article/details/76795383

目录：Segment & HashEntry、构造函数、put（hash、ensureSegment、Segment#put()、rehash）、get、remove（Segment#remove）、replace（Segment#replace）、contains、clear（Segment#clear）、size、参考

JDK 1.5 引入了 ConcurrentHashMap 。

ConcurrentHashMap是线程安全且高效的HashMap。

HashTable容器使用synchronized来保证线程安全，但是在线程竞争激烈的情况下，HashTable的效率非常低。

当一个线程访问 HashTable 的某个同步方法时，其他线程无法访问它的任何同步方法（所有同步方法共用同一把锁），这样效率就很低下。

ConcurrentHashMap采用锁分段技术来保证高效的并发操作！

ConcurrentHashMap把容器分为多个 segment（片段），每个片段有一把锁，当多线程访问容器里不同数据段的数据时，线程间就不会存在竞争关系；一个线程占用锁访问一个segment的数据时，并不影响另外的线程访问其他segment中的数据。

–

从下面两张图就可以看出 ConcurrentHashMap 的内部结构！

（图片转自网络，侵删）

对比于JDK1.7中的HashMap的结构，ConcurrentHashMap将数组每个元素作为一个segment–片段。

Segment的结构与HashMap类似，每个片段对应一个table数组和链表结构！

一个Segment里面包含一个HashEntry数组，每个HashEntry是一个链表结构，当对HashEntry数组的数据进行修改时，必须首先获得与它对应的Segment锁！

Segment & HashEntry

/**
 * The segments, each of which is a specialized hash table.
 */
final Segment<K,V>[] segments;

/**
 * One partition of the map. It extends ReentrantLock, so the segment object
 * is itself the lock that its mutating methods acquire and release.
 * Method bodies are elided ("// ...") in this outline.
 */
static final class Segment<K,V> extends ReentrantLock implements Serializable {
    private static final long serialVersionUID = 2249069246763182397L;

    // 64 when more than one processor is available, otherwise 1.
    static final int MAX_SCAN_RETRIES =
        Runtime.getRuntime().availableProcessors() > 1 ? 64 : 1;

    // Each segment owns one HashEntry array (its bucket table).
    transient volatile HashEntry<K,V>[] table;

    // Number of elements in this segment.
    transient int count;

    // Number of modifications made to this segment.
    transient int modCount;

    // Element count above which the table is rehashed.
    transient int threshold;

    // Load factor of this segment's table.
    final float loadFactor;

    // Constructor: stores the load factor, threshold and initial table.
    Segment(float lf, int threshold, HashEntry<K,V>[] tab) {
        this.loadFactor = lf;
        this.threshold = threshold;
        this.table = tab;
    }

    // Adds an element to this segment.
    final V put(K key, int hash, V value, boolean onlyIfAbsent) {
        // ...
    }

    // Doubles the table, repacks the existing entries, then adds the given node.
    @SuppressWarnings("unchecked")
    private void rehash(HashEntry<K,V> node) {
        // ...
    }

    // Called by put when tryLock() fails; body elided in this outline.
    private HashEntry<K,V> scanAndLockForPut(K key, int hash, V value) {
        // ...
    }

    // Called by remove and replace when tryLock() fails; body elided in this outline.
    private void scanAndLock(Object key, int hash) {
        // ...
    }

    // Removes the node when value is null (match on key only) or when both
    // key and value match.
    final V remove(Object key, int hash, Object value) {
        // ...
    }

    // Replaces the value of the node matching key, only if its current value
    // equals oldValue.
    final boolean replace(K key, int hash, V oldValue, V newValue) {
        // ...
    }

    // Replaces the value of the node matching key.
    final V replace(K key, int hash, V value) {
        // ...
    }

    // Removes all element nodes from this segment.
    final void clear() {
        // ...
    }
}

/**
 * ConcurrentHashMap list entry. Note that this is never exported
 * out as a user-visible Map.Entry.
 */
static final class HashEntry<K,V> {
    final int hash;
    final K key;
    volatile V value;
    volatile HashEntry<K,V> next;

    HashEntry(int hash, K key, V value, HashEntry<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    /**
     * Sets next field with volatile write semantics.  (See above
     * about use of putOrderedObject.)
     */
    final void setNext(HashEntry<K,V> n) {
        UNSAFE.putOrderedObject(this, nextOffset, n);
    }

    // Unsafe mechanics
    static final sun.misc.Unsafe UNSAFE;
    // Offset of the "next" field, resolved once in the static initializer.
    static final long nextOffset;
    static {
        try {
            UNSAFE = sun.misc.Unsafe.getUnsafe();
            Class k = HashEntry.class;
            nextOffset = UNSAFE.objectFieldOffset
                (k.getDeclaredField("next"));
        } catch (Exception e) {
            throw new Error(e);
        }
    }
}

构造函数

// Default initial capacity.
static final int DEFAULT_INITIAL_CAPACITY = 16;

// Default load factor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;

// Default concurrency level.
static final int DEFAULT_CONCURRENCY_LEVEL = 16;

// Maximum capacity.
static final int MAXIMUM_CAPACITY = 1 << 30;

// Minimum capacity of a single segment's table.
static final int MIN_SEGMENT_TABLE_CAPACITY = 2;

// Upper bound applied to concurrencyLevel, i.e. the maximum number of
// segments (not the capacity of one segment).
static final int MAX_SEGMENTS = 1 << 16;

// Compared against the retry counter in size() and containsValue() before
// they fall back to locking every segment.
static final int RETRIES_BEFORE_LOCK = 2;

/** Creates a map with the default capacity, load factor and concurrency level. */
public ConcurrentHashMap() {
    this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL);
}

/** Creates a map with the given capacity and the default load factor and concurrency level. */
public ConcurrentHashMap(int initialCapacity) {
    this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL);
}

/** Creates a map with the given capacity and load factor and the default concurrency level. */
public ConcurrentHashMap(int initialCapacity, float loadFactor) {
    this(initialCapacity, loadFactor, DEFAULT_CONCURRENCY_LEVEL);
}

/**
 * Creates a map with the given capacity, load factor and concurrency level.
 *
 * @throws IllegalArgumentException if loadFactor is not positive,
 *         initialCapacity is negative, or concurrencyLevel is not positive
 */
@SuppressWarnings("unchecked")
public ConcurrentHashMap(int initialCapacity,
                         float loadFactor, int concurrencyLevel) {
    if (!(loadFactor > 0) || initialCapacity < 0 || concurrencyLevel <= 0)
        throw new IllegalArgumentException();
    if (concurrencyLevel > MAX_SEGMENTS)
        concurrencyLevel = MAX_SEGMENTS;
    // Find power-of-two sizes best matching arguments
    //
    int sshift = 0;
    // The segment array length (ssize) is derived from concurrencyLevel and
    // is always a power of two. With the default concurrencyLevel = 16,
    // ssize is 16 and sshift is 4.
    // sshift is the number of times ssize was shifted left starting from 1.
    int ssize = 1;
    while (ssize < concurrencyLevel) {
        ++sshift;
        ssize <<= 1;
    }
    // Segment shift; with the defaults segmentShift = 28.
    this.segmentShift = 32 - sshift;
    // Mask used when hashing to a segment; with the defaults segmentMask = 15.
    this.segmentMask = ssize - 1;
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    // c = initialCapacity / ssize rounded up: entries each segment must hold.
    int c = initialCapacity / ssize;
    if (c * ssize < initialCapacity)
        ++c;
    // cap = smallest power of two >= c, starting from MIN_SEGMENT_TABLE_CAPACITY.
    int cap = MIN_SEGMENT_TABLE_CAPACITY;
    while (cap < c)
        cap <<= 1;
    // create segments and segments[0]
    Segment<K,V> s0 =
        new Segment<K,V>(loadFactor, (int)(cap * loadFactor),
                         (HashEntry<K,V>[])new HashEntry[cap]);
    // Create the Segment array of length ssize; only slot 0 is filled here.
    Segment<K,V>[] ss = (Segment<K,V>[])new Segment[ssize];
    UNSAFE.putOrderedObject(ss, SBASE, s0); // ordered write of segments[0]
    this.segments = ss;
}

initialCapacity 表示创建ConcurrentHashMap的初始容量。默认值是16

loadFactor 表示加载因子。当某个 Segment 中的元素个数超过该 Segment 的 HashEntry 数组容量 * loadFactor（即阈值 threshold）时，该 Segment 就需要进行扩容。

concurrencyLevel 表示并发的级别，也可以理解为segment数组的长度。Segment数组的长度是大于等于concurrencyLevel的第一个2的n次方。

理想情况下，有concurrencyLevel个线程同时访问不同的segment数据，这样这些线程之间互不干扰，达到了最高并发级别！

put

添加元素分为两步：

定位到segment判断是否需要对segment中的HashEntry数组进行扩容，然后再在segment中进行插入操作

/** Puts every mapping of m into this map by calling put for each entry. */
public void putAll(Map<? extends K, ? extends V> m) {
    for (Map.Entry<? extends K, ? extends V> e : m.entrySet())
        put(e.getKey(), e.getValue());
}

/**
 * Same lookup as put, but passes onlyIfAbsent = true to Segment.put, so an
 * existing value is left untouched.
 *
 * @throws NullPointerException if value is null
 */
@SuppressWarnings("unchecked")
public V putIfAbsent(K key, V value) {
    Segment<K,V> s;
    if (value == null)
        throw new NullPointerException();
    int hash = hash(key);
    int j = (hash >>> segmentShift) & segmentMask;
    if ((s = (Segment<K,V>)UNSAFE.getObject
         (segments, (j << SSHIFT) + SBASE)) == null)
        s = ensureSegment(j);
    return s.put(key, hash, value, true);
}

/**
 * Stores the key/value pair in the segment selected by the key's hash.
 *
 * @return whatever Segment.put returns for this key
 * @throws NullPointerException if value is null
 */
@SuppressWarnings("unchecked")
public V put(K key, V value) {
    Segment<K,V> s;
    if (value == null)                 // null values are not allowed
        throw new NullPointerException();
    int hash = hash(key);              // compute the hash
    int j = (hash >>> segmentShift) & segmentMask; // index of the owning segment
    if ((s = (Segment<K,V>)UNSAFE.getObject          // nonvolatile; recheck
         (segments, (j << SSHIFT) + SBASE)) == null) //  in ensureSegment
        s = ensureSegment(j);
    return s.put(key, hash, value, false); // store the pair in that segment
}

ConcurrentHashMap使用分段锁的机制来保护不同Segment的数据，那么插入和获取元素的时候，就需要先定位到Segment。

hash

// ? private transient final int hashSeed = randomHashSeed(this); private static int randomHashSeed(ConcurrentHashMap instance) { if (sun.misc.VM.isBooted() && Holder.ALTERNATIVE_HASHING) { return sun.misc.Hashing.randomHashSeed(instance); } return 0; } private int hash(Object k) { int h = hashSeed; if ((0 != h) && (k instanceof String)) { return sun.misc.Hashing.stringHash32((String) k); } h ^= k.hashCode(); // 此处使用的是Wang/Jenkins hash的变种算法！ h += (h << 15) ^ 0xffffcd7d; h ^= (h >>> 10); h += (h << 3); h ^= (h >>> 6); h += (h << 2) + (h << 14); return h ^ (h >>> 16); }

此散列算法目的就是减少冲突，使元素能够比较均匀的分散到各个Segment中，从而提高整个容器的效率。

计算得到散列的hash值之后，就可以定位到它属于Segment数组中的哪个片段了。

(hash >>> segmentShift) & segmentMask

默认情况下，segmentShift = 28， segmentMask = 15。首先hash右移28位，让高四位参与运算。然后再与segmentMask进行与操作。就得到了segment数组的下标。

举例：

hash(key)运算得到的值是一个32位的整数。

默认情况下，this.segmentShift = 32 - sshift = 32 - 4 = 28。

ensureSegment

这个函数的目的就是返回下标对应的segment；如果该segment尚未创建，则以segments[0]为原型创建一个，并通过CAS放入segment数组。

/**
 * Returns the segment at index k, creating it and installing it with a
 * compare-and-swap if the slot is still null.
 *
 * @param k index into the segments array
 * @return the segment found in, or installed into, slot k
 */
@SuppressWarnings("unchecked")
private Segment<K,V> ensureSegment(int k) {
    final Segment<K,V>[] ss = this.segments;
    long u = (k << SSHIFT) + SBASE; // raw offset
    Segment<K,V> seg;
    if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) {
        Segment<K,V> proto = ss[0]; // use segment 0 as prototype
        int cap = proto.table.length;
        float lf = proto.loadFactor;
        int threshold = (int)(cap * lf);
        HashEntry<K,V>[] tab = (HashEntry<K,V>[])new HashEntry[cap];
        if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u))
            == null) { // recheck
            Segment<K,V> s = new Segment<K,V>(lf, threshold, tab);
            // Loop until the slot is non-null: either our CAS installs s, or
            // the re-read picks up a segment already present in the slot.
            while ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u))
                   == null) {
                if (UNSAFE.compareAndSwapObject(ss, u, null, seg = s))
                    break;
            }
        }
    }
    return seg;
}

找到了对应的segment之后，就可以往里面put值了

return s.put(key, hash, value, false);

Segment#put()

/**
 * Inserts or updates a key inside this segment while holding the segment lock.
 *
 * @param onlyIfAbsent when true, an existing value is not overwritten
 * @return the previous value for the key, or null if a new node was added
 */
final V put(K key, int hash, V value, boolean onlyIfAbsent) {
    // Try the lock once; on failure scanAndLockForPut is used, and its
    // result (possibly null) is kept as the node to insert.
    HashEntry<K,V> node = tryLock() ? null :
        scanAndLockForPut(key, hash, value);
    V oldValue;
    try {
        // Each segment owns one HashEntry array.
        HashEntry<K,V>[] tab = table;
        // Bucket index. The table length is a power of two, so this keeps
        // the low bits of the hash.
        int index = (tab.length - 1) & hash;
        // Head node of the bucket's linked list.
        HashEntry<K,V> first = entryAt(tab, index);
        // Walk the list.
        for (HashEntry<K,V> e = first;;) {
            if (e != null) { // still inside the list
                K k;
                if ((k = e.key) == key ||
                    (e.hash == hash && key.equals(k))) {
                    // Key already present: report the old value and
                    // overwrite it unless onlyIfAbsent is set.
                    oldValue = e.value;
                    if (!onlyIfAbsent) {
                        e.value = value;
                        ++modCount;
                    }
                    break;
                }
                e = e.next;
            }
            else { // end of list reached (or empty bucket): key is absent
                if (node != null)
                    // Link the already-created node in front of the current head.
                    node.setNext(first);
                else
                    // Create the node from key/value with the current head as next.
                    node = new HashEntry<K,V>(hash, key, value, first);
                int c = count + 1;
                // Rehash only when the new count exceeds the threshold AND
                // the table is still smaller than MAXIMUM_CAPACITY.
                if (c > threshold && tab.length < MAXIMUM_CAPACITY)
                    rehash(node);
                else
                    // No resize needed: node becomes the new bucket head.
                    setEntryAt(tab, index, node);
                ++modCount;
                count = c;
                oldValue = null;
                break;
            }
        }
    } finally {
        unlock(); // release the segment lock
    }
    return oldValue; // previous value, or null
}

rehash

下面来看当需要扩容的时候：

/**
 * Doubles the capacity of this segment's table, moves the existing entries
 * into the new table, then adds the given node.
 *
 * @param node the new node to insert once the old entries have been moved
 */
@SuppressWarnings("unchecked")
private void rehash(HashEntry<K,V> node) {
    HashEntry<K,V>[] oldTable = table;
    int oldCapacity = oldTable.length;
    // Double the capacity (shift left by one).
    int newCapacity = oldCapacity << 1;
    // Recompute the threshold.
    threshold = (int)(newCapacity * loadFactor);
    // Allocate the new table.
    HashEntry<K,V>[] newTable =
        (HashEntry<K,V>[]) new HashEntry[newCapacity];
    // mask
    int sizeMask = newCapacity - 1;
    // Walk every bucket of the old table.
    for (int i = 0; i < oldCapacity ; i++) {
        HashEntry<K,V> e = oldTable[i]; // head node of one list
        if (e != null) {
            HashEntry<K,V> next = e.next;
            // Index of the head node in the new table.
            int idx = e.hash & sizeMask;
            if (next == null) // single node: place it directly in the new table
                newTable[idx] = e;
            else { // list with several nodes
                // lastRun marks the start of the trailing run of nodes that
                // all map to the same new index; it starts at the head.
                HashEntry<K,V> lastRun = e;
                int lastIdx = idx;
                for (HashEntry<K,V> last = next;
                     last != null;
                     last = last.next) {
                    // Nodes of one old list need not land in the same new
                    // list, so the index is recomputed for every node.
                    int k = last.hash & sizeMask;
                    if (k != lastIdx) {
                        lastIdx = k;
                        lastRun = last;
                    }
                }
                // Reuse lastRun and everything after it as-is.
                newTable[lastIdx] = lastRun;
                // Nodes from the old head up to (excluding) lastRun are
                // copied into new HashEntry objects.
                for (HashEntry<K,V> p = e; p != lastRun; p = p.next) {
                    V v = p.value;
                    int h = p.hash;
                    int k = h & sizeMask;
                    HashEntry<K,V> n = newTable[k];
                    newTable[k] = new HashEntry<K,V>(h, p.key, v, n); // push onto bucket head
                }
            }
        }
    }
    // Only after all old data has been moved is the new node inserted
    // (again at the head of its bucket).
    int nodeIndex = node.hash & sizeMask; // add the new node
    node.setNext(newTable[nodeIndex]);
    newTable[nodeIndex] = node;
    table = newTable;
}

以一条旧链表数据为例：

细心的朋友可以发现，这里并不一定遍历所有的链表元素，因为当后面的节点经过运算在新数组中的下标一样的话，只需要把这一组的头结点插入，后面的节点就会被带入其中。

所以，下面的for循环操作的是链表中lastRun节点之前的节点

for (HashEntry<K,V> p = e; p != lastRun; p = p.next)

get

首先找到对应的segment

然后找到segment中对应HashEntry链表

遍历链表即可

/**
 * Looks up the value for a key. No lock call appears in this method; the
 * segment and the bucket head are read with getObjectVolatile.
 *
 * @return the value of the matching node, or null if none is found
 */
public V get(Object key) {
    Segment<K,V> s; // manually integrate access methods to reduce overhead
    HashEntry<K,V>[] tab;
    int h = hash(key);
    // Raw offset of the segment slot; the index is
    // (h >>> segmentShift) & segmentMask.
    long u = (((h >>> segmentShift) & segmentMask) << SSHIFT) + SBASE;
    if ((s = (Segment<K,V>)UNSAFE.getObjectVolatile(segments, u)) != null &&
        (tab = s.table) != null) { // segment found via the index
        // (tab.length - 1) & h is the bucket index in the HashEntry array;
        // walk that bucket's list.
        for (HashEntry<K,V> e = (HashEntry<K,V>) UNSAFE.getObjectVolatile
                 (tab, ((long)(((tab.length - 1) & h)) << TSHIFT) + TBASE);
             e != null; e = e.next) {
            K k;
            if ((k = e.key) == key || (e.hash == h && key.equals(k)))
                return e.value;
        }
    }
    return null;
}

remove

/**
 * Removes the mapping for a key.
 *
 * @return the result of Segment.remove, or null when no segment exists
 *         for the key's hash
 */
public V remove(Object key) {
    // Compute the hash.
    int hash = hash(key);
    // Find the segment for that hash.
    Segment<K,V> s = segmentForHash(hash);
    // Delegate to Segment.remove; a null value means "match on key only".
    return s == null ? null : s.remove(key, hash, null);
}

/**
 * Removes the mapping for a key only if Segment.remove matches the given value.
 *
 * @return true if a node was removed; false when value is null, the segment
 *         does not exist, or nothing matched
 */
public boolean remove(Object key, Object value) {
    int hash = hash(key);
    Segment<K,V> s;
    return value != null && (s = segmentForHash(hash)) != null &&
        s.remove(key, hash, value) != null;
}

Segment#remove

/**
 * Remove; match on key only if value null, else match both.
 *
 * @return the removed node's value, or null if nothing was removed
 */
final V remove(Object key, int hash, Object value) {
    if (!tryLock())
        scanAndLock(key, hash);
    V oldValue = null;
    try {
        HashEntry<K,V>[] tab = table;
        // Bucket index in the HashEntry array.
        int index = (tab.length - 1) & hash;
        // Head node of the bucket.
        HashEntry<K,V> e = entryAt(tab, index);
        HashEntry<K,V> pred = null;
        while (e != null) {
            K k;
            HashEntry<K,V> next = e.next;
            if ((k = e.key) == key ||
                (e.hash == hash && key.equals(k))) {
                // Node with the matching key found.
                V v = e.value;
                if (value == null || value == v || value.equals(v)) {
                    if (pred == null)
                        // pred == null means the head is being removed:
                        // the successor becomes the new bucket head.
                        setEntryAt(tab, index, next);
                    else
                        // Otherwise unlink by pointing the predecessor at the successor.
                        pred.setNext(next);
                    ++modCount;
                    --count;
                    // Remember the old value for the caller.
                    oldValue = v;
                }
                break;
            }
            pred = e;
            e = next;
        }
    } finally {
        unlock();
    }
    return oldValue;
}

replace

替换元素的值

/**
 * Replaces the value for a key by delegating to the four-argument Segment.replace.
 *
 * @return false when no segment exists for the key's hash, otherwise the
 *         result of Segment.replace
 * @throws NullPointerException if oldValue or newValue is null
 */
public boolean replace(K key, V oldValue, V newValue) {
    int hash = hash(key);
    // A null oldValue or newValue is rejected.
    if (oldValue == null || newValue == null)
        throw new NullPointerException();
    // Find the segment.
    Segment<K,V> s = segmentForHash(hash);
    // Delegate to Segment.replace.
    return s != null && s.replace(key, hash, oldValue, newValue);
}

/**
 * Replaces the value for a key by delegating to the three-argument Segment.replace.
 *
 * @return null when no segment exists for the key's hash, otherwise the
 *         result of Segment.replace
 * @throws NullPointerException if value is null
 */
public V replace(K key, V value) {
    int hash = hash(key);
    if (value == null)
        throw new NullPointerException();
    Segment<K,V> s = segmentForHash(hash);
    // Delegate to Segment.replace.
    return s == null ? null : s.replace(key, hash, value);
}

Segment#replace

/**
 * Replaces the value of the node matching key, but only when its current
 * value equals oldValue. Runs under the segment lock.
 *
 * @return true if the value was replaced
 */
final boolean replace(K key, int hash, V oldValue, V newValue) {
    if (!tryLock())
        scanAndLock(key, hash);
    boolean replaced = false;
    try {
        HashEntry<K,V> e;
        // entryForHash yields the list head; the loop walks the list.
        for (e = entryForHash(this, hash); e != null; e = e.next) {
            K k;
            if ((k = e.key) == key ||
                (e.hash == hash && key.equals(k))) {
                // Only when oldValue matches the stored value is it
                // replaced by newValue and true returned.
                if (oldValue.equals(e.value)) {
                    e.value = newValue;
                    ++modCount;
                    replaced = true;
                }
                break;
            }
        }
    } finally {
        unlock();
    }
    return replaced;
}

/**
 * Replaces the value of the node matching key unconditionally. Runs under
 * the segment lock.
 *
 * @return the previous value, or null if no node matched
 */
final V replace(K key, int hash, V value) {
    if (!tryLock())
        scanAndLock(key, hash);
    V oldValue = null;
    try {
        HashEntry<K,V> e;
        for (e = entryForHash(this, hash); e != null; e = e.next) {
            K k;
            if ((k = e.key) == key ||
                (e.hash == hash && key.equals(k))) {
                // The stored value is not checked here: it is overwritten
                // directly and the old value is returned.
                oldValue = e.value;
                e.value = value;
                ++modCount;
                break;
            }
        }
    } finally {
        unlock();
    }
    return oldValue;
}

contains

判断是否包含key值对应的数据（节点）

1- 找到segment 2- 找到HashEntry 3- 遍历链表

/**
 * Tests whether a node with the given key exists. No lock call appears in
 * this method.
 *
 * @return true if a matching node is found
 */
@SuppressWarnings("unchecked")
public boolean containsKey(Object key) {
    Segment<K,V> s; // same as get() except no need for volatile value read
    HashEntry<K,V>[] tab;
    int h = hash(key);
    long u = (((h >>> segmentShift) & segmentMask) << SSHIFT) + SBASE;
    // Find the segment for this hash.
    if ((s = (Segment<K,V>)UNSAFE.getObjectVolatile(segments, u)) != null &&
        (tab = s.table) != null) {
        // Find the bucket's list and walk it.
        for (HashEntry<K,V> e = (HashEntry<K,V>) UNSAFE.getObjectVolatile
                 (tab, ((long)(((tab.length - 1) & h)) << TSHIFT) + TBASE);
             e != null; e = e.next) {
            K k;
            // Same key reference, or equal hash and equals() match.
            if ((k = e.key) == key || (e.hash == h && key.equals(k)))
                return true;
        }
    }
    return false;
}

判断是否包含value值对应的数据（节点）

/** Delegates to containsValue. */
public boolean contains(Object value) {
    return containsValue(value);
}

/**
 * Tests whether any node holds the given value. The scan is first tried
 * without locks and repeated until two consecutive passes see the same
 * modCount sum; after too many retries every segment is locked.
 *
 * @throws NullPointerException if value is null
 */
public boolean containsValue(Object value) {
    // Same idea as size()
    if (value == null)
        throw new NullPointerException();
    final Segment<K,V>[] segments = this.segments;
    boolean found = false;
    long last = 0;
    // Retry counter; starts at -1 and is incremented on every pass.
    int retries = -1;
    try {
        outer: for (;;) { // loops until found or a stable pass is seen
            // When the counter (before increment) equals RETRIES_BEFORE_LOCK,
            // lock every segment before scanning.
            if (retries++ == RETRIES_BEFORE_LOCK) {
                for (int j = 0; j < segments.length; ++j)
                    ensureSegment(j).lock(); // force creation
            }
            long hashSum = 0L; // not read anywhere in this method
            int sum = 0;
            // Walk the segment array.
            for (int j = 0; j < segments.length; ++j) {
                HashEntry<K,V>[] tab;
                Segment<K,V> seg = segmentAt(segments, j);
                if (seg != null && (tab = seg.table) != null) {
                    // Walk this segment's HashEntry array.
                    for (int i = 0 ; i < tab.length; i++) {
                        HashEntry<K,V> e;
                        // Walk the list hanging off this bucket.
                        for (e = entryAt(tab, i); e != null; e = e.next) {
                            V v = e.value;
                            // Match found: leave the outer loop.
                            if (v != null && value.equals(v)) {
                                found = true;
                                break outer;
                            }
                        }
                    }
                    // Accumulate the total modification count.
                    sum += seg.modCount;
                }
            }
            // Equal sums on two consecutive passes mean no modCount changed
            // in between, so the negative result is accepted.
            if (retries > 0 && sum == last)
                break;
            // Remember this pass's sum for the next comparison.
            last = sum;
        }
    } finally {
        // Unlock all segments if the locking pass was reached.
        if (retries > RETRIES_BEFORE_LOCK) {
            for (int j = 0; j < segments.length; ++j)
                segmentAt(segments, j).unlock();
        }
    }
    return found;
}

在判断是否存在包含某个value时，有可能会出现另外一个线程插入一个节点，或者修改了一个节点的value数据。

所以为了保证准确性，该函数允许有三次机会去不加锁遍历segment，如果前后两次遍历segment之后发现modCount总数是一样的，则表示前后过程中没有数据被修改，则可以使用遍历过程中的结果返回。

如果三次遍历之后，发现前后modCount数据不一致，则直接遍历所有的segment并加锁，然后进行判断

clear

/** Clears every segment that exists; null slots in the segment array are skipped. */
public void clear() {
    final Segment<K,V>[] segments = this.segments;
    // Walk all segments and clear each one.
    for (int j = 0; j < segments.length; ++j) {
        Segment<K,V> s = segmentAt(segments, j);
        if (s != null)
            s.clear();
    }
}

Segment#clear

/** Empties this segment's table under the segment lock and resets count to 0. */
final void clear() {
    lock(); // acquire the segment lock
    try {
        HashEntry<K,V>[] tab = table;
        for (int i = 0; i < tab.length ; i++)
            setEntryAt(tab, i, null); // null out every bucket
        ++modCount;
        count = 0;
    } finally {
        unlock(); // release the segment lock
    }
}

size

计算 size 的思想类似于 containsValue

public int size() { // final Segment<K,V>[] segments = this.segments; int size; boolean overflow; // true if size overflows 32 bits long sum; // sum of modCounts long last = 0L; // previous sum int retries = -1; // first iteration isn't retry try { // 死循环 for (;;) { // 当重试次数等于3次时，直接遍历每个segment并上锁。 if (retries++ == RETRIES_BEFORE_LOCK) { for (int j = 0; j < segments.length; ++j) ensureSegment(j).lock(); // force creation } sum = 0L; size = 0; // 遍历segment数组 for (int j = 0; j < segments.length; ++j) { Segment<K,V> seg = segmentAt(segments, j); if (seg != null) { sum += seg.modCount; // 判断是否数据溢出 // 注意这里计算元素总个数（size += c） if (c < 0 || (size += c) < 0) overflow = true; } } // 如果前后两次数据一致，则可以跳出循环 if (sum == last) break; last = sum; } } finally { if (retries > RETRIES_BEFORE_LOCK) { for (int j = 0; j < segments.length; ++j) segmentAt(segments, j).unlock(); } } // 返回总元素个数 return overflow ? Integer.MAX_VALUE : size; }

参考

https://my.oschina.net/hosee/blog/639352 http://blog.csdn.net/javazejian/article/details/76167357 https://my.oschina.net/hosee/blog/607677 http://www.importnew.com/22007.html http://blog.csdn.net/xuefeng0707/article/details/40834595 http://www.cnblogs.com/ITtangtang/p/3948786.html http://www.importnew.com/21781.html http://www.importnew.com/15845.html

转载请注明原文地址: https://www.6miu.com/read-66253.html

技术

最新回复(0)