privatestruct Entry { publicuint hashCode; ///<summary> /// 0-based index of next entry in chain: -1 means end of chain /// also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3, /// so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc. ///</summary> publicint next; //下一个元素的下标索引 public TKey key; // Key of entry public TValue value; // Value of entry }
if (comparer isnotnull && comparer != EqualityComparer<TKey>.Default) // first check for null to avoid forcing default comparer instantiation unnecessarily { _comparer = comparer; }
// Special-case EqualityComparer<string>.Default, StringComparer.Ordinal, and StringComparer.OrdinalIgnoreCase. // We use a non-randomized comparer for improved perf, falling back to a randomized comparer if the // hash buckets become unbalanced. if (typeof(TKey) == typeof(string)) { IEqualityComparer<string>? stringComparer = NonRandomizedStringEqualityComparer.GetStringComparer(_comparer); if (stringComparer isnotnull) { _comparer = (IEqualityComparer<TKey>?)stringComparer; } } }
privateintInitialize(int capacity) { int size = HashHelpers.GetPrime(capacity); int[] buckets = newint[size]; Entry[] entries = new Entry[size];
// Assign member variables after both arrays allocated to guard against corruption from OOM if second fails _freeList = -1; _buckets = buckets; _entries = entries;
publicvoidAdd(TKey key, TValue value) { bool modified = TryInsert(key, value, InsertionBehavior.ThrowOnExisting); Debug.Assert(modified); // If there was an existing key and the Add failed, an exception will already have been thrown. }
else { while (true) { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test uint in if rather than loop condition to drop range check for following array access if ((uint)i >= (uint)entries.Length) { break; }
if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) { if (behavior == InsertionBehavior.OverwriteExisting) { entries[i].value = value; returntrue; }
if (behavior == InsertionBehavior.ThrowOnExisting) { ThrowHelper.ThrowAddingDuplicateWithKeyArgumentException(key); }
returnfalse; }
i = entries[i].next;
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } }
内部一个死循环,这部分循环有个点是比较隐藏的,那就是如何跳出这个碰撞检测。
如果不发生碰撞冲突,int i = bucket - 1根据这个结果即可算出i=−1。发生碰撞的话就要分类讨论了,具体可以看源码。当然最终跳出的方案不是返回一个值就是将i赋值为−1。
int index; if (_freeCount > 0) { index = _freeList; Debug.Assert((StartOfFreeList - entries[_freeList].next) >= -1, "shouldn't overflow because `next` cannot underflow"); _freeList = StartOfFreeList - entries[_freeList].next; _freeCount--; } else { int count = _count; if (count == entries.Length) { Resize(); bucket = refGetBucket(hashCode); } index = count; _count = count + 1; entries = _entries; }
选择合适的位置放入数据。
freeCount和freeList在后面Remove中再说,不过同样都是为了选择位置。
ref Entry entry = ref entries![index]; entry.hashCode = hashCode; entry.next = bucket - 1; // Value in _buckets is 1-based entry.key = key; entry.value = value; bucket = index + 1; // Value in _buckets is 1-based _version++;
最终Add()调用结束后的数值记录。
Remove
publicboolRemove(TKey key){}
if (_buckets != null) { uint collisionCount = 0; uint hashCode = (uint)(_comparer?.GetHashCode(key) ?? key.GetHashCode()); refint bucket = refGetBucket(hashCode); Entry[]? entries = _entries; int last = -1; int i = bucket - 1; // Value in buckets is 1-based while (i >= 0) { ///下一段 } } returnfalse;
同样还是同过key获取HashCode之后找到桶的位置。last用于确定最后一个元素的位置。
while (i >= 0) { ref Entry entry = ref entries[i];
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } }
遍历在同一个桶的entry直到找到目标元素,或者是碰撞次数过多抛出异常。
if (entry.hashCode == hashCode && (_comparer?.Equals(entry.key, key) ?? EqualityComparer<TKey>.Default.Equals(entry.key, key))) { if (last < 0) { // 代表当前是桶的最后一个元素,那么直接赋值即可 bucket = entry.next + 1; // Value in buckets is 1-based } else { // 代表当前元素处于链表中间,如果直接删掉会导致链表断开,所以让其头尾相连 entries[last].next = entry.next; } // 这里去看entry源码部分 entry.next = StartOfFreeList - _freeList; // entry内部数据初始化 if (RuntimeHelpers.IsReferenceOrContainsReferences<TKey>()) { entry.key = default!; }
uint hashCode = (uint)comparer.GetHashCode(key); int i = GetBucket(hashCode); Entry[]? entries = _entries; uint collisionCount = 0; i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test in if to drop range check for following array access if ((uint)i >= (uint)entries.Length) { goto ReturnNotFound; }
collisionCount++; } while (collisionCount <= (uint)entries.Length);
// The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation;
经历了前面增删,这个查找其实变得就很清晰。说白了就是在桶里面跑一边找到就返回,找不到就寄。
不过需要注意的是这里面用了很多goto建议在目录大致过一遍即可。
//外部 ConcurrentOperation: ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); ReturnFound: ref TValue value = ref entry.value; Return: returnrefvalue; ReturnNotFound: value = ref Unsafe.NullRef<TValue>(); goto Return;
//精简版,完整版看目录 privatevoidResize(int newSize, bool forceNewHashCodes) { Entry[] entries = new Entry[newSize]; int count = _count; Array.Copy(_entries, entries, count); // Assign member variables after both arrays allocated to guard against corruption from OOM if second fails _buckets = newint[newSize]; for (int i = 0; i < count; i++) { if (entries[i].next >= -1) { refint bucket = refGetBucket(entries[i].hashCode); entries[i].next = bucket - 1; // Value in _buckets is 1-based bucket = i + 1; } } _entries = entries; }
可以看出扩容操作其实就是,申请新的Entry和buckets,之后将现有的元素拷贝进去。
索引器
public TValue this[TKey key] { get { ref TValue value = refFindValue(key); if (!Unsafe.IsNullRef(refvalue)) { returnvalue; }
// This is the maximum prime smaller than Array.MaxLength. publicconstint MaxPrimeArrayLength = 0x7FFFFFC3;
publicconstint HashPrime = 101;
// Table of prime numbers to use as hash table sizes. // A typical resize algorithm would pick the smallest prime number in this array // that is larger than twice the previous capacity. // Suppose our Hashtable currently has capacity x and enough elements are added // such that a resize needs to occur. Resizing first computes 2x then finds the // first prime in the table greater than 2x, i.e. if primes are ordered // p_1, p_2, ..., p_i, ..., it finds p_n such that p_n-1 < 2x < p_n. // Doubling is important for preserving the asymptotic complexity of the // hashtable operations such as add. Having a prime guarantees that double // hashing does not lead to infinite loops. IE, your hash function will be // h1(key) + i*h2(key), 0 <= i < size. h2 and the size must be relatively prime. // We prefer the low computation costs of higher prime numbers over the increased // memory allocation of a fixed prime number i.e. when right sizing a HashSet. privatestaticreadonlyint[] s_primes = { 3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, 1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591, 17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263, 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369 };
publicstaticintGetPrime(int min) { if (min < 0) thrownew ArgumentException(SR.Arg_HTCapacityOverflow);
foreach (int prime in s_primes) { if (prime >= min) return prime; }
// Outside of our predefined table. Compute the hard way. for (int i = (min | 1); i < int.MaxValue; i += 2) { if (IsPrime(i) && ((i - 1) % HashPrime != 0)) return i; } return min; }
// Returns size of hashtable to grow to. publicstaticintExpandPrime(int oldSize) { int newSize = 2 * oldSize;
// Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow. // Note that this check works even when _items.Length overflowed thanks to the (uint) cast if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize) { Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength"); return MaxPrimeArrayLength; }
return GetPrime(newSize); }
///<summary>Returns approximate reciprocal of the divisor: ceil(2**64 / divisor).</summary> ///<remarks>This should only be used on 64-bit.</remarks> publicstaticulongGetFastModMultiplier(uint divisor) => ulong.MaxValue / divisor + 1;
///<summary>Performs a mod operation using the multiplier pre-computed with <see cref="GetFastModMultiplier"/>.</summary> ///<remarks>This should only be used on 64-bit.</remarks> [MethodImpl(MethodImplOptions.AggressiveInlining)] publicstaticuintFastMod(uintvalue, uint divisor, ulong multiplier) { // We use modified Daniel Lemire's fastmod algorithm (https://github.com/dotnet/runtime/pull/406), // which allows to avoid the long multiplication if the divisor is less than 2**31. Debug.Assert(divisor <= int.MaxValue);
// This is equivalent of (uint)Math.BigMul(multiplier * value, divisor, out _). This version // is faster than BigMul currently because we only need the high bits. uint highbits = (uint)(((((multiplier * value) >> 32) + 1) * divisor) >> 32);
Debug.Assert(highbits == value % divisor); return highbits; } } }
TryInsert源码
privateboolTryInsert(TKey key, TValue value, InsertionBehavior behavior) { // NOTE: this method is mirrored in CollectionsMarshal.GetValueRefOrAddDefault below. // If you make any changes here, make sure to keep that version in sync as well.
if (key == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key); }
if (_buckets == null) { Initialize(0); } Debug.Assert(_buckets != null);
Entry[]? entries = _entries; Debug.Assert(entries != null, "expected entries to be non-null");
uint collisionCount = 0; refint bucket = refGetBucket(hashCode); int i = bucket - 1; // Value in _buckets is 1-based
if (comparer == null) { if (typeof(TKey).IsValueType) { // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic while (true) { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test uint in if rather than loop condition to drop range check for following array access if ((uint)i >= (uint)entries.Length) { break; }
if (entries[i].hashCode == hashCode && EqualityComparer<TKey>.Default.Equals(entries[i].key, key)) { if (behavior == InsertionBehavior.OverwriteExisting) { entries[i].value = value; returntrue; }
if (behavior == InsertionBehavior.ThrowOnExisting) { ThrowHelper.ThrowAddingDuplicateWithKeyArgumentException(key); }
returnfalse; }
i = entries[i].next;
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } else { // Object type: Shared Generic, EqualityComparer<TValue>.Default won't devirtualize // https://github.com/dotnet/runtime/issues/10050 // So cache in a local rather than get EqualityComparer per loop iteration EqualityComparer<TKey> defaultComparer = EqualityComparer<TKey>.Default; while (true) { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test uint in if rather than loop condition to drop range check for following array access if ((uint)i >= (uint)entries.Length) { break; }
if (entries[i].hashCode == hashCode && defaultComparer.Equals(entries[i].key, key)) { if (behavior == InsertionBehavior.OverwriteExisting) { entries[i].value = value; returntrue; }
if (behavior == InsertionBehavior.ThrowOnExisting) { ThrowHelper.ThrowAddingDuplicateWithKeyArgumentException(key); }
returnfalse; }
i = entries[i].next;
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } } else { while (true) { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test uint in if rather than loop condition to drop range check for following array access if ((uint)i >= (uint)entries.Length) { break; }
if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) { if (behavior == InsertionBehavior.OverwriteExisting) { entries[i].value = value; returntrue; }
if (behavior == InsertionBehavior.ThrowOnExisting) { ThrowHelper.ThrowAddingDuplicateWithKeyArgumentException(key); }
returnfalse; }
i = entries[i].next;
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } }
int index; if (_freeCount > 0) { index = _freeList; Debug.Assert((StartOfFreeList - entries[_freeList].next) >= -1, "shouldn't overflow because `next` cannot underflow"); _freeList = StartOfFreeList - entries[_freeList].next; _freeCount--; } else { int count = _count; if (count == entries.Length) { Resize(); bucket = refGetBucket(hashCode); } index = count; _count = count + 1; entries = _entries; }
ref Entry entry = ref entries![index]; entry.hashCode = hashCode; entry.next = bucket - 1; // Value in _buckets is 1-based entry.key = key; entry.value = value; bucket = index + 1; // Value in _buckets is 1-based _version++;
// Value types never rehash if (!typeof(TKey).IsValueType && collisionCount > HashHelpers.HashCollisionThreshold && comparer is NonRandomizedStringEqualityComparer) { // If we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing // i.e. EqualityComparer<string>.Default. Resize(entries.Length, true); }
returntrue; }
Resize源码
privatevoidResize() => Resize(HashHelpers.ExpandPrime(_count), false); privatevoidResize(int newSize, bool forceNewHashCodes) { // Value types never rehash Debug.Assert(!forceNewHashCodes || !typeof(TKey).IsValueType); Debug.Assert(_entries != null, "_entries should be non-null"); Debug.Assert(newSize >= _entries.Length);
Entry[] entries = new Entry[newSize];
int count = _count; Array.Copy(_entries, entries, count);
if (!typeof(TKey).IsValueType && forceNewHashCodes) { Debug.Assert(_comparer is NonRandomizedStringEqualityComparer); _comparer = (IEqualityComparer<TKey>)((NonRandomizedStringEqualityComparer)_comparer) .GetRandomizedEqualityComparer();
for (int i = 0; i < count; i++) { if (entries[i].next >= -1) { entries[i].hashCode = (uint)_comparer.GetHashCode(entries[i].key); } }
if (ReferenceEquals(_comparer, EqualityComparer<TKey>.Default)) { _comparer = null; } }
// Assign member variables after both arrays allocated to guard against corruption from OOM if second fails _buckets = newint[newSize]; #if TARGET_64BIT _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)newSize); #endif for (int i = 0; i < count; i++) { if (entries[i].next >= -1) { refint bucket = refGetBucket(entries[i].hashCode); entries[i].next = bucket - 1; // Value in _buckets is 1-based bucket = i + 1; } }
_entries = 0; }
Remove源码
publicboolRemove(TKey key) { // The overload Remove(TKey key, out TValue value) is a copy of this method with one additional // statement to copy the value for entry being removed into the output parameter. // Code has been intentionally duplicated for performance reasons.
if (key == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key); }
if (_buckets != null) { Debug.Assert(_entries != null, "entries should be non-null"); uint collisionCount = 0; uint hashCode = (uint)(_comparer?.GetHashCode(key) ?? key.GetHashCode()); refint bucket = refGetBucket(hashCode); Entry[]? entries = _entries; int last = -1; int i = bucket - 1; // Value in buckets is 1-based while (i >= 0) { ref Entry entry = ref entries[i];
if (entry.hashCode == hashCode && (_comparer?.Equals(entry.key, key) ?? EqualityComparer<TKey>.Default.Equals(entry.key, key))) { if (last < 0) { bucket = entry.next + 1; // Value in buckets is 1-based } else { entries[last].next = entry.next; }
Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); entry.next = StartOfFreeList - _freeList;
if (RuntimeHelpers.IsReferenceOrContainsReferences<TKey>()) { entry.key = default!; }
if (RuntimeHelpers.IsReferenceOrContainsReferences<TValue>()) { entry.value = default!; }
_freeList = i; _freeCount++; returntrue; }
last = i; i = entry.next;
collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } }
ref Entry entry = ref Unsafe.NullRef<Entry>(); if (_buckets != null) { Debug.Assert(_entries != null, "expected entries to be != null"); IEqualityComparer<TKey>? comparer = _comparer; if (comparer == null) { uint hashCode = (uint)key.GetHashCode(); int i = GetBucket(hashCode); Entry[]? entries = _entries; uint collisionCount = 0; if (typeof(TKey).IsValueType) { // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic
i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test in if to drop range check for following array access if ((uint)i >= (uint)entries.Length) { goto ReturnNotFound; }
collisionCount++; } while (collisionCount <= (uint)entries.Length);
// The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; } else { // Object type: Shared Generic, EqualityComparer<TValue>.Default won't devirtualize // https://github.com/dotnet/runtime/issues/10050 // So cache in a local rather than get EqualityComparer per loop iteration EqualityComparer<TKey> defaultComparer = EqualityComparer<TKey>.Default;
i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test in if to drop range check for following array access if ((uint)i >= (uint)entries.Length) { goto ReturnNotFound; }
collisionCount++; } while (collisionCount <= (uint)entries.Length);
// The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; } } else { uint hashCode = (uint)comparer.GetHashCode(key); int i = GetBucket(hashCode); Entry[]? entries = _entries; uint collisionCount = 0; i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test in if to drop range check for following array access if ((uint)i >= (uint)entries.Length) { goto ReturnNotFound; }
collisionCount++; } while (collisionCount <= (uint)entries.Length);
// The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; } }
goto ReturnNotFound;
ConcurrentOperation: ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); ReturnFound: ref TValue value = ref entry.value; Return: returnrefvalue; ReturnNotFound: value = ref Unsafe.NullRef<TValue>(); goto Return; }