DotNet源码学习-HASHSET初探

命名空间:System.Collections.Generic

先看一下官方说明:类提供了高级的设置操作。集是不包含重复元素的集合,其元素无特定顺序 

HashSet <T>对象的容量是对象可以容纳的元素数。当向对象添加元素时,HashSet <T>对象的容量会自动增加。

HashSet<String> hashSet = new HashSet<string>();
hashSet.Add("test");
hashSet.Add();
Console.WriteLine(hashSet.Count);

打印结果:1

HashSet的默认构造方法:

public HashSet()
            : this((IEqualityComparer<T>?)null)
        { }

注:: this语法为调用自身对象的其他构造方法。

public HashSet(IEqualityComparer<T>? comparer)
{
     if (comparer == EqualityComparer<T>.Default)
     {
          comparer = ;
     }
     _comparer = comparer;
     _lastIndex = 0;
     _count = ;
     _freeList = -1;
     _version = ;
}

第二中创建方式,将集合作为参数。

List<string> list = new List<();
list.Add();
list.Add();
HashSet<string> hSet = (list);
Console.WriteLine(hSet.Count);

此时控台输出:1

此时调用的构造方法为:

public HashSet(IEnumerable<T> collection,IEqualityComparer<T>? comparer)
            : this(comparer)
{
    if (collection == )
    {
        throw new ArgumentNullException(nameof(collection));
    }
    var otherAsHashSet = collection as HashSet<T>;
    if (otherAsHashSet != null && AreEqualityComparersEqual(,otherAsHashSet))
    {
        CopyFrom(otherAsHashSet);
    }
    else
    {
        // to avoid excess resizes,first set size based on collection's count. Collection
         may contain duplicates,so call TrimExcess if resulting hashset is larger than
         threshold
        ICollection<T>? coll = collection as ICollection<T>;
        int suggestedCapacity = coll == null ?  : coll.Count;
        Initialize(suggestedCapacity);
        UnionWith(collection);
        if (_count > 0 && _slots.Length / _count > ShrinkThreshold)
        {
            TrimExcess();
        }
    }
}

在该构造方法中若存在重复值则通过查找大于或等于容量的下一个质数来使用建议的容量。

private int Initialize(int capacity)
{
    Debug.Assert(_buckets == null,Initialize was called but _buckets was non-null);
    int size = HashHelpers.GetPrime(capacity);
    _buckets = new [size];
    _slots =  Slot[size];
    return size;
}

下面为生成质数方法:

public static int GetPrime( min)
{
    if (min < )
         ArgumentException(SR.Arg_HTCapacityOverflow);
    foreach (int prime in s_primes)
    {
        if (prime >= min)
             prime;
    }
     Outside of our predefined table. Compute the hard way.
    for (int i = (min | 1); i < int.MaxValue; i += 2if (IsPrime(i) && ((i - 1) % HashPrime != ))
             i;
    }
     min;
}

再次扩展-》

bool IsPrime( candidate)
{
  if ((candidate & 1) != )
  {
    int limit = (int)Math.Sqrt(candidate);取平方
    int divisor = 3; divisor <= limit; divisor += )
    {
      if ((candidate % divisor) == return false;
    }
    true;
  }
  return candidate == ;
}
internal struct Slot
{
  int hashCode;       Lower 31 bits of hash code,-1 if unused
  int next;           Index of next entry,-1 if last
  internal T value;
}

存储LIst集合:

void UnionWith(IEnumerable<T> other)
{
  if (other ==  ArgumentNullException(nameof(other));
  }
  foreach (T item  other)
  {
    AddIfNotPresent(item);
  }
}

继续往下追踪:

bool AddIfNotPresent(T value)
{
  if (_buckets == )
  {
    Initialize();
  }

   hashCode;
   bucket;
  int collisionCount = ;
  Slot[] slots = _slots;

  IEqualityComparer<T>? comparer = _comparer;

  if (comparer == )
  {
  取HASHCODE
    hashCode = value ==  : InternalGetHashCode(value.GetHashCode());
    bucket = hashCode % _buckets!.Length;

    if (default(T)! != null)  TODO-NULLABLE: default(T) == null warning (https://github.com/dotnet/roslyn/issues/34757)
    {
      int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
      {
        if (slots[i].hashCode == hashCode && EqualityComparer<T>.Default.Equals(slots[i].value,value))
        {
          ;
        }

        if (collisionCount >= slots.Length)
        {
           The chain of entries forms a loop,which means a concurrent update has happened.
           InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);
        }
        collisionCount++;
      }
    }
    
    {
       Object type: Shared Generic,EqualityComparer<TValue>.Default won't devirtualize
      // https://github.com/dotnet/coreclr/issues/17273
       So cache in a local rather than get EqualityComparer per loop iteration
      EqualityComparer<T> defaultComparer = EqualityComparer<T>.Default;

      if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value,1)">;
      }
    }
  }
  
  {
    hashCode = value ==  : InternalGetHashCode(comparer.GetHashCode(value));
    bucket = hashCode % _buckets! slots[i].next)
    {
       comparer.Equals(slots[i].value,value))
      {
        ;
      }

       slots.Length)
      {
         InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);
      }
      collisionCount++;
    }
  }

   index;
  if (_freeList >= )
  {
    index = _freeList;
    _freeList = slots[index].next;
  }
  
  {
    if (_lastIndex == slots.Length)
    {
      IncreaseCapacity();
       this will change during resize
      slots = _slots;
      bucket = hashCode % _buckets.Length;
    }
    index = _lastIndex;
    _lastIndex++;
  }
  slots[index].hashCode = hashCode;
  slots[index].value = value;
  slots[index].next = _buckets[bucket] - ;
  _buckets[bucket] = index + ;
  _count++;
  _version++;

  ;
}
const int Lower31BitMask = 0x7FFFFFFF;

获取内部的HASHCODE

int InternalGetHashCode(T item,1)"> comparer)
{
  if (item == return int hashCode = comparer?.GetHashCode(item) ?? item.GetHashCode();
  return hashCode & Lower31BitMask;
}

划重点-》

slots[index].hashCode = hashCode;
slots[index].value = value;
slots[index].next = _buckets[bucket] - 1;

最终列表中的值存储到结构中。

使用对象初始化HASHSET时,如果相同

 

HashSet<string> hashSet = ();
 hashSet.Add();
 hashSet.Add();
 HashSet<string>(hashSet);
void CopyFrom(HashSet<T> source)
{
  int count = source._count;
  if (count ==  As well as short-circuiting on the rest of the work done, this avoids errors from trying to access otherAsHashSet._buckets
     or otherAsHashSet._slots when they aren't initialized.
    ;
  }

  int capacity = source._buckets!.Length;
  int threshold = HashHelpers.ExpandPrime(count + );

  if (threshold >= capacity)
  {
    _buckets = ([])source._buckets.Clone();
    _slots = (Slot[])source._slots.Clone();

    _lastIndex = source._lastIndex;
    _freeList = source._freeList;
  }
  int lastIndex = source._lastIndex;
    Slot[] slots = source._slots;
    Initialize(count);
    int index = int i = 0; i < lastIndex; ++i)
    {
      int hashCode = slots[i].hashCode;
      if (hashCode >= )
      {
        AddValue(index,hashCode,slots[i].value);
        ++index;
      }
    }
    Debug.Assert(index == count);
    _lastIndex = index;
  }
  _count = count;
}
int ExpandPrime(int oldSize)返回要增长到的哈希表大小
{
  int newSize = 2 * oldSize;

   Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
   Note that this check works even when _items.Length overflowed thanks to the (uint) cast
  if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
  {
    Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength),1)">Invalid MaxPrimeArrayLength MaxPrimeArrayLength;
  }

   GetPrime(newSize);
}

这里只贴出HashSet声明创建对象的两种方式。

下篇再研究具体实现〜

相关文章

引言 本文从Linux小白的视角, 在CentOS 7.x服务器上搭建一个...
引言: 多线程编程/异步编程非常复杂,有很多概念和工具需要...
一. 宏观概念 ASP.NET Core Middleware是在应用程序处理管道...
背景 在.Net和C#中运行异步代码相当简单,因为我们有时候需要...
HTTP基本认证 在HTTP中,HTTP基本认证(Basic Authenticatio...
1.Linq 执行多列排序 OrderBy的意义是按照指定顺序排序,连续...