面试必备--手撕HashMap源码
2020-06-07 04:21
183 查看
HashMap在jdk1.8里的存储结构是数组加 链表 或者 红黑树。链表是否树化具体需要看两个条件:散列表数组(table)的容量是否达到最小树化容量阈值(MIN_TREEIFY_CAPACITY默认64,注意比较的是数组容量而不是元素个数),以及单个桶位上链表的长度是否达到树化阈值(TREEIFY_THRESHOLD默认是8)。若链表达到8但数组容量不足64,则只扩容而不树化。
文章中还会讲到hashmap如何扩容 和 为什么要这么扩容等硬核知识。
HashMap涉及到的树化操作我会在后面单独写一篇关于红黑树的文章来分析。
好的,那么下面我为大家分析几个常用的HashMap方法。
1、HashMap()
/**
 * Constructs an empty {@code HashMap} with the specified initial
 * capacity and load factor.
 *
 * @param initialCapacity the initial capacity (clamped to MAXIMUM_CAPACITY)
 * @param loadFactor      the load factor
 * @throws IllegalArgumentException if the initial capacity is negative
 *         or the load factor is nonpositive/NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    }
    // Oversized requests are silently clamped to the hard cap rather
    // than rejected.
    int cap = Math.min(initialCapacity, MAXIMUM_CAPACITY);
    if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    }
    this.loadFactor = loadFactor;
    // The table itself is allocated lazily (on first put). Until then,
    // `threshold` temporarily holds the power-of-two capacity that the
    // first resize() should allocate — not a real resize trigger yet.
    this.threshold = tableSizeFor(cap);
}
2、get()
/**
 * Returns the value to which the specified key is mapped,
 * or {@code null} if this map contains no mapping for the key.
 *
 * <p>More formally, if this map contains a mapping from a key
 * {@code k} to a value {@code v} such that {@code (key==null ? k==null :
 * key.equals(k))}, then this method returns {@code v}; otherwise
 * it returns {@code null}.  (There can be at most one such mapping.)
 *
 * <p>A return value of {@code null} does not <i>necessarily</i>
 * indicate that the map contains no mapping for the key; it's also
 * possible that the map explicitly maps the key to {@code null}.
 * The {@link #containsKey containsKey} operation may be used to
 * distinguish these two cases.
 *
 * @see #put(Object, Object)
 */
public V get(Object key) {
    Node<K,V> e;
    // hash(key) spreads the high bits; getNode does the actual lookup.
    return (e = getNode(hash(key), key)) == null ? null : e.value;
}

/**
 * Implements Map.get and related methods.
 *
 * @param hash hash for key
 * @param key the key
 * @return the node, or null if none
 */
final Node<K,V> getNode(int hash, Object key) {
    // tab:   local ref to the current hash table (array of bins)
    // first: head element of the addressed bin
    // e:     cursor used to walk a chained bin
    // n:     table array length
    Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
    // (n - 1) & hash is the routing formula: works because n is a power of two.
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (first = tab[(n - 1) & hash]) != null) {
        // Case 1: the bin's head element is exactly the entry we want.
        if (first.hash == hash && // always check first node
            ((k = first.key) == key || (key != null && key.equals(k))))
            return first;
        // The bin holds more than one element: it is either a linked
        // list or has been converted to a red-black tree.
        if ((e = first.next) != null) {
            // Case 2: the bin has been treeified — delegate to the tree lookup.
            if (first instanceof TreeNode)
                return ((TreeNode<K,V>)first).getTreeNode(hash, key);
            // Case 3: the bin is a linked list — scan it node by node.
            do {
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    return e;
            } while ((e = e.next) != null);
        }
    }
    // Empty table, empty bin, or key not present.
    return null;
}
3、put()
/**
 * Associates the specified value with the specified key in this map.
 * If the map previously contained a mapping for the key, the old
 * value is replaced.
 *
 * @param key key with which the specified value is to be associated
 * @param value value to be associated with the specified key
 * @return the previous value associated with <tt>key</tt>, or
 *         <tt>null</tt> if there was no mapping for <tt>key</tt>.
 *         (A <tt>null</tt> return can also indicate that the map
 *         previously associated <tt>null</tt> with <tt>key</tt>.)
 */
public V put(K key, V value) {
    return putVal(hash(key), key, value, false, true);
}

/**
 * Implements Map.put and related methods.
 *
 * Bin routing formula: (n - 1) & hash (n is always a power of two).
 *
 * @param hash hash for key
 * @param key the key
 * @param value the value to put
 * @param onlyIfAbsent if true, don't change existing value
 * @param evict if false, the table is in creation mode.
 * @return previous value, or null if none
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    // tab: local ref to the current hash table
    // p:   element already sitting in the addressed bin
    // n:   table array length
    // i:   routed bin index
    Node<K,V>[] tab; Node<K,V> p; int n, i;
    // Lazy initialization: the table is only allocated on the first
    // putVal call, so an unused HashMap costs minimal memory.
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    // Simplest case: the addressed bin is empty — drop the new node in.
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    else {
        // e: non-null iff an existing entry with an equal key was found
        // k: scratch variable for a candidate key
        Node<K,V> e; K k;
        // The bin's head has exactly the same key — this will be a replace.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // Treeified bin: insert via the red-black tree path.
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        // Linked-list bin whose head key differs from the new key:
        // walk the chain comparing keys —
        //   match found: replace the value below
        //   no match:    append at the tail
        else {
            for (int binCount = 0; ; ++binCount) {
                // Reached the tail without finding an equal key:
                // append the new node at the end of the chain.
                if ((e = p.next) == null) {
                    p.next = newNode(hash, key, value, null);
                    // Chain length reached the treeify threshold —
                    // convert this bin to a red-black tree (treeifyBin
                    // may instead just resize if the table is small).
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // Found a node with an equal key — break out to replace it.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                // Advance the cursor toward the tail.
                p = e;
            }
        }
        // Reached in two situations:
        //   1. the bin head had an equal key
        //   2. a chain/tree node had an equal key
        // e now points at the existing entry whose value may be replaced.
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // LinkedHashMap callback (no-op here): entry was accessed.
            afterNodeAccess(e);
            return oldValue;
        }
    }
    // modCount counts structural modifications; value replacement above
    // returns early and therefore does not count.
    ++modCount;
    // A new entry was inserted; grow the table if size passed the threshold.
    if (++size > threshold)
        resize();
    // LinkedHashMap callback (no-op here): entry was inserted.
    afterNodeInsertion(evict);
    return null;
}
4、remove()
/**
 * Removes the mapping for the specified key from this map if present.
 *
 * @param key key whose mapping is to be removed from the map
 * @return the previous value associated with <tt>key</tt>, or
 *         <tt>null</tt> if there was no mapping for <tt>key</tt>.
 *         (A <tt>null</tt> return can also indicate that the map
 *         previously associated <tt>null</tt> with <tt>key</tt>.)
 */
public V remove(Object key) {
    Node<K,V> e;
    return (e = removeNode(hash(key), key, null, false, true)) == null ?
        null : e.value;
}

/**
 * Implements Map.remove and related methods.
 *
 * @param hash hash for key
 * @param key the key
 * @param value the value to match if matchValue, else ignored
 * @param matchValue if true, remove only when both key AND value match;
 *                   if false, a matching key alone is enough
 * @param movable if false do not move other nodes while removing
 * @return the node, or null if none
 */
final Node<K,V> removeNode(int hash, Object key, Object value,boolean matchValue, boolean movable) {
    // tab:   local ref to the current hash table
    // p:     current node / predecessor during the chain walk
    // n:     table array length
    // index: routed bin index
    Node<K,V>[] tab; Node<K,V> p; int n, index;
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (p = tab[index = (n - 1) & hash]) != null) {
        // The routed bin is non-empty: search it, then delete if found.
        // node: the entry located for removal
        // e:    p's next element while scanning
        Node<K,V> node = null, e; K k; V v;
        // Case 1: the bin's head element is the one to remove.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            node = p;
        else if ((e = p.next) != null) {
            // Case 2: the bin has been treeified — search the tree.
            if (p instanceof TreeNode)
                node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
            // Case 3: the bin is a linked list — scan for the key,
            // keeping p as the predecessor of the match.
            else {
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key ||
                         (key != null && key.equals(k)))) {
                        node = e;
                        break;
                    }
                    p = e;
                } while ((e = e.next) != null);
            }
        }
        // Entry found — decide whether to delete it.
        // The || chain short-circuits left to right:
        //   matchValue == false -> !matchValue is true, delete unconditionally
        //   matchValue == true  -> delete only if node.value == value, or
        //                          value.equals(node.value)
        if (node != null && (!matchValue || (v = node.value) == value ||
                             (value != null && value.equals(v)))) {
            // Case 1: tree node — remove via the red-black tree logic.
            if (node instanceof TreeNode)
                ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
            // Case 2: the bin head itself is removed — promote its successor.
            else if (node == p)
                tab[index] = node.next;
            // Case 3: unlink from the chain — predecessor p skips over node.
            else
                p.next = node.next;
            // Removal is a structural modification.
            ++modCount;
            --size;
            // LinkedHashMap callback (no-op here): entry was removed.
            afterNodeRemoval(node);
            return node;
        }
    }
    return null;
}
5、resize()
/**
 * Initializes or doubles table size.  If null, allocates in
 * accord with initial capacity target held in field threshold.
 * Otherwise, because we are using power-of-two expansion, the
 * elements from each bin must either stay at same index, or move
 * with a power of two offset in the new table.
 *
 * Why resize at all?  Hash collisions turn bins into chains, which
 * degrades lookup from O(1) toward O(n); growing the table spreads
 * entries back out and mitigates that.
 *
 * @return the table
 */
final Node<K,V>[] resize() {
    // oldTab: the table before this resize
    Node<K,V>[] oldTab = table;
    // oldCap: capacity before this resize (0 if not yet allocated)
    int oldCap = (oldTab == null) ? 0 : oldTab.length;
    // oldThr: resize threshold before this resize
    int oldThr = threshold;
    // newCap: capacity after this resize
    // newThr: threshold that will trigger the next resize
    int newCap, newThr = 0;
    // Table already initialized: this is a normal (growth) resize.
    if (oldCap > 0) {
        // Already at the maximum capacity: give up growing, push the
        // threshold to Integer.MAX_VALUE so resize is never retried.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // Double the capacity (left shift by one).  If the doubled size
        // is still under the cap AND the old capacity was at least the
        // default (16), the next threshold is simply the old one doubled.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    // oldCap == 0 but a threshold was set: table not yet allocated,
    // constructed via one of
    //   1. new HashMap(initCap, loadFactor)
    //   2. new HashMap(initCap)
    //   3. new HashMap(map) with a non-empty map
    // threshold temporarily held the requested initial capacity.
    else if (oldThr > 0) // initial capacity was placed in threshold
        newCap = oldThr;
    // oldCap == 0 and oldThr == 0: plain new HashMap() — use defaults.
    else {               // zero initial threshold signifies using defaults
        newCap = DEFAULT_INITIAL_CAPACITY;
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    // Any path above that did not set newThr computes it here as
    // newCap * loadFactor (capped at Integer.MAX_VALUE).
    if (newThr == 0) {
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }
    threshold = newThr;
    // Allocate the bigger array and install it.
    @SuppressWarnings({"rawtypes","unchecked"})
    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
    table = newTab;
    if (oldTab != null) {
        // Rehash every non-empty bin of the old table into the new one.
        for (int j = 0; j < oldCap; ++j) {
            // e: head of the current old bin
            Node<K,V> e;
            if ((e = oldTab[j]) != null) {
                // Clear the old slot so the GC can reclaim the old table.
                oldTab[j] = null;
                // Case 1: single element, never collided — recompute its
                // slot in the new table and place it directly.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // Case 2: treeified bin — let the tree split itself.
                else if (e instanceof TreeNode)
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                // Case 3: linked-list bin — split into two lists.
                else { // preserve order
                    // "low" list: stays at the same index j in the new table
                    Node<K,V> loHead = null, loTail = null;
                    // "high" list: moves to index j + oldCap in the new table
                    Node<K,V> hiHead = null, hiTail = null;
                    // next element of the chain being walked
                    Node<K,V> next;
                    do {
                        next = e.next;
                        // Only the single bit that oldCap contributes decides
                        // the destination, e.g. with oldCap = 0b1 0000:
                        //   hash .... 0 1111 -> bit clear -> low list
                        //   hash .... 1 1111 -> bit set   -> high list
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                // first node: becomes the low list's head
                                loHead = e;
                            else
                                // append at the low list's tail
                                loTail.next = e;
                            // tail pointer tracks the last node
                            loTail = e;
                        }
                        // bit set: node goes to the high list
                        else {
                            if (hiTail == null)
                                // first node: becomes the high list's head
                                hiHead = e;
                            else
                                // append at the high list's tail
                                hiTail.next = e;
                            // tail pointer tracks the last node
                            hiTail = e;
                        }
                    } while ((e = next) != null);
                    if (loTail != null) {
                        // loTail's next may still point at an old-chain node;
                        // terminate the list before publishing it.
                        loTail.next = null;
                        // low list keeps the same bin index.
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        // same termination for the high list's tail.
                        hiTail.next = null;
                        // high list lands at old index + old capacity.
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}
6、hash()
/** * Computes key.hashCode() and spreads (XORs) higher bits of hash * to lower. Because the table uses power-of-two masking, sets of * hashes that vary only in bits above the current mask will * always collide. (Among known examples are sets of Float keys * holding consecutive whole numbers in small tables.) So we * apply a transform that spreads the impact of higher bits * downward. There is a tradeoff between speed, utility, and * quality of bit-spreading. Because many common sets of hashes * are already reasonably distributed (so don't benefit from * spreading), and because we use trees to handle large sets of * collisions in bins, we just XOR some shifted bits in the * cheapest possible way to reduce systematic lossage, as well as * to incorporate impact of the highest bits that would otherwise * never be used in index calculations because of table bounds. * * 1.7 是用key的hash值与低位进行 & 运算,这样会使得到的下标不够散列 * 1.8 是用key的hash值与高位进行 & 运算,这样会让得到的下标更加散列 * * 由于1.7 hash 和(length-1)运算,length 绝大多数情况小于2的16次方。 * 所以始终是hashcode 的低16位(甚至更低)参与运算。 * 要是高16位也参与运算,会让得到的下标更加散列。 * 因此1.8先通过 h >>> 16 获取key的高位 然后再与key的hash值进行 ^ 运算 * 用 ^ 运算是因为 &和|都会使得结果偏向0或者1 ,并不是均匀的概念,所以用 ^ */ static final int hash(Object key) { int h; //如果传入的key为null则会默认返回0,也就是桶位的第一位 return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); }
相关文章推荐
- 面试必备HashMap源码分析
- 面试必备:HashMap源码解析(JDK8)
- 面试必备1:HashMap(JDK1.8)原理以及源码分析
- 面试必备:HashMap源码解析(JDK8)
- 面试必备:HashMap、Hashtable、ConcurrentHashMap的原理与区别
- 面试必备:HashMap、Hashtable、ConcurrentHashMap的原理与区别
- 面试必备:LinkedList源码解析(JDK8)
- BAT面试必问HashMap源码分析
- BAT面试必问HashMap源码分析
- HashMap源码分析 —— 一篇文章搞定HashMap面试
- (java面试) 11. HashMap和ConcurrentHashMap的区别,HashMap的底层源码
- BAT面试必问HashMap源码分析
- P7面试必备--springAOP底层源码深度解析
- 面试加分项-HashMap源码中这些常量的设计目的
- BAT面试必问HashMap源码分析
- 面试必备:HashMap、Hashtable、ConcurrentHashMap的原理与区别
- Java BAT大型公司面试必考技能视频教程之HashMap源码分析与实现
- HashMap 和 HashTable 源码学习和面试总结
- 面试专辑——HashMap源码剖析
- 面试必备:HashMap、Hashtable、ConcurrentHashMap的原理与区别