您的位置：首页 > 理论基础 > 计算机网络
网络子系统71_路由缓存垃圾回收

2013-10-20 14:37 323 查看
//	同步回收机制
//	返回值：
//		0，达到回收目标
//		1，没有达到回收目标

//	注：ip_rt_gc_min_interval = HZ/2
//		ip_rt_gc_timeout = RT_GC_TIMEOUT = 300HZ
//		ip_rt_gc_elasticity = 8*(rt_hash_mask+1)(正常情况下)
//		ip_rt_gc_elasticity = 1(当绑定缓存到邻居子系统失败时)

//	平衡点，gc目标数的动态调整：
//		1.当前缓存个数 < 安全阈值(安全范围，试图gc少缓存)
//			1.1 当前缓存个数 > 上次gc的平衡点(安全范围内允许缓存增多)
//				1.1.1 新平衡点+=min((当前缓存个数-上次平衡点)/2 , buckets)
//				1.2.1 gc目标数=当前缓存个数-新平衡点
//					1.2.1.1 gc目标数 < 0
//						1.2.1.1.1 新平衡点+=gc目标数

//		2. 当前缓存个数 > 安全阈值(危险范围，试图gc多缓存)
//			2.1 gc目标数 = max((当前缓存个数-安全阈值)/2, buckets)(超过安全阈值的缓存一半将被gc)
//			2.2 新平衡点=当前缓存个数-gc目标数
//		3. gc目标数>0时，进行垃圾回收
//	注，安全阈值，每个bucket的冲突链中有8个bucket

//	回收过程：
//		1.令，遍历所有bucket = 一个回收单位
//		2.如果一个回收单位释放了gc目标数，调整expire
//			2.1 expire+=HZ/2，放宽到期时限
//		3.如果一个回收单位没有释放gc目标数
//			3.1 如果当前缓存个数 < 缓存允许的最小值，退出，下一次使用当前的expire作为时限
//			3.2 否则，只有在用户态，并且耗时<1 jiffes时，继续回收。

//	回收时限确定：
//		1.expire, 回收单位的起始到期时限，每个bucket使用该时限作为起始时限
//			当前回收单位没有完成gc目标数，下一个回收单位折半expire。
//		5.tmo，每个冲突链起始设置tmo=expire，当冲突链的一个节点不能被删除时，
//			tmo折半。

1.1 static int rt_garbage_collect(void)
{
//静态变量
static unsigned long expire = RT_GC_TIMEOUT;
static unsigned long last_gc;
static int rover;
static int equilibrium;
struct rtable *rth, **rthp;
unsigned long now = jiffies;
int goal;

//回收速率限制
//	在ip_rt_gc_min_interval时间范围内，如果缓存数量没有超过最大值，则放弃回收
if (now - last_gc < ip_rt_gc_min_interval &&
atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
RT_CACHE_STAT_INC(gc_ignored);
goto out;
}

//ipv4_dst_ops.entries保存当前缓存的个数
goal = atomic_read(&ipv4_dst_ops.entries) -
(ip_rt_gc_elasticity << rt_hash_log);//平均长度下，所有bucket的缓存个数

//当前缓存的个数没有达到平均缓存个数
if (goal <= 0) {
if (equilibrium < ipv4_dst_ops.gc_thresh)
equilibrium = ipv4_dst_ops.gc_thresh;
//当前缓存的个数相对于上次平衡点的增长个数
goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
//缓存个数增加
if (goal > 0) {
//在平均水平下，平衡点随着缓存个数的增加而增加
equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
//新平衡点为剩余缓存个数
goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
}
//当前缓存个数超过平均缓存个数
} else {
//至少回收 number of buckets
goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
//新平衡点为剩余缓存个数
equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
}

if (now - last_gc >= ip_rt_gc_min_interval)
last_gc = now;
//当前缓存总数小于平均值时，并且小于上次的平衡点
//	说明缓存个数开始递减，则降低平衡点，不进行回收
if (goal <= 0) {
equilibrium += goal;
goto work_done;
}

do {
int i, k;
//rover记录上次遍历的bucket
for (i = rt_hash_mask, k = rover; i >= 0; i--) {
//tmo，bucket冲突链表内部使用的起始到期时限
//expire, bucket之间使用的起始到期时限
unsigned long tmo = expire;

k = (k + 1) & rt_hash_mask;
//这次应该遍历的bucket
rthp = &rt_hash_table[k].chain;
spin_lock_bh(&rt_hash_table[k].lock);
while ((rth = *rthp) != NULL) {
//检查缓存到期
if (!rt_may_expire(rth, tmo, expire)) {
//到期时间折半，使回收更加严格
tmo >>= 1;
rthp = &rth->u.rt_next;
continue;
}
*rthp = rth->u.rt_next;
rt_free(rth);
goal--;
}
spin_unlock_bh(&rt_hash_table[k].lock);
if (goal <= 0)
break;
}
rover = k;

if (goal <= 0)
goto work_done;
//到期时间已经足够严格，依然没有达到目标，放弃回收
if (expire == 0)
break;
//下一轮遍历的到期时间折半，使到期时间更加严格
expire >>= 1;

if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
goto out;
} while (!in_softirq() && time_before_eq(jiffies, now));

if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
goto out;
return 1;

work_done:
expire += ip_rt_gc_min_interval;
if (expire > ip_rt_gc_timeout ||
atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
expire = ip_rt_gc_timeout;

out:	return 0;
}

//	异步垃圾回收
//		垃圾回收定时器函数

//	函数主要任务：
//		1.计算此次gc处理的bucket数
//		2.从上次遍历到的下一个bucket开始
//			2.1 如果dst到期时间为0，可以被释放，从冲突链取下节点，释放
//			2.2 该dst没有到期，调整gc时限更加严格
//		3.重新激活gc定时器

//	注：ip_rt_gc_interval=60HZ

//	注：ip_rt_gc_timeout，ip_rt_gc_interval的关系
//		1.一个bucket至少在ip_rt_gc_timeout间隔gc一次
//		2.两次异步gc的间隔为ip_rt_gc_interval(即定时器的到期间隔)
//		3.此次gc处理的bucket个数=(ip_rt_gc_interval*(rt_hash_mask+1))/ip_rt_gc_timeout

//	使路由缓存到期的事件：
//		1.主机接收到 ICMP UNREACHABLE 或 ICMP FRAGMENTATION NEEDED，设置为一段时间后(10min)过期。
//		2.邻居项无法解析l3到l2的映射，目的ip不可达时，设置为立即过期。

//	dst_entry->expires
//		默认情况下，路由缓存表项永远不会过期，因此dst_entry->expires设置为0，当发生使路由缓存过期的事件时
//		，更新dst_entry->expires为缓存到期的时间戳。

2.1 static void rt_check_expire(unsigned long dummy)
{
static int rover;
int i = rover, t;
struct rtable *rth, **rthp;
unsigned long now = jiffies;
//计算可gc的bucket个数
for (t = ip_rt_gc_interval << rt_hash_log; t >= 0;
//gc定时器到期时间ip_rt_gc_timeout
t -= ip_rt_gc_timeout) {
unsigned long tmo = ip_rt_gc_timeout;
//rover记录上次gc的bucket
i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain;

spin_lock(&rt_hash_table[i].lock);
while ((rth = *rthp) != NULL) {
//dst->expires该dst将过期的时间戳
if (rth->u.dst.expires) {
//表示指定的过期还没到
if (time_before_eq(now, rth->u.dst.expires)) {
//过期时限折半
tmo >>= 1;
rthp = &rth->u.rt_next;
continue;
}
//该dst还不能被释放
} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
tmo >>= 1;
rthp = &rth->u.rt_next;
continue;
}

//从bucket的冲突链取下，释放
*rthp = rth->u.rt_next;
rt_free(rth);
}
spin_unlock(&rt_hash_table[i].lock);
//一步回收不超过1 jiffies
if (time_after(jiffies, now))
break;
}
rover = i;
//下次到期的时间
mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
}

//	判断缓存是否可以释放
//	参数：
//		tmo1，可快速清理的缓存使用的到期时限
//		tmo2，高代价创建的缓存使用的到期时限
//		tmo2 > tmo1

//	缓存释放的条件：
//		1.引用计数=0
//		2.如果，缓存指定了到期时间，并且达到了到期时间，可清理
//		3.否则，可以快速清理，并且清理代价较低

//	可以快速清理的缓存项：
//		1.广播多播地址
//		2.入口路由
//		3.冲突链表中，其后有缓存项

//	清理代价较高的缓存项：
//		1.重定向的的缓存项
3.1 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
{
unsigned long age;
int ret = 0;

if (atomic_read(&rth->u.dst.__refcnt))
goto out;

ret = 1;
if (rth->u.dst.expires &&
time_after_eq(jiffies, rth->u.dst.expires))
goto out;

age = jiffies - rth->u.dst.lastuse;
ret = 0;
if ((age <= tmo1 && !rt_fast_clean(rth)) ||
(age <= tmo2 && rt_valuable(rth)))
goto out;
ret = 1;
out:	return ret;
}
内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理
标签： Linux 路由缓存 kernel ip 网络
相关文章推荐
新的分享
章节导航