nr_dirtied_pause:当前task的脏页门限;
dirty_exceeded:该bdi的脏页数超过该bdi的门限,并且(全局脏页数超过全局门限,或者该bdi设置了strictlimit);即
dirty_exceeded = (bdi_dirty > bdi_thresh) && ((nr_dirty > dirty_thresh) || strictlimit);

bdp_ratelimits:percpu变量,当前CPU的脏页数
ratelimit_pages:CPU的脏页门限

调用balance_dirty_pages的条件有:
1:当前task的脏页数量大于ratelimit ,(如果dirty_exceeded为0,则为current->nr_dirtied_pause;如果dirty_exceeded为1,则最大为32KB)
2:当前CPU的脏页数超过了门限值ratelimit_pages;
3:当前脏页数+退出线程遗留的脏页超过了门限;

void balance_dirty_pages_ratelimited(struct address_space *mapping)
{struct backing_dev_info *bdi = inode_to_bdi(mapping->host);int ratelimit;int *p;if (!bdi_cap_account_dirty(bdi))return;ratelimit = current->nr_dirtied_pause;  /* 门限:初始值为32表示128KB */if (bdi->dirty_exceeded)                /* 如果该值设置了,则需要通过降低平衡触发的门限来加速脏页回收 */ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));  /* 重新修改门限,最大为32KB,初始值128KB,加快回收 */preempt_disable();/** This prevents one CPU to accumulate too many dirtied pages without* calling into balance_dirty_pages(), which can happen when there are* 1000+ tasks, all of them start dirtying pages at exactly the same* time, hence all honoured too large initial task->nr_dirtied_pause.*//* 即保证当前线程脏页数超过门限,或者当前CPU超过门限,都要回收 */p =  this_cpu_ptr(&bdp_ratelimits);  /* 当前CPU的脏页计数 */if (unlikely(current->nr_dirtied >= ratelimit))  /* 如果当前线程脏页数超过门限值,则肯定会触发下面的回收流程。同时重新计算当前CPU的脏页数 */*p = 0;else if (unlikely(*p >= ratelimit_pages)) {     /* 默认值为32页 */ /* 当前线程的脏页数未超过门限值,但是当前CPU的脏页数超过CPU脏页门限值,则设置门限为0,肯定会触发回收。同时重新计算当前CPU的脏页数 */*p = 0;ratelimit = 0;}/** Pick up the dirtied pages by the exited tasks. This avoids lots of* short-lived tasks (eg. gcc invocations in a kernel build) escaping* the dirty throttling and livelock other long-run dirtiers.*/p = this_cpu_ptr(&dirty_throttle_leaks);   /* 退出的线程,也放在这里处理 */if (*p > 0 && current->nr_dirtied < ratelimit) {  unsigned long nr_pages_dirtied;nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);*p -= nr_pages_dirtied;current->nr_dirtied += nr_pages_dirtied;}preempt_enable();if (unlikely(current->nr_dirtied >= ratelimit))    /* 当前线程脏页超过门限值 */balance_dirty_pages(mapping, current->nr_dirtied);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

正常情况下应该是周期回收和背景回收,不会占用当前task的时间。但是当 dirty > dirty_freerun_ceiling(thresh, bg_thresh),即脏页数大于直接回收门限和背景回收门限的平均值 (thresh + bg_thresh) / 2 时,需要让当前task休眠一会,让回收线程工作。

而当 dirty <= dirty_freerun_ceiling(thresh, bg_thresh) 时,也会动态地调整nr_dirtied_pause,好让其更好地回收,调整的策略为:

/*
 * dirty_poll_interval - how many pages a task may dirty between polls
 * @dirty:  current number of dirty pages
 * @thresh: dirty threshold
 *
 * Returns 1 when @dirty has reached or passed @thresh, i.e. poll on every
 * page.  Otherwise returns 1 << (ilog2(@thresh - @dirty) / 2), which is
 * roughly the square root of the remaining gap (assuming ilog2() is the
 * floor of log2 -- defined outside this file).
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	unsigned long gap;

	if (dirty >= thresh)
		return 1;

	gap = thresh - dirty;
	return 1UL << (ilog2(gap) >> 1);
}

至于为什么这么做,可以参考如下解析:
/*
Ideally if we know there are N dirtiers, it’s safe to let each task
poll at (thresh-dirty)/N without exceeding the dirty limit.

However we neither know the current N, nor is sure whether it will
rush high at next second. So sqrt is used to tolerate larger N on
increased (thresh-dirty) gap:

irb> 0.upto(10) { |i| mb=2**i; pages=mb<<(20-12); printf "%4d\t%4d\n", mb, Math.sqrt(pages)}
1 16
2 22
4 32
8 45
16 64
32 90
64 128
128 181
256 256
512 362
1024 512

The above table means, given 1MB (or 1GB) gap and the dd tasks polling
balance_dirty_pages() on every 16 (or 512) pages, the dirty limit
won’t be exceeded as long as there are less than 16 (or 512) concurrent
dd’s.

Note that dirty_poll_interval() will mainly be used when (dirty < freerun).
When the dirty pages are floating in range [freerun, limit],
“[PATCH 14/18] writeback: control dirty pause time” will independently
adjust tsk->nr_dirtied_pause to get suitable pause time.

So the sqrt naturally leads to less overheads and more N tolerance for
large memory servers, which have large (thresh-freerun) gaps.

*/

void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{/* 可用内存并不是系统所有内存,而是free pages + reclaimable pages(文件页) */const unsigned long available_memory = global_dirtyable_memory();unsigned long background;unsigned long dirty;struct task_struct *tsk;if (vm_dirty_bytes)dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);elsedirty = (vm_dirty_ratio * available_memory) / 100;if (dirty_background_bytes)background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);elsebackground = (dirty_background_ratio * available_memory) / 100;if (background >= dirty)background = dirty / 2;tsk = current;if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {   /* 如果设置了该属性PF_LESS_THROTTLE或者是实时线程,门限稍微提高1/4 */background += background / 4;dirty += dirty / 4;}*pbackground = background;*pdirty = dirty;trace_global_dirty_state(background, dirty);
}static unsigned long global_dirtyable_memory(void)
{unsigned long x;/* 可用内存并不是系统所有内存,而是free pages + file pages(文件页) */x = global_page_state(NR_FREE_PAGES);x -= min(x, dirty_balance_reserve);x += global_page_state(NR_INACTIVE_FILE);x += global_page_state(NR_ACTIVE_FILE);if (!vm_highmem_is_dirtyable)x -= highmem_dirtyable_memory(x);return x + 1;  /* Ensure that we never return 0 */
}

1:如果可回收+正在回写的脏页数量 <= background阈值和显式回写阈值的均值,此次先不启动回写;否则启动background回写;
2:如果可回收的脏页数大于背景回收门限值,则触发背景回收执行;

static void balance_dirty_pages(struct address_space *mapping,unsigned long pages_dirtied)
{unsigned long nr_reclaimable;  /* = file_dirty + unstable_nfs */unsigned long nr_dirty;  /* = file_dirty + writeback + unstable_nfs */unsigned long background_thresh;unsigned long dirty_thresh;long period;long pause;long max_pause;long min_pause;int nr_dirtied_pause;bool dirty_exceeded = false;unsigned long task_ratelimit;unsigned long dirty_ratelimit;unsigned long pos_ratio;struct backing_dev_info *bdi = inode_to_bdi(mapping->host);bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; //单独门限值回收unsigned long start_time = jiffies;for (;;) {unsigned long now = jiffies;unsigned long uninitialized_var(bdi_thresh);unsigned long thresh;unsigned long uninitialized_var(bdi_dirty);unsigned long dirty;unsigned long bg_thresh;/** Unstable writes are a feature of certain networked* filesystems (i.e. NFS) in which data may have been* written to the server's write cache, but has not yet* been flushed to permanent storage.*/nr_reclaimable = global_page_state(NR_FILE_DIRTY) +global_page_state(NR_UNSTABLE_NFS);  /* 全局 文件脏页  + 网络文件系统 */  /* = file_dirty + unstable_nfs */nr_dirty = nr_reclaimable + global_page_state(NR_WRITEBACK); /*全局 文件总的脏页+包括正在回写 */  /* = file_dirty + writeback + unstable_nfs */global_dirty_limits(&background_thresh, &dirty_thresh);//获取两个门限值if (unlikely(strictlimit)) {  /* 单独bdi回收 */bdi_dirty_limits(bdi, dirty_thresh, background_thresh,&bdi_dirty, &bdi_thresh, &bg_thresh);dirty = bdi_dirty;thresh = bdi_thresh;} else {                       /* 全局回收 */dirty = nr_dirty;          /* 全局 文件总的脏页+包括正在回写 */thresh = dirty_thresh;bg_thresh = background_thresh;}/** Throttle it only when the background writeback cannot* catch-up. This avoids (excessively) small writeouts* when the bdi limits are ramping up in case of !strictlimit.** In strictlimit case make decision based on the bdi counters* and limits. 
Small writeouts when the bdi limits are ramping* up are the price we consciously pay for strictlimit-ing.*//* 小于直接回收文件和背景回收的/2, 不占用本线程时间;否则说明背景回收没有运行,需要占用本线程时间,  */if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh)) {  //(thresh + bg_thresh) / 2; 不回收current->dirty_paused_when = now;current->nr_dirtied = 0;                 /* 脏页数量重新置0 */current->nr_dirtied_pause =dirty_poll_interval(dirty, thresh);   /* 重新设置线程脏页门限 */break;}if (unlikely(!writeback_in_progress(bdi)))  /* 唤醒真正的回写线程 */bdi_start_background_writeback(bdi);if (!strictlimit)bdi_dirty_limits(bdi, dirty_thresh, background_thresh,&bdi_dirty, &bdi_thresh, NULL);//nr_dirty > dirty_thresh/** 如果是单个bdi独自回收,当前bdi的 脏页超过门限即回收;* 如果是整个系统回收,当前bdi超过门限且系统的脏页也要超超过门限;*/dirty_exceeded = (bdi_dirty > bdi_thresh) &&((nr_dirty > dirty_thresh) || strictlimit); //超过门限if (dirty_exceeded && !bdi->dirty_exceeded)bdi->dirty_exceeded = 1;                        //超过门限,后面需要加速回收bdi_update_bandwidth(bdi, dirty_thresh, background_thresh,nr_dirty, bdi_thresh, bdi_dirty,start_time);dirty_ratelimit = bdi->dirty_ratelimit;pos_ratio = bdi_position_ratio(bdi, dirty_thresh,background_thresh, nr_dirty,bdi_thresh, bdi_dirty);task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>RATELIMIT_CALC_SHIFT;max_pause = bdi_max_pause(bdi, bdi_dirty);min_pause = bdi_min_pause(bdi, max_pause,task_ratelimit, dirty_ratelimit,&nr_dirtied_pause);if (unlikely(task_ratelimit == 0)) {period = max_pause;pause = max_pause;goto pause;}period = HZ * pages_dirtied / task_ratelimit;pause = period;if (current->dirty_paused_when)pause -= now - current->dirty_paused_when;/** For less than 1s think time (ext3/4 may block the dirtier* for up to 800ms from time to time on 1-HDD; so does xfs,* however at much less frequency), try to compensate it in* future periods by updating the virtual time; otherwise just* do a reset, as it may be a light dirtier.*/if (pause < min_pause) 
{trace_balance_dirty_pages(bdi,dirty_thresh,background_thresh,nr_dirty,bdi_thresh,bdi_dirty,dirty_ratelimit,task_ratelimit,pages_dirtied,period,min(pause, 0L),start_time);if (pause < -HZ) {current->dirty_paused_when = now;current->nr_dirtied = 0;} else if (period) {current->dirty_paused_when += period;current->nr_dirtied = 0;} else if (current->nr_dirtied_pause <= pages_dirtied)current->nr_dirtied_pause += pages_dirtied;break;}if (unlikely(pause > max_pause)) {/* for occasional dropped task_ratelimit */now += min(pause - max_pause, max_pause);pause = max_pause;}pause:trace_balance_dirty_pages(bdi,dirty_thresh,background_thresh,nr_dirty,bdi_thresh,bdi_dirty,dirty_ratelimit,task_ratelimit,pages_dirtied,period,pause,start_time);__set_current_state(TASK_KILLABLE);io_schedule_timeout(pause);//有可能会切出去,但最大超过200mscurrent->dirty_paused_when = now + pause;current->nr_dirtied = 0;current->nr_dirtied_pause = nr_dirtied_pause;/** This is typically equal to (nr_dirty < dirty_thresh) and can* also keep "1000+ dd on a slow USB stick" under control.*/if (task_ratelimit)break;/** In the case of an unresponding NFS server and the NFS dirty* pages exceeds dirty_thresh, give the other good bdi's a pipe* to go through, so that tasks on them still remain responsive.** In theory 1 page is enough to keep the comsumer-producer* pipe going: the flusher cleans 1 page => the task dirties 1* more page. However bdi_dirty has accounting errors.  So use* the larger and more IO friendly bdi_stat_error.*/if (bdi_dirty <= bdi_stat_error(bdi))break;if (fatal_signal_pending(current))break;}if (!dirty_exceeded && bdi->dirty_exceeded)  //如果不超过门限,则置0bdi->dirty_exceeded = 0;if (writeback_in_progress(bdi))  //正在回收,则退出return;/** In laptop mode, we wait until hitting the higher threshold before* starting background writeout, and then write out all the way down* to the lower threshold.  
So slow writers cause minimal disk activity.** In normal mode, we start background writeout at the lower* background_thresh, to keep the amount of dirty memory low.*//** 节能模式,起到什么作用呢??*/if (laptop_mode)return;if (nr_reclaimable > background_thresh) //可回收的页面大于background_thresh,则触发线程异步回收bdi_start_background_writeback(bdi);
}

balance_dirty_pages_ratelimited分析相关推荐

  1. git 修复中间版本_如何修复git中的错误并且不留痕迹

    git 修复中间版本 You finally found it: a bug in an old commit! And luckily, you already have a solution in ...

  2. 内存微粒_减少室内微粒空气污染的策略,第2部分中的第1部分

    内存微粒 Residents of the West Coast have been experiencing a continuous onslaught of air pollution unli ...

  3. css居中 垂直居中_CSS垂直居中

    css居中 垂直居中 Front-end developing is beautiful, and it's getting prettier by the day. Nowadays we got ...

  4. F2FS源码分析-2.2 [F2FS 读写部分] F2FS的一般文件写流程分析

    F2FS源码分析系列文章 主目录 一.文件系统布局以及元数据结构 二.文件数据的存储以及读写 F2FS文件数据组织方式 一般文件写流程 一般文件读流程 目录文件读流程(未完成) 目录文件写流程(未完成 ...

  5. 【Golang源码分析】Go Web常用程序包gorilla/mux的使用与源码简析

    目录[阅读时间:约10分钟] 一.概述 二.对比: gorilla/mux与net/http DefaultServeMux 三.简单使用 四.源码简析 1.NewRouter函数 2.HandleF ...

  6. 2022-2028年中国自动驾驶系统行业现状调研分析报告

    [报告类型]产业研究 [报告价格]4500起 [出版时间]即时更新(交付时间约3个工作日) [发布机构]智研瞻产业研究院 [报告格式]PDF版 本报告介绍了中国自动驾驶系统行业市场行业相关概述.中国自 ...

  7. 2022-2028年中国阻尼涂料市场研究及前瞻分析报告

    [报告类型]产业研究 [报告价格]4500起 [出版时间]即时更新(交付时间约3个工作日) [发布机构]智研瞻产业研究院 [报告格式]PDF版 本报告介绍了中国阻尼涂料行业市场行业相关概述.中国阻尼涂 ...

  8. 2021-2028年中国阻燃装饰行业市场需求与投资规划分析报告

    [报告类型]产业研究 [报告价格]4500起 [出版时间]即时更新(交付时间约3个工作日) [发布机构]智研瞻产业研究院 [报告格式]PDF版 本报告介绍了中国阻燃装饰行业市场行业相关概述.中国阻燃装 ...

  9. 2022-2028年全球与中国漂白吸水棉市场研究及前瞻分析报告

    [报告类型]产业研究 [报告价格]4500起 [出版时间]即时更新(交付时间约3个工作日) [发布机构]智研瞻产业研究院 [报告格式]PDF版 本报告介绍了全球与中国漂白吸水棉行业市场行业相关概述.全 ...

最新文章

  1. java checked异常有那些,java checked exceptions
  2. linux 企业数据,Linux企业数据标准出炉 IBM惠普等数巨头加盟
  3. 计算机应用基础在线作业南开,2017南开计算机应用基础在线作业满分的答案.doc...
  4. 银行利率涨了,定期存款有必要取出再存吗?
  5. 【电路补习笔记】3、电感的参数与选型
  6. 电脑配置多个git账号
  7. 读《编程珠玑》 (四)
  8. Maven多模块打包遇到的问题详解
  9. 评论关于中国工厂代工的Andr​​oid 4.0迷你PC的智能电视棒Mk802 CF卡
  10. JavaScript实现邮箱后缀提示功能
  11. 基于opencv python 的网线线序识别(三)
  12. 多米诺骨牌最长牌阵java_棋盘的完美覆盖(多米诺骨牌完美覆盖)幻方(魔方阵)...
  13. Win8各国语言包的安装,让系统变成你想要的语言版,带你体验不一样的输入法
  14. Java面试题消息队列
  15. sdformatter格式化选项设置_使用SDFormatter怎么格式化内存卡-SDFormatter使用教程
  16. conda创建环境并下载d2l软件包
  17. 如履薄冰:Redis 懒惰删除的巨大牺牲
  18. texturepacker 打包图集
  19. myeclipse加大内存
  20. WeDrivePlugin64_24.dll 引发win7系统不能进入桌面

热门文章

  1. java 拼 变量名_你还用拼音为变量命名?新人OIer别傻了,教你写出优质代码
  2. 图像处理技术的应用及重要性调研报告
  3. idea运行maven项目依赖项scope:provided时启动报错NoClassDefFoundError解决
  4. 利用阿贝云建自己的网站
  5. 小程序制作预算_做一个小程序的大概预算是多少?做一个小程序大概多少钱?...
  6. Java微信抢红包实现
  7. MTK 驱动开发(31)---Sensor 移植及调试(1)
  8. 高效构建机器学习系统
  9. c语言中常见的置位操作(置1或清零)
  10. 地理信息系统有哪些SCI期刊推荐? - 易智编译EaseEditing