在计算cpu的util(函数sugov_get_util)时,需要用margin来补偿util(从schedutil governor的代码可以看到,不仅有cpu的util margin,还有freq margin),最终得到util += margin:

/**
 * boosted_cpu_util() - utilization of a CPU with its SchedTune margin applied
 * @cpu: the CPU to query
 *
 * Return: cpu_util_freq(@cpu) plus the margin computed by
 * schedtune_cpu_margin() for that utilization.
 */
unsigned long
boosted_cpu_util(int cpu)
{
	/* Current utilization of this CPU. */
	unsigned long util = cpu_util_freq(cpu);
	long margin = schedtune_cpu_margin(util, cpu);

	trace_sched_boost_cpu(cpu, util, margin);

	return util + margin;
}

使用walt计算cpu util:

/**
 * cpu_util_freq() - utilization of a CPU, capped at its original capacity
 * @cpu: the CPU to query
 *
 * Starts from the CFS run-queue's avg.util_avg. When CONFIG_SCHED_WALT is
 * built in, WALT is not disabled and sysctl_sched_use_walt_cpu_util is set,
 * the value is replaced by
 *   cumulative_runnable_avg / (walt_ravg_window >> SCHED_LOAD_SHIFT).
 *
 * Return: the utilization, never larger than capacity_orig_of(@cpu).
 */
static inline unsigned long cpu_util_freq(int cpu)
{
	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
	unsigned long capacity = capacity_orig_of(cpu);

#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
		util = div64_u64(cpu_rq(cpu)->cumulative_runnable_avg,
				 walt_ravg_window >> SCHED_LOAD_SHIFT);
#endif

	return (util >= capacity) ? capacity : util;
}

可以知道util = cumulative_runnable_avg/(walt_ravg_window>>10)(这里SCHED_LOAD_SHIFT取10)。walt_ravg_window是常量,在walt.c文件中定义,而且在讲walt负载怎么计算的文章中有详细的解释:https://blog.csdn.net/wuming_422103632/article/details/81633225


/**
 * schedtune_cpu_margin() - SchedTune margin for a CPU utilization value
 * @util: the utilization to boost
 * @cpu: the CPU whose boost value is used
 *
 * Return: 0 when the CPU's boost is 0, otherwise the result of
 * schedtune_margin(@util, boost).
 */
static inline int
schedtune_cpu_margin(unsigned long util, int cpu)
{
	int boost = schedtune_cpu_boost(cpu);

	if (boost == 0)
		return 0;

	return schedtune_margin(util, boost);
}


/**
 * schedtune_cpu_boost() - boost value currently recorded for a CPU
 * @cpu: the CPU to query
 *
 * Return: the boost_max field of that CPU's per-CPU cpu_boost_groups entry.
 */
int schedtune_cpu_boost(int cpu)
{
	struct boost_groups *bg;

	bg = &per_cpu(cpu_boost_groups, cpu);

	return bg->boost_max;
}

上面的函数用来获取结构体struct boost_groups的成员boost_max。结构体boost_groups用来记录cpu上的runnable task所属的各个group,每个group可能有不同的boost参数设定。下面是这个结构体的解释:

/* SchedTune boost groups * Keep track of all the boost groups which impact on CPU, for example when a * CPU has two RUNNABLE tasks belonging to two different boost groups and thus * likely with different boost values. * Since on each system we expect only a limited number of boost groups, here * we use a simple array to keep track of the metrics required to compute the * maximum per-CPU boosting value. */
struct boost_groups {  /* Maximum boost value for all RUNNABLE tasks on a CPU */  bool idle;  int boost_max;  struct {  /* The boost for tasks on that boost group */  int boost;  /* Count of RUNNABLE tasks on that boost group */  unsigned tasks;  } group[BOOSTGROUPS_COUNT];  /* CPU's boost group locking */  raw_spinlock_t lock;
/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);


/**
 * schedtune_margin() - margin to add to a signal for a given boost value
 * @signal: the value to be boosted (e.g. a CPU utilization)
 * @boost: the boost value; may be negative
 *
 * Multiplies @signal by the magnitude of @boost and divides the product with
 * reciprocal_divide() using schedtune_spc_rdiv. For a non-negative @boost the
 * result is clamped to [0, SCHED_CAPACITY_SCALE - @signal]; for a negative
 * @boost the result is negated.
 *
 * NOTE(review): schedtune_spc_rdiv is defined outside this block; presumably
 * it is reciprocal_value(100), i.e. the division is by 100 - confirm.
 *
 * Return: the signed margin.
 */
static long
schedtune_margin(unsigned long signal, long boost)
{
	long long margin = 0;

	/*
	 * Signal proportional compensation (SPC)
	 *
	 * The Boost (B) value is used to compute a Margin (M) which is
	 * proportional to the complement of the original Signal (S):
	 *   M = B * (SCHED_CAPACITY_SCALE - S)
	 * The obtained M could be used by the caller to "boost" S.
	 *
	 * NOTE(review): the formula above is quoted from the original
	 * comment; the code below multiplies the signal itself by the
	 * boost rather than its complement.
	 */
	if (boost >= 0)
		margin = signal * boost;
	else
		margin = -signal * boost;	/* boost is negative here */

	margin = reciprocal_divide(margin, schedtune_spc_rdiv);

	if (boost >= 0)
		margin = clamp_t(long long, margin, 0,
				 SCHED_CAPACITY_SCALE - signal);

	/* Restore the sign for a negative boost. */
	if (boost < 0)
		margin *= -1;

	return margin;
}

主要是计算schedtune_spc_rdiv = reciprocal_value(100);reciprocal_value函数源码如下:

/**
 * reciprocal_value() - precompute the multiplier and shifts for a divisor
 * @d: the divisor
 *
 * Fills in the m, sh1 and sh2 fields of a struct reciprocal_value from @d.
 * The trailing comments show the values obtained for d = 100.
 *
 * Return: the filled-in struct reciprocal_value.
 */
struct reciprocal_value reciprocal_value(u32 d)
{
	struct reciprocal_value R;
	u64 m;
	int l;

	l = fls(d - 1);			/* d = 100: fls(99) = 7 */
	m = ((1ULL << 32) * ((1ULL << l) - d));
	do_div(m, d);
	++m;
	R.m = (u32)m;			/* d = 100: R.m = 1202590843 */
	R.sh1 = min(l, 1);		/* d = 100: R.sh1 = 1 */
	R.sh2 = max(l - 1, 0);		/* d = 100: R.sh2 = 6 */

	return R;
}

/**
 * fls - find last (most-significant) bit set
 * @x: the word to search
 *
 * This is defined the same way as ffs.
 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
 */
static __always_inline int fls(int x)
{
	int r = 32;

	if (!x)
		return 0;
	/* Halve the search window each step: 16, 8, 4, 2, then 1 bit. */
	if (!(x & 0xffff0000u)) {
		x <<= 16;
		r -= 16;
	}
	if (!(x & 0xff000000u)) {
		x <<= 8;
		r -= 8;
	}
	if (!(x & 0xf0000000u)) {
		x <<= 4;
		r -= 4;
	}
	if (!(x & 0xc0000000u)) {
		x <<= 2;
		r -= 2;
	}
	if (!(x & 0x80000000u)) {
		x <<= 1;
		r -= 1;
	}
	return r;
}


margin  = reciprocal_divide(margin, schedtune_spc_rdiv);  

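是一个除法,reciprocal_divide是计算A/B的优化函数。由于乘法在计算机上比除法快得多,内核把"除以B"转换为"乘以预先算好的倒数再移位"(内核头文件reciprocal_div.h注明该算法基于Granlund和Montgomery的论文《Division by Invariant Integers Using Multiplication》,并不是Newton-Raphson迭代),这只需要乘法和位移。虽然我们对数学细节并不关心,但我们需要知道,内核可以不直接计算C=A/B,而是使用C=reciprocal_divide(A, reciprocal_value(B))的方式,后者涉及的两个函数都是库程序。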

/**
 * reciprocal_divide() - divide using a precomputed struct reciprocal_value
 * @a: the dividend
 * @R: multiplier and shifts, as produced by reciprocal_value() for the divisor
 *
 * Uses one 64-bit multiplication, one subtraction, one addition and two
 * shifts instead of a division instruction.
 *
 * Return: the quotient computed from @a and @R.
 */
static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
{
	/* High 32 bits of the 64-bit product a * R.m. */
	u32 t = (u32)(((u64)a * R.m) >> 32);

	return (t + ((a - t) >> R.sh1)) >> R.sh2;
}

这个算法的实现就是margin = signal*boost/100,从打印结果也证实了:

[ 52.191507] signal=49,margin=4 //执行reciprocal_divide之后打印的
[ 52.191519] boost=10,margin=4 //执行clamp_t之后打印的数值
[ 52.191532] signal=49,margin=4
[ 52.191543] boost=10,margin=4
[ 52.191931] signal=700,margin=70


/*
 * clamp_t - limit val to the range [lo, hi]: first take the larger of val and
 * lo via max_t, then the smaller of that and hi via min_t, passing 'type' to
 * both helpers.
 */
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)


util += (util * boost) / 100(boost本身带符号:boost为负时margin为负,即降低util;boost为正时margin还会被clamp到[0, SCHED_CAPACITY_SCALE - util]范围内)

上面处理的boost来自schedtune,涉及到cgroup(control group)子系统,比较复杂,需要花时间去理解,后续再学习。

