/* file: include/linux/sched/topology.h */
/*
 * One node in the hierarchy of scheduling domains. A domain covers a set of
 * CPUs (its span) among which process load is balanced, links to its parent
 * and child domains, and carries both the balancing tunables and the runtime
 * bookkeeping for that balancing.
 */
struct sched_domain {
/* These fields must be setup */
/* Parent domain (next level up in the hierarchy). */
struct sched_domain __rcu *parent; /* top domain must be null terminated */
/* Child domain (next level down in the hierarchy). */
struct sched_domain __rcu *child; /* bottom domain must be null terminated */
/*
 * Scheduling groups belonging to this domain. The groups are chained
 * together into a list through sched_group::next.
 */
struct sched_group *groups; /* the balancing groups of the domain */
/*
 * Shortest allowed interval between load-balance checks. Checking too
 * often costs the system extra overhead.
 */
unsigned long min_interval; /* Minimum balance interval ms */
/*
 * Longest allowed interval between load-balance checks. Going too long
 * without a check lets the load imbalance grow too large.
 */
unsigned long max_interval; /* Maximum balance interval ms */
/*
 * Factor reflecting how busy the CPU is. The balance interval is adjusted
 * dynamically at run time and recorded in balance_interval; when the CPU
 * is busy the interval is stretched to busy_factor * balance_interval.
 */
unsigned int busy_factor; /* less balancing by factor if busy */
/*
 * Load balancing only starts once the imbalance inside the domain has
 * reached a certain degree; imbalance_pct is the threshold used to decide
 * that the domain is imbalanced.
 */
unsigned int imbalance_pct; /* No balance until over watermark */
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
int nohz_idle; /* NOHZ IDLE status */
int flags; /* See SD_* */
/*
 * Level of this domain within the domain hierarchy. The base domain is
 * level 0 and each level above it adds one, so this can be read as the
 * domain's height in the tree.
 */
int level;
/* Runtime fields. */
/* Point in time at which load balancing was last performed. */
unsigned long last_balance; /* init to jiffies. units in jiffies */
/* Current interval between balance runs; changes as the system runs. */
unsigned int balance_interval; /* initialise to 1. units in ms. */
unsigned int nr_balance_failed; /* initialise to 0 */
/* idle_balance() stats */
u64 max_newidle_lb_cost;
unsigned long next_decay_max_lb_cost;
u64 avg_scan_cost; /* select_idle_sibling */
/*
 * The two members share storage: private is only meaningful while the
 * domain is being constructed, rcu only while it is being destroyed.
 */
union {
void *private; /* used during construction */
struct rcu_head rcu; /* used during destruction */
};
struct sched_domain_shared *shared;
/* NOTE(review): presumably the number of CPUs set in span - confirm at the site that fills it in. */
unsigned int span_weight;
/*
 * A sched domain's span means "balance process load among these CPUs".
 *
 * Describes which CPUs this domain covers. A parent domain's span should
 * be a superset of the spans of all of its child domains.
 *
 * Declared as a flexible array member, so it must remain the last member
 * of the struct.
 */
unsigned long span[];
};
/* file: kernel/sched/sched.h */
/*
 * A scheduling group: one set of CPUs inside a scheduling domain. The groups
 * of a domain are chained together through the next pointer.
 */
struct sched_group {
/*
 * Next scheduling group. The groups are linked in one direction only, and
 * the chain must close back on itself.
 */
struct sched_group *next; /* Must be a circular list */
/* Reference count of this scheduling group. */
atomic_t ref;
/* NOTE(review): presumably the number of CPUs set in cpumask - confirm at the site that fills it in. */
unsigned int group_weight;
/* Compute-capacity information for this scheduling group. */
struct sched_group_capacity *sgc;
int asym_prefer_cpu; /* CPU of highest priority in group */
/*
 * CPUs contained in this scheduling group. Declared as a flexible array
 * member, so it must remain the last member of the struct.
 */
unsigned long cpumask[];
};