Diffstat (limited to 'kernel/sched/fair.c')
 kernel/sched/fair.c | 115 ++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 33 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dffb270..7f0a5e6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2032,6 +2032,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 */
 	update_entity_load_avg(curr, 1);
 	update_cfs_rq_blocked_load(cfs_rq, 1);
+	update_cfs_shares(cfs_rq);
 
 #ifdef CONFIG_SCHED_HRTICK
 	/*
@@ -3017,6 +3018,23 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return 0;
 }
 
+static void record_wakee(struct task_struct *p)
+{
+	/*
+	 * Rough decay (wiping) for cost saving, don't worry
+	 * about the boundary, really active task won't care
+	 * about the loss.
+	 */
+	if (jiffies > current->wakee_flip_decay_ts + HZ) {
+		current->wakee_flips = 0;
+		current->wakee_flip_decay_ts = jiffies;
+	}
+
+	if (current->last_wakee != p) {
+		current->last_wakee = p;
+		current->wakee_flips++;
+	}
+}
 
 static void task_waking_fair(struct task_struct *p)
 {
@@ -3037,6 +3055,7 @@ static void task_waking_fair(struct task_struct *p)
 #endif
 
 	se->vruntime -= min_vruntime;
+	record_wakee(p);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3155,6 +3174,28 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
+static int wake_wide(struct task_struct *p)
+{
+	int factor = this_cpu_read(sd_llc_size);
+
+	/*
+	 * Yeah, it's the switching-frequency, could means many wakee or
+	 * rapidly switch, use factor here will just help to automatically
+	 * adjust the loose-degree, so bigger node will lead to more pull.
+	 */
+	if (p->wakee_flips > factor) {
+		/*
+		 * wakee is somewhat hot, it needs certain amount of cpu
+		 * resource, so if waker is far more hot, prefer to leave
+		 * it alone.
+		 */
+		if (current->wakee_flips > (factor * p->wakee_flips))
+			return 1;
+	}
+
+	return 0;
+}
+
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
 	s64 this_load, load;
@@ -3164,6 +3205,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	unsigned long weight;
 	int balanced;
 
+	/*
+	 * If we wake multiple tasks be careful to not bounce
+	 * ourselves around too much.
+	 */
+	if (wake_wide(p))
+		return 0;
+
 	idx	  = sd->wake_idx;
 	this_cpu  = smp_processor_id();
 	prev_cpu  = task_cpu(p);
@@ -4171,47 +4219,48 @@ static void update_blocked_averages(int cpu)
 }
 
 /*
- * Compute the cpu's hierarchical load factor for each task group.
+ * Compute the hierarchical load factor for cfs_rq and all its ascendants.
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static int tg_load_down(struct task_group *tg, void *data)
+static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 {
-	unsigned long load;
-	long cpu = (long)data;
-
-	if (!tg->parent) {
-		load = cpu_rq(cpu)->avg.load_avg_contrib;
-	} else {
-		load = tg->parent->cfs_rq[cpu]->h_load;
-		load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
-				tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
-	}
-
-	tg->cfs_rq[cpu]->h_load = load;
-
-	return 0;
-}
-
-static void update_h_load(long cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = rq_of(cfs_rq);
+	struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
 	unsigned long now = jiffies;
+	unsigned long load;
 
-	if (rq->h_load_throttle == now)
+	if (cfs_rq->last_h_load_update == now)
 		return;
 
-	rq->h_load_throttle = now;
+	cfs_rq->h_load_next = NULL;
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+		cfs_rq->h_load_next = se;
+		if (cfs_rq->last_h_load_update == now)
+			break;
+	}
+
+	if (!se) {
+		cfs_rq->h_load = rq->avg.load_avg_contrib;
+		cfs_rq->last_h_load_update = now;
+	}
 
-	rcu_read_lock();
-	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-	rcu_read_unlock();
+	while ((se = cfs_rq->h_load_next) != NULL) {
+		load = cfs_rq->h_load;
+		load = div64_ul(load * se->avg.load_avg_contrib,
+				cfs_rq->runnable_load_avg + 1);
+		cfs_rq = group_cfs_rq(se);
+		cfs_rq->h_load = load;
+		cfs_rq->last_h_load_update = now;
+	}
 }
 
 static unsigned long task_h_load(struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
+	update_cfs_rq_h_load(cfs_rq);
 	return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
 			cfs_rq->runnable_load_avg + 1);
 }
@@ -4220,10 +4269,6 @@ static inline void update_blocked_averages(int cpu)
 {
 }
 
-static inline void update_h_load(long cpu)
-{
-}
-
 static unsigned long task_h_load(struct task_struct *p)
 {
 	return p->se.avg.load_avg_contrib;
@@ -4286,6 +4331,8 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
  * get_sd_load_idx - Obtain the load index for a given sched domain.
  * @sd: The sched_domain whose load_idx is to be obtained.
  * @idle: The Idle status of the CPU for whose sd load_icx is obtained.
+ *
+ * Return: The load index.
  */
 static inline int get_sd_load_idx(struct sched_domain *sd,
 					enum cpu_idle_type idle)
@@ -4611,6 +4658,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
  *
  * Determine if @sg is a busier group than the previously selected
  * busiest group.
+ *
+ * Return: %true if @sg is a busier group than the previously selected
+ * busiest group. %false otherwise.
  */
 static bool update_sd_pick_busiest(struct lb_env *env,
 				   struct sd_lb_stats *sds,
@@ -4719,7 +4769,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
  * assuming lower CPU number will be equivalent to lower a SMT thread
  * number.
  *
- * Returns 1 when packing is required and a task should be moved to
+ * Return: 1 when packing is required and a task should be moved to
  * this CPU. The amount of the imbalance is returned in *imbalance.
  *
  * @env: The load balancing environment.
@@ -4904,7 +4954,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  *
  * @env: The load balancing environment.
  *
- * Returns: - the busiest group if imbalance exists.
+ * Return: - The busiest group if imbalance exists.
  *	      - If no imbalance and user has opted for power-savings balance,
  *	        return the least loaded group whose CPUs can be
  *	        put to idle by rebalancing its tasks onto our group.
@@ -5174,7 +5224,6 @@ redo:
 	env.src_rq      = busiest;
 	env.loop_max    = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
-	update_h_load(env.src_cpu);
more_balance:
 	local_irq_save(flags);
 	double_rq_lock(env.dst_rq, busiest);
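
The record_wakee()/wake_wide() hunks above gate the affine pull in wake_affine(): when the waker keeps switching between many distinct wakees (a 1:N pattern) and the wakee is itself busy, wake_affine() returns 0 and the wakee is left where it was instead of being dragged onto the waker's LLC. Below is a minimal user-space sketch of the same bookkeeping, not kernel code: struct task, the explicit waker parameter, and the jiffies/HZ stand-ins are illustrative assumptions, and factor plays the role of this_cpu_read(sd_llc_size).

#include <stdio.h>

/* Illustrative stand-ins for the fields this patch adds to task_struct. */
struct task {
        unsigned long wakee_flips;              /* wakee switches seen this window */
        unsigned long wakee_flip_decay_ts;
        struct task *last_wakee;
};

static unsigned long jiffies;                   /* stand-in for the kernel tick counter */
#define HZ 1000

/* Same bookkeeping as record_wakee(): reset roughly once per second,
 * then count how often the waker switches to a different wakee. */
static void record_wakee(struct task *waker, struct task *wakee)
{
        if (jiffies > waker->wakee_flip_decay_ts + HZ) {
                waker->wakee_flips = 0;
                waker->wakee_flip_decay_ts = jiffies;
        }
        if (waker->last_wakee != wakee) {
                waker->last_wakee = wakee;
                waker->wakee_flips++;
        }
}

/* Same decision as wake_wide(): skip the affine pull when the wakee is
 * already "hot" and the waker switches wakees far more often still. */
static int wake_wide(struct task *waker, struct task *wakee, int factor)
{
        if (wakee->wakee_flips > (unsigned long)factor &&
            waker->wakee_flips > (unsigned long)factor * wakee->wakee_flips)
                return 1;
        return 0;
}

int main(void)
{
        /* factor plays the role of sd_llc_size, e.g. 4 CPUs sharing an LLC. */
        int factor = 4;

        /* record_wakee(): switching to a new wakee bumps the counter,
         * waking the same task again does not. */
        struct task a = { 0 }, b = { 0 }, c = { 0 };
        jiffies = 1;
        record_wakee(&a, &b);
        record_wakee(&a, &c);
        record_wakee(&a, &c);
        printf("a switched wakees %lu time(s)\n", a.wakee_flips);       /* 2 */

        /* wake_wide(): a master that flipped 64 times waking a worker that
         * flipped 5 times is told to spread out; the reverse direction
         * still lets the affine pull be considered. */
        struct task master = { .wakee_flips = 64 };
        struct task worker = { .wakee_flips = 5 };
        printf("master wakes worker: wake_wide = %d\n",
               wake_wide(&master, &worker, factor));                    /* 1 */
        printf("worker wakes master: wake_wide = %d\n",
               wake_wide(&worker, &master, factor));                    /* 0 */
        return 0;
}

In the patch itself the waker is always current and the factor comes from the scheduler's LLC size, so no explicit parameters exist; they are added here only to keep the sketch self-contained.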
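
The rewritten update_cfs_rq_h_load() computes a task's hierarchical load lazily: it records a path of h_load_next pointers from the task's cfs_rq up to the root, seeds the top level with rq->avg.load_avg_contrib, then walks back down scaling each level's h_load by that entity's share of its parent's runnable load; task_h_load() applies one more scaling step for the task itself. A worked example of just that arithmetic, with made-up load figures and a hypothetical two-level hierarchy (plain C, not kernel code):

#include <stdio.h>

int main(void)
{
        unsigned long long rq_contrib      = 2048;  /* root rq->avg.load_avg_contrib       */
        unsigned long long root_runnable   = 2048;  /* root cfs_rq's runnable_load_avg     */
        unsigned long long grp_se_contrib  = 1024;  /* group A's se->avg.load_avg_contrib  */
        unsigned long long grp_runnable    = 1024;  /* group A's runnable_load_avg         */
        unsigned long long task_contrib    =  512;  /* p->se.avg.load_avg_contrib          */

        /* Top level: the root cfs_rq's h_load is the rq contribution itself. */
        unsigned long long root_h_load = rq_contrib;

        /* One step of the h_load_next walk: scale by A's share of the root. */
        unsigned long long grp_h_load =
                root_h_load * grp_se_contrib / (root_runnable + 1);

        /* task_h_load(): scale once more by p's share of group A. */
        unsigned long long task_h_load =
                task_contrib * grp_h_load / (grp_runnable + 1);

        printf("group h_load = %llu, task h_load = %llu\n",
               grp_h_load, task_h_load);            /* ~1023 and ~511 */
        return 0;
}

As in the hunks above, the "+ 1" in each divisor avoids dividing by zero when the runnable load is zero, and last_h_load_update (compared against jiffies) throttles the whole walk to at most once per tick per cfs_rq.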