diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 271 |
1 files changed, 182 insertions, 89 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c26e2d..0270246 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8; unsigned int sysctl_sched_child_runs_first __read_mostly; /* - * sys_sched_yield() compat mode - * - * This option switches the agressive yield implementation of the - * old scheduler back on. - */ -unsigned int __read_mostly sysctl_sched_compat_yield; - -/* * SCHED_OTHER wake-up granularity. * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) * @@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) { struct rb_node *left = cfs_rq->rb_leftmost; @@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) return rb_entry(left, struct sched_entity, run_node); } +static struct sched_entity *__pick_next_entity(struct sched_entity *se) +{ + struct rb_node *next = rb_next(&se->run_node); + + if (!next) + return NULL; + + return rb_entry(next, struct sched_entity, run_node); +} + +#ifdef CONFIG_SCHED_DEBUG static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ -#ifdef CONFIG_SCHED_DEBUG int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) } static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); +static void update_cfs_shares(struct cfs_rq *cfs_rq); /* * Update the current task's runtime statistics. Skip current tasks that @@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) now - cfs_rq->load_last > 4 * period) { cfs_rq->load_period = 0; cfs_rq->load_avg = 0; + delta = period - 1; } cfs_rq->load_stamp = now; @@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } -static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, - long weight_delta) +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { long load_weight, load, shares; - load = cfs_rq->load.weight + weight_delta; + load = cfs_rq->load.weight; load_weight = atomic_read(&tg->load_weight); - load_weight -= cfs_rq->load_contribution; load_weight += load; + load_weight -= cfs_rq->load_contribution; shares = (tg->shares * load); if (load_weight) @@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq) { if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } } # else /* CONFIG_SMP */ @@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { } -static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, - long weight_delta) +static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { return tg->shares; } @@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, account_entity_enqueue(cfs_rq, se); } -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) +static void update_cfs_shares(struct cfs_rq *cfs_rq) { struct task_group *tg; struct sched_entity *se; @@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) if (likely(se->load.weight == tg->shares)) return; #endif - shares = calc_cfs_shares(cfs_rq, tg, weight_delta); + shares = calc_cfs_shares(cfs_rq, tg); reweight_entity(cfs_rq_of(se), se, shares); } @@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { } -static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) +static inline void update_cfs_shares(struct cfs_rq *cfs_rq) { } @@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ update_curr(cfs_rq); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, se->load.weight); account_entity_enqueue(cfs_rq, se); + update_cfs_shares(cfs_rq); if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); @@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) list_add_leaf_cfs_rq(cfs_rq); } -static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +static void __clear_buddies_last(struct sched_entity *se) +{ + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + if (cfs_rq->last == se) + cfs_rq->last = NULL; + else + break; + } +} + +static void __clear_buddies_next(struct sched_entity *se) { - if (!se || cfs_rq->last == se) - cfs_rq->last = NULL; + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + if (cfs_rq->next == se) + cfs_rq->next = NULL; + else + break; + } +} - if (!se || cfs_rq->next == se) - cfs_rq->next = NULL; +static void __clear_buddies_skip(struct sched_entity *se) +{ + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + if (cfs_rq->skip == se) + cfs_rq->skip = NULL; + else + break; + } } static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) { - for_each_sched_entity(se) - __clear_buddies(cfs_rq_of(se), se); + if (cfs_rq->last == se) + __clear_buddies_last(se); + + if (cfs_rq->next == se) + __clear_buddies_next(se); + + if (cfs_rq->skip == se) + __clear_buddies_skip(se); } static void @@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (cfs_rq->nr_running > 1) { - struct sched_entity *se = __pick_next_entity(cfs_rq); + struct sched_entity *se = __pick_first_entity(cfs_rq); s64 delta = curr->vruntime - se->vruntime; if (delta < 0) @@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); +/* + * Pick the next process, keeping these things in mind, in this order: + * 1) keep things fair between processes/task groups + * 2) pick the "next" process, since someone really wants that to run + * 3) pick the "last" process, for cache locality + * 4) do not run the "skip" process, if something else is available + */ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { - struct sched_entity *se = __pick_next_entity(cfs_rq); + struct sched_entity *se = __pick_first_entity(cfs_rq); struct sched_entity *left = se; - if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) - se = cfs_rq->next; + /* + * Avoid running the skip buddy, if running something else can + * be done without getting too unfair. + */ + if (cfs_rq->skip == se) { + struct sched_entity *second = __pick_next_entity(se); + if (second && wakeup_preempt_entity(second, left) < 1) + se = second; + } /* * Prefer last buddy, try to return the CPU to a preempted task. @@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) se = cfs_rq->last; + /* + * Someone really wants this to run. If it's not unfair, run it. + */ + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) + se = cfs_rq->next; + clear_buddies(cfs_rq, se); return se; @@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq = cfs_rq_of(se); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } hrtick_update(rq); @@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq = cfs_rq_of(se); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } hrtick_update(rq); } -/* - * sched_yield() support is very simple - we dequeue and enqueue. - * - * If compat_yield is turned on then we requeue to the end of the tree. - */ -static void yield_task_fair(struct rq *rq) -{ - struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); - struct sched_entity *rightmost, *se = &curr->se; - - /* - * Are we the only task in the tree? - */ - if (unlikely(cfs_rq->nr_running == 1)) - return; - - clear_buddies(cfs_rq, se); - - if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { - update_rq_clock(rq); - /* - * Update run-time statistics of the 'current'. - */ - update_curr(cfs_rq); - - return; - } - /* - * Find the rightmost entry in the rbtree: - */ - rightmost = __pick_last_entity(cfs_rq); - /* - * Already in the rightmost position? - */ - if (unlikely(!rightmost || entity_before(rightmost, se))) - return; - - /* - * Minimally necessary key value to be last in the tree: - * Upon rescheduling, sched_class::put_prev_task() will place - * 'current' within the tree based on its new key value. - */ - se->vruntime = rightmost->vruntime + 1; -} - #ifdef CONFIG_SMP static void task_waking_fair(struct rq *rq, struct task_struct *p) @@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se) } } +static void set_skip_buddy(struct sched_entity *se) +{ + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->skip = se; + } +} + /* * Preempt the current task with a newly woken task if needed: */ @@ -1932,6 +1945,55 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) } } +/* + * sched_yield() is very simple + * + * The magic of dealing with the ->skip buddy is in pick_next_entity. + */ +static void yield_task_fair(struct rq *rq) +{ + struct task_struct *curr = rq->curr; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + struct sched_entity *se = &curr->se; + + /* + * Are we the only task in the tree? + */ + if (unlikely(rq->nr_running == 1)) + return; + + clear_buddies(cfs_rq, se); + + if (curr->policy != SCHED_BATCH) { + update_rq_clock(rq); + /* + * Update run-time statistics of the 'current'. + */ + update_curr(cfs_rq); + } + + set_skip_buddy(se); +} + +static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt) +{ + struct sched_entity *se = &p->se; + + if (!se->on_rq) + return false; + + /* Tell the scheduler that we'd really like pse to run next. */ + set_next_buddy(se); + + /* Make p's CPU reschedule; pick_next_entity takes care of fairness. */ + if (preempt) + resched_task(rq->curr); + + yield_task_fair(rq); + + return true; +} + #ifdef CONFIG_SMP /************************************************** * Fair scheduling class load-balancing methods: @@ -2123,7 +2185,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) * We need to update shares after updating tg->load_weight in * order to adjust the weight of groups with long running tasks. */ - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); raw_spin_unlock_irqrestore(&rq->lock, flags); @@ -4079,33 +4141,62 @@ static void task_fork_fair(struct task_struct *p) * Priority of the task has changed. Check to see if we preempt * the current task. */ -static void prio_changed_fair(struct rq *rq, struct task_struct *p, - int oldprio, int running) +static void +prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) { + if (!p->se.on_rq) + return; + /* * Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on * this runqueue and our priority is higher than the current's */ - if (running) { + if (rq->curr == p) { if (p->prio > oldprio) resched_task(rq->curr); } else check_preempt_curr(rq, p, 0); } +static void switched_from_fair(struct rq *rq, struct task_struct *p) +{ + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + /* + * Ensure the task's vruntime is normalized, so that when its + * switched back to the fair class the enqueue_entity(.flags=0) will + * do the right thing. + * + * If it was on_rq, then the dequeue_entity(.flags=0) will already + * have normalized the vruntime, if it was !on_rq, then only when + * the task is sleeping will it still have non-normalized vruntime. + */ + if (!se->on_rq && p->state != TASK_RUNNING) { + /* + * Fix up our vruntime so that the current sleep doesn't + * cause 'unlimited' sleep bonus. + */ + place_entity(cfs_rq, se, 0); + se->vruntime -= cfs_rq->min_vruntime; + } +} + /* * We switched to the sched_fair class. */ -static void switched_to_fair(struct rq *rq, struct task_struct *p, - int running) +static void switched_to_fair(struct rq *rq, struct task_struct *p) { + if (!p->se.on_rq) + return; + /* * We were most likely switched from sched_rt, so * kick off the schedule if running, otherwise just see * if we can still preempt the current task. */ - if (running) + if (rq->curr == p) resched_task(rq->curr); else check_preempt_curr(rq, p, 0); @@ -4171,6 +4262,7 @@ static const struct sched_class fair_sched_class = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, + .yield_to_task = yield_to_task_fair, .check_preempt_curr = check_preempt_wakeup, @@ -4191,6 +4283,7 @@ static const struct sched_class fair_sched_class = { .task_fork = task_fork_fair, .prio_changed = prio_changed_fair, + .switched_from = switched_from_fair, .switched_to = switched_to_fair, .get_rr_interval = get_rr_interval_fair, |