From 2d854e5738cded368a0759f85b1197f5c044513d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 19:02:30 +0200 Subject: context_tracing: Fix guest accounting with native vtime 1) If context tracking is enabled with native vtime accounting (which combo is useless except for dev testing), we call vtime_guest_enter() and vtime_guest_exit() on host <-> guest switches. But those are stubs in this configurations. As a result, cputime is not correctly flushed on kvm context switches. 2) If context tracking runs but is disabled on some CPUs, those CPUs end up calling __guest_enter/__guest_exit which in turn call vtime_account_system(). We don't want to call this because we run in tick based accounting for these CPUs. Refactor the guest_enter/guest_exit code such that all combinations finally work. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 52 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index fc09d7b..5984f25 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -20,25 +20,6 @@ struct context_tracking { } state; }; -static inline void __guest_enter(void) -{ - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system(current); - current->flags |= PF_VCPU; -} - -static inline void __guest_exit(void) -{ - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system(current); - current->flags &= ~PF_VCPU; -} #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); @@ -56,9 +37,6 @@ static inline bool context_tracking_active(void) extern void user_enter(void); extern void user_exit(void); -extern void guest_enter(void); -extern void guest_exit(void); - static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; @@ -81,21 +59,35 @@ extern void context_tracking_task_switch(struct task_struct *prev, static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline enum ctx_state exception_enter(void) { return 0; } +static inline void exception_exit(enum ctx_state prev_ctx) { } +static inline void context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next) { } +#endif /* !CONFIG_CONTEXT_TRACKING */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void guest_enter(void); +extern void guest_exit(void); +#else static inline void guest_enter(void) { - __guest_enter(); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags |= PF_VCPU; } static inline void guest_exit(void) { - __guest_exit(); + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; } - -static inline enum ctx_state exception_enter(void) { return 0; } -static inline void exception_exit(enum ctx_state prev_ctx) { } -static inline void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next) { } -#endif /* !CONFIG_CONTEXT_TRACKING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ #endif -- cgit v1.1 From 5b206d48e58204e84d249c4eb18651a1ff7a1274 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 19:05:14 +0200 Subject: vtime: Update a few comments Update a stale comment from the old vtime era and document some locking that might be non obvious. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 5984f25..d883ff0 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -72,8 +72,9 @@ extern void guest_exit(void); static inline void guest_enter(void) { /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. + * This is running in ioctl context so its safe + * to assume that it's the stime pending cputime + * to flush. */ vtime_account_system(current); current->flags |= PF_VCPU; @@ -81,10 +82,7 @@ static inline void guest_enter(void) static inline void guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ + /* Flush the guest cputime we spent on the guest */ vtime_account_system(current); current->flags &= ~PF_VCPU; } -- cgit v1.1 From 2e70933866ace52091a3c11a5c104c063ab0c445 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Jul 2013 00:55:25 +0200 Subject: nohz: Only enable context tracking on full dynticks CPUs The context tracking subsystem has the ability to selectively enable the tracking on any defined subset of CPU. This means that we can define a CPU range that doesn't run the context tracking and another range that does. Now what we want in practice is to enable the tracking on full dynticks CPUs only. In order to perform this, we just need to pass our full dynticks CPU range selection from the full dynticks subsystem to the context tracking. This way we can spare the overhead of RCU user extended quiescent state and vtime maintainance on the CPUs that are outside the full dynticks range. Just keep in mind the raw context tracking itself is still necessary everywhere. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d883ff0..1ae37c7 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -34,6 +34,8 @@ static inline bool context_tracking_active(void) return __this_cpu_read(context_tracking.active); } +extern void context_tracking_cpu_set(int cpu); + extern void user_enter(void); extern void user_exit(void); -- cgit v1.1 From 65f382fd0c8fa483713c0971de9f1dfb4cf1ad9c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Jul 2013 19:12:32 +0200 Subject: context_tracking: Ground setup for static key use Prepare for using a static key in the context tracking subsystem. This will help optimizing the off case on its many users: * user_enter, user_exit, exception_enter, exception_exit, guest_enter, guest_exit, vtime_*() Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 1ae37c7..c138c24 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -4,6 +4,7 @@ #include #include #include +#include #include struct context_tracking { @@ -22,6 +23,7 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING +extern struct static_key context_tracking_enabled; DECLARE_PER_CPU(struct context_tracking, context_tracking); static inline bool context_tracking_in_user(void) @@ -67,6 +69,14 @@ static inline void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ + +#ifdef CONFIG_CONTEXT_TRACKING_FORCE +extern void context_tracking_init(void); +#else +static inline void context_tracking_init(void) { } +#endif /* CONFIG_CONTEXT_TRACKING_FORCE */ + + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void guest_enter(void); extern void guest_exit(void); -- cgit v1.1 From ad65782fba507d91a0a98f519b59e79cac1b474c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Jul 2013 02:44:35 +0200 Subject: context_tracking: Optimize main APIs off case with static key Optimize user and exception entry/exit APIs with static keys. This minimize the overhead for those who enable CONFIG_NO_HZ_FULL without always using it. Having no range passed to nohz_full= should result in the probes to be nopped (at least we hope so...). If this proves not be enough in the long term, we'll need to bring an exception slow path by re-routing the exception handlers. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index c138c24..38ab60b 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -38,23 +38,40 @@ static inline bool context_tracking_active(void) extern void context_tracking_cpu_set(int cpu); -extern void user_enter(void); -extern void user_exit(void); +extern void context_tracking_user_enter(void); +extern void context_tracking_user_exit(void); + +static inline void user_enter(void) +{ + if (static_key_false(&context_tracking_enabled)) + context_tracking_user_enter(); + +} +static inline void user_exit(void) +{ + if (static_key_false(&context_tracking_enabled)) + context_tracking_user_exit(); +} static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; + if (!static_key_false(&context_tracking_enabled)) + return 0; + prev_ctx = this_cpu_read(context_tracking.state); - user_exit(); + context_tracking_user_exit(); return prev_ctx; } static inline void exception_exit(enum ctx_state prev_ctx) { - if (prev_ctx == IN_USER) - user_enter(); + if (static_key_false(&context_tracking_enabled)) { + if (prev_ctx == IN_USER) + context_tracking_user_enter(); + } } extern void context_tracking_task_switch(struct task_struct *prev, -- cgit v1.1 From 48d6a816a8bf36e2a197c322697323003bdc1cfe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Jul 2013 02:44:35 +0200 Subject: context_tracking: Optimize guest APIs off case with static key Optimize guest entry/exit APIs with static keys. This minimize the overhead for those who enable CONFIG_NO_HZ_FULL without always using it. Having no range passed to nohz_full= should result in the probes overhead to be minimized. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 38ab60b..8854ead 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -95,8 +95,23 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void guest_enter(void); -extern void guest_exit(void); +static inline void guest_enter(void) +{ + if (static_key_false(&context_tracking_enabled) && + vtime_accounting_enabled()) + vtime_guest_enter(current); + else + current->flags |= PF_VCPU; +} + +static inline void guest_exit(void) +{ + if (static_key_false(&context_tracking_enabled) && + vtime_accounting_enabled()) + vtime_guest_exit(current); + else + current->flags &= ~PF_VCPU; +} #else static inline void guest_enter(void) { -- cgit v1.1 From 73d424f9af7b571276e6284617cb59726d47bf12 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Jul 2013 19:42:13 +0200 Subject: context_tracking: Optimize context switch off case with static keys No need for syscall slowpath if no CPU is full dynticks, rather nop this in this case. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 8854ead..e070ea5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -40,6 +40,8 @@ extern void context_tracking_cpu_set(int cpu); extern void context_tracking_user_enter(void); extern void context_tracking_user_exit(void); +extern void __context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next); static inline void user_enter(void) { @@ -74,8 +76,12 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } -extern void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next); +static inline void context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next) +{ + if (static_key_false(&context_tracking_enabled)) + __context_tracking_task_switch(prev, next); +} #else static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } @@ -112,6 +118,7 @@ static inline void guest_exit(void) else current->flags &= ~PF_VCPU; } + #else static inline void guest_enter(void) { -- cgit v1.1 From 1b6a259aa5ab16d8b215bfc19ff7c9ffa8858f10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Jul 2013 20:27:43 +0200 Subject: context_tracking: User/kernel broundary cross trace events This can be useful to track all kernel/user round trips. And it's also helpful to debug the context tracking subsystem. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/trace/events/context_tracking.h | 58 +++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 include/trace/events/context_tracking.h (limited to 'include') diff --git a/include/trace/events/context_tracking.h b/include/trace/events/context_tracking.h new file mode 100644 index 0000000..ce8007c --- /dev/null +++ b/include/trace/events/context_tracking.h @@ -0,0 +1,58 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM context_tracking + +#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CONTEXT_TRACKING_H + +#include + +DECLARE_EVENT_CLASS(context_tracking_user, + + TP_PROTO(int dummy), + + TP_ARGS(dummy), + + TP_STRUCT__entry( + __field( int, dummy ) + ), + + TP_fast_assign( + __entry->dummy = dummy; + ), + + TP_printk("%s", "") +); + +/** + * user_enter - called when the kernel resumes to userspace + * @dummy: dummy arg to make trace event macro happy + * + * This event occurs when the kernel resumes to userspace after + * an exception or a syscall. + */ +DEFINE_EVENT(context_tracking_user, user_enter, + + TP_PROTO(int dummy), + + TP_ARGS(dummy) +); + +/** + * user_exit - called when userspace enters the kernel + * @dummy: dummy arg to make trace event macro happy + * + * This event occurs when userspace enters the kernel through + * an exception or a syscall. + */ +DEFINE_EVENT(context_tracking_user, user_exit, + + TP_PROTO(int dummy), + + TP_ARGS(dummy) +); + + +#endif /* _TRACE_CONTEXT_TRACKING_H */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From e7358b3bc0d7fcec0e7a724477f06db2b0c68e21 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 02:15:49 +0200 Subject: context_tracking: Split low level state headers We plan to use the context tracking static key on inline vtime APIs. For this we need to include the context tracking headers from those of vtime. However vtime headers need to stay low level because they are included in hardirq.h that mostly contains standalone definitions. But context_tracking.h includes sched.h for a few task_struct references, therefore it wouldn't be sensible to include it from vtime.h To solve this, lets split the context tracking headers and move out the pure state definitions that only require a few low level headers. We can safely include that small part in vtime.h later. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 31 +-------------------------- include/linux/context_tracking_state.h | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 30 deletions(-) create mode 100644 include/linux/context_tracking_state.h (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e070ea5..82ec487 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -2,40 +2,12 @@ #define _LINUX_CONTEXT_TRACKING_H #include -#include #include -#include +#include #include -struct context_tracking { - /* - * When active is false, probes are unset in order - * to minimize overhead: TIF flags are cleared - * and calls to user_enter/exit are ignored. This - * may be further optimized using static keys. - */ - bool active; - enum ctx_state { - IN_KERNEL = 0, - IN_USER, - } state; -}; - #ifdef CONFIG_CONTEXT_TRACKING -extern struct static_key context_tracking_enabled; -DECLARE_PER_CPU(struct context_tracking, context_tracking); - -static inline bool context_tracking_in_user(void) -{ - return __this_cpu_read(context_tracking.state) == IN_USER; -} - -static inline bool context_tracking_active(void) -{ - return __this_cpu_read(context_tracking.active); -} - extern void context_tracking_cpu_set(int cpu); extern void context_tracking_user_enter(void); @@ -83,7 +55,6 @@ static inline void context_tracking_task_switch(struct task_struct *prev, __context_tracking_task_switch(prev, next); } #else -static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline enum ctx_state exception_enter(void) { return 0; } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h new file mode 100644 index 0000000..0f1979d --- /dev/null +++ b/include/linux/context_tracking_state.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_CONTEXT_TRACKING_STATE_H +#define _LINUX_CONTEXT_TRACKING_STATE_H + +#include +#include + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum ctx_state { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +#ifdef CONFIG_CONTEXT_TRACKING +extern struct static_key context_tracking_enabled; +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} +#else +static inline bool context_tracking_in_user(void) { return false; } +static inline bool context_tracking_active(void) { return false; } +#endif /* CONFIG_CONTEXT_TRACKING */ + +#endif -- cgit v1.1 From 2d4b84739f0acf61f37267b4e80c5bd85fb90a7e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Jul 2013 20:29:43 +0200 Subject: hardirq: Split preempt count mask definitions In order to use static keys with vtime APIs, we'll need to add static keys headers to vtime.h hardirq.h then becomes a problem because it needs vtime.h for irqtime accounting in irq_enter/irq_exit, but it's often included just to get the irq mask definitions in the task preempt_count field and the APIs that come along: in_interrupt(), in_hardirq(), etc... Some very low level arch headers sometimes need these masks and APIs such as arch/m68k/include/asm/irqflags.h for example. But they don't want to include hardirq.h if vtime.h, jump_label.h and even workqueue.h come along. Including such bloated high level header from arch headers can quickly result in circular headers dependency that crash the build. So let's split hardirq.h in two parts: * preempt_mask.h that gathers all the preempt_count definitions and the APIs associated. This one is considered low level and can be safely included anywhere. * hardirq.h that includes the previous one. It defines the irq entry/exit APIs. To avoid future circular headers dependencies, the preempt_mask.h inclusion can replace hardirq.h on files that don't implement irq low level handlers but just need the atomic/context check APIs. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/hardirq.h | 117 +---------------------------------------- include/linux/preempt_mask.h | 122 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 116 deletions(-) create mode 100644 include/linux/preempt_mask.h (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 05bcc09..ccfe17c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -1,126 +1,11 @@ #ifndef LINUX_HARDIRQ_H #define LINUX_HARDIRQ_H -#include +#include #include #include #include -#include -/* - * We put the hardirq and softirq counter into the preemption - * counter. The bitmask has the following meaning: - * - * - bits 0-7 are the preemption count (max preemption depth: 256) - * - bits 8-15 are the softirq count (max # of softirqs: 256) - * - * The hardirq count can in theory reach the same as NR_IRQS. - * In reality, the number of nested IRQS is limited to the stack - * size as well. For archs with over 1000 IRQS it is not practical - * to expect that they will all nest. We give a max of 10 bits for - * hardirq nesting. An arch may choose to give less than 10 bits. - * m68k expects it to be 8. - * - * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) - * - bit 26 is the NMI_MASK - * - bit 27 is the PREEMPT_ACTIVE flag - * - * PREEMPT_MASK: 0x000000ff - * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x03ff0000 - * NMI_MASK: 0x04000000 - */ -#define PREEMPT_BITS 8 -#define SOFTIRQ_BITS 8 -#define NMI_BITS 1 - -#define MAX_HARDIRQ_BITS 10 - -#ifndef HARDIRQ_BITS -# define HARDIRQ_BITS MAX_HARDIRQ_BITS -#endif - -#if HARDIRQ_BITS > MAX_HARDIRQ_BITS -#error HARDIRQ_BITS too high! -#endif - -#define PREEMPT_SHIFT 0 -#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) -#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) -#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) - -#define __IRQ_MASK(x) ((1UL << (x))-1) - -#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) -#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) -#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) -#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) - -#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) -#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) -#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) -#define NMI_OFFSET (1UL << NMI_SHIFT) - -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) - -#ifndef PREEMPT_ACTIVE -#define PREEMPT_ACTIVE_BITS 1 -#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) -#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) -#endif - -#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) -#error PREEMPT_ACTIVE is too low! -#endif - -#define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ - | NMI_MASK)) - -/* - * Are we doing bottom half or hardware interrupt processing? - * Are we in a softirq context? Interrupt context? - * in_softirq - Are we currently processing softirq or have bh disabled? - * in_serving_softirq - Are we currently processing softirq? - */ -#define in_irq() (hardirq_count()) -#define in_softirq() (softirq_count()) -#define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) - -/* - * Are we in NMI context? - */ -#define in_nmi() (preempt_count() & NMI_MASK) - -#if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 -#else -# define PREEMPT_CHECK_OFFSET 0 -#endif - -/* - * Are we running in atomic context? WARNING: this macro cannot - * always detect atomic context; in particular, it cannot know about - * held spinlocks in non-preemptible kernels. Thus it should not be - * used in the general case to determine whether sleeping is possible. - * Do not use in_atomic() in driver code. - */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) - -/* - * Check whether we were atomic before we did preempt_disable(): - * (used by the scheduler, *after* releasing the kernel lock) - */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) - -#ifdef CONFIG_PREEMPT_COUNT -# define preemptible() (preempt_count() == 0 && !irqs_disabled()) -#else -# define preemptible() 0 -#endif #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) extern void synchronize_irq(unsigned int irq); diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h new file mode 100644 index 0000000..931bc61 --- /dev/null +++ b/include/linux/preempt_mask.h @@ -0,0 +1,122 @@ +#ifndef LINUX_PREEMPT_MASK_H +#define LINUX_PREEMPT_MASK_H + +#include +#include + +/* + * We put the hardirq and softirq counter into the preemption + * counter. The bitmask has the following meaning: + * + * - bits 0-7 are the preemption count (max preemption depth: 256) + * - bits 8-15 are the softirq count (max # of softirqs: 256) + * + * The hardirq count can in theory reach the same as NR_IRQS. + * In reality, the number of nested IRQS is limited to the stack + * size as well. For archs with over 1000 IRQS it is not practical + * to expect that they will all nest. We give a max of 10 bits for + * hardirq nesting. An arch may choose to give less than 10 bits. + * m68k expects it to be 8. + * + * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) + * - bit 26 is the NMI_MASK + * - bit 27 is the PREEMPT_ACTIVE flag + * + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 + * HARDIRQ_MASK: 0x03ff0000 + * NMI_MASK: 0x04000000 + */ +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define NMI_BITS 1 + +#define MAX_HARDIRQ_BITS 10 + +#ifndef HARDIRQ_BITS +# define HARDIRQ_BITS MAX_HARDIRQ_BITS +#endif + +#if HARDIRQ_BITS > MAX_HARDIRQ_BITS +#error HARDIRQ_BITS too high! +#endif + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) +#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) +#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) + +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) + +#ifndef PREEMPT_ACTIVE +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) +#endif + +#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) +#error PREEMPT_ACTIVE is too low! +#endif + +#define hardirq_count() (preempt_count() & HARDIRQ_MASK) +#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) + +/* + * Are we doing bottom half or hardware interrupt processing? + * Are we in a softirq context? Interrupt context? + * in_softirq - Are we currently processing softirq or have bh disabled? + * in_serving_softirq - Are we currently processing softirq? + */ +#define in_irq() (hardirq_count()) +#define in_softirq() (softirq_count()) +#define in_interrupt() (irq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) + +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_MASK) + +#if defined(CONFIG_PREEMPT_COUNT) +# define PREEMPT_CHECK_OFFSET 1 +#else +# define PREEMPT_CHECK_OFFSET 0 +#endif + +/* + * Are we running in atomic context? WARNING: this macro cannot + * always detect atomic context; in particular, it cannot know about + * held spinlocks in non-preemptible kernels. Thus it should not be + * used in the general case to determine whether sleeping is possible. + * Do not use in_atomic() in driver code. + */ +#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) + +/* + * Check whether we were atomic before we did preempt_disable(): + * (used by the scheduler, *after* releasing the kernel lock) + */ +#define in_atomic_preempt_off() \ + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) + +#ifdef CONFIG_PREEMPT_COUNT +# define preemptible() (preempt_count() == 0 && !irqs_disabled()) +#else +# define preemptible() 0 +#endif + +#endif /* LINUX_PREEMPT_MASK_H */ -- cgit v1.1 From a5725ac23bf4ff79656ac2c317c323e261327fb3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 16 Jul 2013 18:50:52 +0200 Subject: vtime: Describe overriden functions in dedicated arch headers If the arch overrides some generic vtime APIs, let it describe these on a dedicated and standalone header. This way it becomes convenient to include it in vtime generic headers without irrelevant stuff in such a low level header. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman Cc: Martin Schwidefsky Cc: Heiko Carstens --- include/asm-generic/vtime.h | 0 include/linux/vtime.h | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 include/asm-generic/vtime.h (limited to 'include') diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h new file mode 100644 index 0000000..e69de29 diff --git a/include/linux/vtime.h b/include/linux/vtime.h index b1dd2db..2ad0739 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -1,6 +1,10 @@ #ifndef _LINUX_KERNEL_VTIME_H #define _LINUX_KERNEL_VTIME_H +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include +#endif + struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING -- cgit v1.1 From b04934061330a4a449cfce703c97d887c3e11cd7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 03:10:15 +0200 Subject: vtime: Optimize full dynticks accounting off case with static keys If no CPU is in the full dynticks range, we can avoid the full dynticks cputime accounting through generic vtime along with its overhead and use the traditional tick based accounting instead. Let's do this and nope the off case with static keys. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 6 ++-- include/linux/vtime.h | 70 ++++++++++++++++++++++++++++++++++------ 2 files changed, 63 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 82ec487..1581587 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -74,8 +74,7 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static inline void guest_enter(void) { - if (static_key_false(&context_tracking_enabled) && - vtime_accounting_enabled()) + if (vtime_accounting_enabled()) vtime_guest_enter(current); else current->flags |= PF_VCPU; @@ -83,8 +82,7 @@ static inline void guest_enter(void) static inline void guest_exit(void) { - if (static_key_false(&context_tracking_enabled) && - vtime_accounting_enabled()) + if (vtime_accounting_enabled()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2ad0739..f5b72b3 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -1,22 +1,68 @@ #ifndef _LINUX_KERNEL_VTIME_H #define _LINUX_KERNEL_VTIME_H +#include #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #endif + struct task_struct; +/* + * vtime_accounting_enabled() definitions/declarations + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static inline bool vtime_accounting_enabled(void) { return true; } +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static inline bool vtime_accounting_enabled(void) +{ + if (static_key_false(&context_tracking_enabled)) { + if (context_tracking_active()) + return true; + } + + return false; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +static inline bool vtime_accounting_enabled(void) { return false; } +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ + + +/* + * Common vtime APIs + */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING + +#ifdef __ARCH_HAS_VTIME_TASK_SWITCH extern void vtime_task_switch(struct task_struct *prev); +#else +extern void vtime_common_task_switch(struct task_struct *prev); +static inline void vtime_task_switch(struct task_struct *prev) +{ + if (vtime_accounting_enabled()) + vtime_common_task_switch(prev); +} +#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ + extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account_irq_enter(struct task_struct *tsk); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -static inline bool vtime_accounting_enabled(void) { return true; } -#endif +#ifdef __ARCH_HAS_VTIME_ACCOUNT +extern void vtime_account_irq_enter(struct task_struct *tsk); +#else +extern void vtime_common_account_irq_enter(struct task_struct *tsk); +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + if (vtime_accounting_enabled()) + vtime_common_account_irq_enter(tsk); +} +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ @@ -24,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account_irq_enter(struct task_struct *tsk) { } -static inline bool vtime_accounting_enabled(void) { return false; } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_account_irq_exit(struct task_struct *tsk); -extern bool vtime_accounting_enabled(void); +extern void vtime_gen_account_irq_exit(struct task_struct *tsk); + +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + if (vtime_accounting_enabled()) + vtime_gen_account_irq_exit(tsk); +} + extern void vtime_user_enter(struct task_struct *tsk); + static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); @@ -39,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk) extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); -#else +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ static inline void vtime_account_irq_exit(struct task_struct *tsk) { /* On hard|softirq exit we always account to hard|softirq cputime */ -- cgit v1.1 From 460775df4680b4593d8449bc171008578625a850 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Jul 2013 23:52:27 +0200 Subject: nohz: Optimize full dynticks state checks with static keys These APIs are frequenctly accessed and priority is given to optimize the full dynticks off-case in order to let distros enable this feature without suffering from significant performance regressions. Let's inline these APIs and optimize them with static keys. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/tick.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9180f4b..c60b079 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -158,15 +160,34 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL +extern bool tick_nohz_full_running; +extern cpumask_var_t tick_nohz_full_mask; + +static inline bool tick_nohz_full_enabled(void) +{ + if (!static_key_false(&context_tracking_enabled)) + return false; + + return tick_nohz_full_running; +} + +static inline bool tick_nohz_full_cpu(int cpu) +{ + if (!tick_nohz_full_enabled()) + return false; + + return cpumask_test_cpu(cpu, tick_nohz_full_mask); +} + extern void tick_nohz_init(void); -extern int tick_nohz_full_cpu(int cpu); extern void tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_all(void); extern void tick_nohz_task_switch(struct task_struct *tsk); #else static inline void tick_nohz_init(void) { } -static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline bool tick_nohz_full_enabled(void) { return false; } +static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } -- cgit v1.1 From d13508f9440e46dccac6a2dd48d51a73b2207482 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Jul 2013 23:52:27 +0200 Subject: nohz: Optimize full dynticks's sched hooks with static keys Scheduler IPIs and task context switches are serious fast path. Let's try to hide as much as we can the impact of full dynticks APIs' off case that are called on these sites through the use of static keys. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/tick.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index c60b079..a7ef1d6 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -180,20 +180,32 @@ static inline bool tick_nohz_full_cpu(int cpu) } extern void tick_nohz_init(void); -extern void tick_nohz_full_check(void); +extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_all(void); -extern void tick_nohz_task_switch(struct task_struct *tsk); +extern void __tick_nohz_task_switch(struct task_struct *tsk); #else static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } -static inline void tick_nohz_full_check(void) { } +static inline void __tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } -static inline void tick_nohz_task_switch(struct task_struct *tsk) { } +static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } #endif +static inline void tick_nohz_full_check(void) +{ + if (tick_nohz_full_enabled()) + __tick_nohz_full_check(); +} + +static inline void tick_nohz_task_switch(struct task_struct *tsk) +{ + if (tick_nohz_full_enabled()) + __tick_nohz_task_switch(tsk); +} + # ifdef CONFIG_CPU_IDLE_GOV_MENU extern void menu_hrtimer_cancel(void); -- cgit v1.1