From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 15:23:44 -0800 Subject: rbtree: Add support for augmented rbtrees Add support for augmented rbtrees in core rbtree code. This will be used in subsequent patches, in x86 PAT code, which needs interval trees to efficiently keep track of PAT ranges. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/rbtree.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 9c29541..8e33a25 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -110,6 +110,7 @@ struct rb_node struct rb_root { struct rb_node *rb_node; + void (*augment_cb)(struct rb_node *node); }; @@ -129,7 +130,9 @@ static inline void rb_set_color(struct rb_node *rb, int color) rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; } -#define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT (struct rb_root) { NULL, NULL, } +#define RB_AUGMENT_ROOT(x) (struct rb_root) { NULL, x} + #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -- cgit v1.1 From 4abc14a733f9002c05623db755aaafdd27fa7a91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 Jan 2010 14:52:23 +0100 Subject: iommu-api: Rename ->{un}map function pointers to ->{un}map_range The new function pointer names match better with the top-level functions of the iommu-api which are using them. Main intention of this change is to make the ->{un}map pointer names free for two new mapping functions. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3af4ffd..0f18f37 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -36,10 +36,10 @@ struct iommu_ops { void (*domain_destroy)(struct iommu_domain *domain); int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); - void (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*map_range)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + void (*unmap_range)(struct iommu_domain *domain, unsigned long iova, + size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, unsigned long iova); int (*domain_has_cap)(struct iommu_domain *domain, -- cgit v1.1 From cefc53c7f494240d4813c80154c7617452d1904d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 8 Jan 2010 13:35:09 +0100 Subject: iommu-api: Add iommu_map and iommu_unmap functions These two functions provide support for mapping and unmapping physical addresses to io virtual addresses. The difference to the iommu_(un)map_range() is that the new functions take a gfp_order parameter instead of a size. This allows the IOMMU backend implementations to detect easier if a given range can be mapped by larger page sizes. These new functions should replace the old ones in the long term. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0f18f37..6d0035b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -60,6 +60,10 @@ extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size); +extern int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, int gfp_order, int prot); +extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova, + int gfp_order); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, @@ -108,6 +112,18 @@ static inline void iommu_unmap_range(struct iommu_domain *domain, { } +static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, int gfp_order, int prot) +{ + return -ENODEV; +} + +static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, + int gfp_order) +{ + return -ENODEV; +} + static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova) { -- cgit v1.1 From 67651786948c360c3122b8a17cb1e59209d50880 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 21 Jan 2010 16:32:27 +0100 Subject: iommu-api: Add ->{un}map callbacks to iommu_ops This patch adds new callbacks for mapping and unmapping pages to the iommu_ops structure. These callbacks are aware of page sizes which makes them different to the ->{un}map_range callbacks. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6d0035b..5a7a3d8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -36,6 +36,10 @@ struct iommu_ops { void (*domain_destroy)(struct iommu_domain *domain); int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, int gfp_order, int prot); + int (*unmap)(struct iommu_domain *domain, unsigned long iova, + int gfp_order); int (*map_range)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); void (*unmap_range)(struct iommu_domain *domain, unsigned long iova, -- cgit v1.1 From 12c7389abe5786349d3ea6da1961cf78d0c1c7cd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 21 Jan 2010 11:50:28 +0100 Subject: iommu-api: Remove iommu_{un}map_range functions These functions are not longer used and can be removed savely. There functionality is now provided by the iommu_{un}map functions which are also capable of multiple page sizes. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5a7a3d8..be22ad8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -40,10 +40,6 @@ struct iommu_ops { phys_addr_t paddr, int gfp_order, int prot); int (*unmap)(struct iommu_domain *domain, unsigned long iova, int gfp_order); - int (*map_range)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); - void (*unmap_range)(struct iommu_domain *domain, unsigned long iova, - size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, unsigned long iova); int (*domain_has_cap)(struct iommu_domain *domain, @@ -60,10 +56,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); -extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, - size_t size); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, int gfp_order, int prot); extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -104,18 +96,6 @@ static inline void iommu_detach_device(struct iommu_domain *domain, { } -static inline int iommu_map_range(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - return -ENODEV; -} - -static inline void iommu_unmap_range(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ -} - static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, int gfp_order, int prot) { -- cgit v1.1 From 41acab8851a0408c1d5ad6c21a07456f88b54d40 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 10 Mar 2010 23:37:45 -0300 Subject: sched: Implement group scheduler statistics in one struct Put all statistic fields of sched_entity in one struct, sched_statistics, and embed it into sched_entity. This change allows to memset the sched_statistics to 0 when needed (for instance when forking), avoiding bugs of non initialized fields. Signed-off-by: Lucas De Marchi Signed-off-by: Peter Zijlstra LKML-Reference: <1268275065-18542-1-git-send-email-lucas.de.marchi@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 54 ++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b1753f..8cc863d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1127,36 +1127,8 @@ struct load_weight { unsigned long weight, inv_weight; }; -/* - * CFS stats for a schedulable entity (task, task-group etc) - * - * Current field usage histogram: - * - * 4 se->block_start - * 4 se->run_node - * 4 se->sleep_start - * 6 se->load.weight - */ -struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; - - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; - - u64 last_wakeup; - u64 avg_overlap; - - u64 nr_migrations; - - u64 start_runtime; - u64 avg_wakeup; - #ifdef CONFIG_SCHEDSTATS +struct sched_statistics { u64 wait_start; u64 wait_max; u64 wait_count; @@ -1188,6 +1160,30 @@ struct sched_entity { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; +}; +#endif + +struct sched_entity { + struct load_weight load; /* for load-balancing */ + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; + + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; + + u64 last_wakeup; + u64 avg_overlap; + + u64 nr_migrations; + + u64 start_runtime; + u64 avg_wakeup; + +#ifdef CONFIG_SCHEDSTATS + struct sched_statistics statistics; #endif #ifdef CONFIG_FAIR_GROUP_SCHED -- cgit v1.1 From 39c0cbe2150cbd848a25ba6cdb271d1ad46818ad Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 11 Mar 2010 17:17:13 +0100 Subject: sched: Rate-limit nohz Entering nohz code on every micro-idle is costing ~10% throughput for netperf TCP_RR when scheduling cross-cpu. Rate limiting entry fixes this, but raises ticks a bit. On my Q6600, an idle box goes from ~85 interrupts/sec to 128. The higher the context switch rate, the more nohz entry costs. With this patch and some cycle recovery patches in my tree, max cross cpu context switch rate is improved by ~16%, a large portion of which of which is this ratelimiting. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1268301003.6785.28.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8cc863d..13efe7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -271,11 +271,17 @@ extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); extern int get_nohz_load_balancer(void); +extern int nohz_ratelimit(int cpu); #else static inline int select_nohz_load_balancer(int cpu) { return 0; } + +static inline int nohz_ratelimit(int cpu) +{ + return 0; +} #endif /* -- cgit v1.1 From b42e0c41a422a212ddea0666d5a3a0e3c35206db Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 11 Mar 2010 17:15:38 +0100 Subject: sched: Remove avg_wakeup Testing the load which led to this heuristic (nfs4 kbuild) shows that it has outlived it's usefullness. With intervening load balancing changes, I cannot see any difference with/without, so recover there fastpath cycles. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1268301062.6785.29.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 13efe7d..70c560f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1185,9 +1185,6 @@ struct sched_entity { u64 nr_migrations; - u64 start_runtime; - u64 avg_wakeup; - #ifdef CONFIG_SCHEDSTATS struct sched_statistics statistics; #endif -- cgit v1.1 From e12f31d3e5d36328c7fbd0fce40a95e70b59152c Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 11 Mar 2010 17:15:51 +0100 Subject: sched: Remove avg_overlap Both avg_overlap and avg_wakeup had an inherent problem in that their accuracy was detrimentally affected by cross-cpu wakeups, this because we are missing the necessary call to update_curr(). This can't be fixed without increasing overhead in our already too fat fastpath. Additionally, with recent load balancing changes making us prefer to place tasks in an idle cache domain (which is good for compute bound loads), communicating tasks suffer when a sync wakeup, which would enable affine placement, is turned into a non-sync wakeup by SYNC_LESS. With one task on the runqueue, wake_affine() rejects the affine wakeup request, leaving the unfortunate where placed, taking frequent cache misses. Remove it, and recover some fastpath cycles. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1268301121.6785.30.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 70c560f..8604884 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1180,9 +1180,6 @@ struct sched_entity { u64 vruntime; u64 prev_sum_exec_runtime; - u64 last_wakeup; - u64 avg_overlap; - u64 nr_migrations; #ifdef CONFIG_SCHEDSTATS -- cgit v1.1 From 4bdde044dc36ac7b01f7502394d52619af9d1927 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:58:05 +0800 Subject: tracing: Convert some signal events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 23639 6084 8 29731 7423 kernel/signal.o.orig 22727 6084 8 28819 7093 kernel/signal.o 2 events are converted: signal_queue_overflow: signal_overflow_fail, signal_lose_info No functional change. Acked-by: Masami Hiramatsu Signed-off-by: Li Zefan LKML-Reference: <4BA97FBD.8070703@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/signal.h | 52 ++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a510b75..814566c 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -100,18 +100,7 @@ TRACE_EVENT(signal_deliver, __entry->sa_handler, __entry->sa_flags) ); -/** - * signal_overflow_fail - called when signal queue is overflow - * @sig: signal number - * @group: signal to process group or not (bool) - * @info: pointer to struct siginfo - * - * Kernel fails to generate 'sig' signal with 'info' siginfo, because - * siginfo queue is overflow, and the signal is dropped. - * 'group' is not 0 if the signal will be sent to a process group. - * 'sig' is always one of RT signals. - */ -TRACE_EVENT(signal_overflow_fail, +DECLARE_EVENT_CLASS(signal_queue_overflow, TP_PROTO(int sig, int group, struct siginfo *info), @@ -135,6 +124,24 @@ TRACE_EVENT(signal_overflow_fail, ); /** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info) +); + +/** * signal_lose_info - called when siginfo is lost * @sig: signal number * @group: signal to process group or not (bool) @@ -145,28 +152,13 @@ TRACE_EVENT(signal_overflow_fail, * 'group' is not 0 if the signal will be sent to a process group. * 'sig' is always one of non-RT signals. */ -TRACE_EVENT(signal_lose_info, +DEFINE_EVENT(signal_queue_overflow, signal_lose_info, TP_PROTO(int sig, int group, struct siginfo *info), - TP_ARGS(sig, group, info), - - TP_STRUCT__entry( - __field( int, sig ) - __field( int, group ) - __field( int, errno ) - __field( int, code ) - ), - - TP_fast_assign( - __entry->sig = sig; - __entry->group = group; - TP_STORE_SIGINFO(__entry, info); - ), - - TP_printk("sig=%d group=%d errno=%d code=%d", - __entry->sig, __entry->group, __entry->errno, __entry->code) + TP_ARGS(sig, group, info) ); + #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ -- cgit v1.1 From 50354a8a28d0c91695a2d6d25b5a821bfe557a07 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:58:24 +0800 Subject: tracing: Update comments Make some comments consistent with the code. Signed-off-by: Li Zefan LKML-Reference: <4BA97FD0.7090202@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index ea6f9d4..75dd778 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -154,9 +154,11 @@ * * field = (typeof(field))entry; * - * p = get_cpu_var(ftrace_event_seq); + * p = &get_cpu_var(ftrace_event_seq); * trace_seq_init(p); - * ret = trace_seq_printf(s, "\n"); + * ret = trace_seq_printf(s, "%s: ", ); + * if (ret) + * ret = trace_seq_printf(s, "\n"); * put_cpu(); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; @@ -450,38 +452,38 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \ * * static void ftrace_raw_event_(proto) * { + * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ring_buffer_event *event; * struct ftrace_raw_ *entry; <-- defined in stage 1 * struct ring_buffer *buffer; * unsigned long irq_flags; + * int __data_size; * int pc; * * local_save_flags(irq_flags); * pc = preempt_count(); * + * __data_size = ftrace_get_offsets_(&__data_offsets, args); + * * event = trace_current_buffer_lock_reserve(&buffer, * event_.id, - * sizeof(struct ftrace_raw_), + * sizeof(*entry) + __data_size, * irq_flags, pc); * if (!event) * return; * entry = ring_buffer_event_data(event); * - * ; <-- Here we assign the entries by the __field and - * __array macros. + * { ; } <-- Here we assign the entries by the __field and + * __array macros. * - * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); + * if (!filter_current_check_discard(buffer, event_call, entry, event)) + * trace_current_buffer_unlock_commit(buffer, + * event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { - * int ret; - * - * ret = register_trace_(ftrace_raw_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * return register_trace_(ftrace_raw_event_); * } * * static void ftrace_unreg_event_(struct ftrace_event_call *unused) @@ -493,6 +495,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \ * .trace = ftrace_raw_output_, <-- stage 2 * }; * + * static const char print_fmt_[] = ; + * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { @@ -501,6 +505,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \ * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, + * .print_fmt = print_fmt_, + * .define_fields = ftrace_define_fields_, * } * */ @@ -569,7 +575,6 @@ ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ return; \ entry = ring_buffer_event_data(event); \ \ - \ tstruct \ \ { assign; } \ -- cgit v1.1 From ae832d1e03ac9bf09fb8a07fb37908ab40c7cd0e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:57:43 +0800 Subject: tracing: Remove side effect from module tracepoints that caused a GPF Remove the @refcnt argument, because it has side-effects, and arguments with side-effects are not skipped by the jump over disabled instrumentation and are executed even when the tracepoint is disabled. This was also causing a GPF as found by Randy Dunlap: Subject: 2.6.33 GP fault only when built with tracing LKML-Reference: <4BA2B69D.3000309@oracle.com> Note, the current 2.6.34-rc has a fix for the actual cause of the GPF, but this fixes one of its triggers. Tested-by: Randy Dunlap Acked-by: Mathieu Desnoyers Signed-off-by: Li Zefan LKML-Reference: <4BA97FA7.6040406@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/module.h | 6 ++---- include/trace/events/module.h | 14 +++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ff..393ec39 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -460,8 +460,7 @@ static inline void __module_get(struct module *module) if (module) { preempt_disable(); __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); preempt_enable(); } } @@ -475,8 +474,7 @@ static inline int try_module_get(struct module *module) if (likely(module_is_live(module))) { __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); } else ret = 0; diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 4b0f48b..a585f81 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -53,9 +53,9 @@ TRACE_EVENT(module_free, DECLARE_EVENT_CLASS(module_refcnt, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip), TP_STRUCT__entry( __field( unsigned long, ip ) @@ -65,7 +65,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = refcnt; + __entry->refcnt = __this_cpu_read(mod->refptr->count); __assign_str(name, mod->name); ), @@ -75,16 +75,16 @@ DECLARE_EVENT_CLASS(module_refcnt, DEFINE_EVENT(module_refcnt, module_get, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); DEFINE_EVENT(module_refcnt, module_put, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); TRACE_EVENT(module_request, -- cgit v1.1 From eb0c53771fb2f5f66b0edb3ebce33be4bbf1c285 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Mar 2010 14:25:18 -0400 Subject: tracing: Fix compile error in module tracepoints when MODULE_UNLOAD not set If modules are configured in the build but unloading of modules is not, then the refcnt is not defined. Place the get/put module tracepoints under CONFIG_MODULE_UNLOAD since it references this field in the module structure. As a side-effect, this patch also reduces the code when MODULE_UNLOAD is not set, because these unused tracepoints are not created. Signed-off-by: Steven Rostedt --- include/trace/events/module.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index a585f81..f07b44a 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,6 +51,9 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); +#ifdef CONFIG_MODULE_UNLOAD +/* trace_module_get/put are only used if CONFIG_MODULE_UNLOAD is defined */ + DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip), @@ -86,6 +89,7 @@ DEFINE_EVENT(module_refcnt, module_put, TP_ARGS(mod, ip) ); +#endif /* CONFIG_MODULE_UNLOAD */ TRACE_EVENT(module_request, -- cgit v1.1 From 66a8cb95ed04025664d1db4e952155ee1dccd048 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 31 Mar 2010 13:21:56 -0400 Subject: ring-buffer: Add place holder recording of dropped events Currently, when the ring buffer drops events, it does not record the fact that it did so. It does inform the writer that the event was dropped by returning a NULL event, but it does not put in any place holder where the event was dropped. This is not a trivial thing to add because the ring buffer mostly runs in overwrite (flight recorder) mode. That is, when the ring buffer is full, new data will overwrite old data. In a produce/consumer mode, where new data is simply dropped when the ring buffer is full, it is trivial to add the placeholder for dropped events. When there's more room to write new data, then a special event can be added to notify the reader about the dropped events. But in overwrite mode, any new write can overwrite events. A place holder can not be inserted into the ring buffer since there never may be room. A reader could also come in at anytime and miss the placeholder. Luckily, the way the ring buffer works, the read side can find out if events were lost or not, and how many events. Everytime a write takes place, if it overwrites the header page (the next read) it updates a "overrun" variable that keeps track of the number of lost events. When a reader swaps out a page from the ring buffer, it can record this number, perfom the swap, and then check to see if the number changed, and take the diff if it has, which would be the number of events dropped. This can be stored by the reader and returned to callers of the reader. Since the reader page swap will fail if the writer moved the head page since the time the reader page set up the swap, this gives room to record the overruns without worrying about races. If the reader sets up the pages, records the overrun, than performs the swap, if the swap succeeds, then the overrun variable has not been updated since the setup before the swap. For binary readers of the ring buffer, a flag is set in the header of each sub page (sub buffer) of the ring buffer. This flag is embedded in the size field of the data on the sub buffer, in the 31st bit (the size can be 32 or 64 bits depending on the architecture), but only 27 bits needs to be used for the actual size (less actually). We could add a new field in the sub buffer header to also record the number of events dropped since the last read, but this will change the format of the binary ring buffer a bit too much. Perhaps this change can be made if the information on the number of events dropped is considered important enough. Note, the notification of dropped events is only used by consuming reads or peeking at the ring buffer. Iterating over the ring buffer does not keep this information because the necessary data is only available when a page swap is made, and the iterator does not swap out pages. Cc: Robert Richter Cc: Andi Kleen Cc: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Luis Claudio R. Goncalves" Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 5fcc31e..c829776 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -120,9 +120,11 @@ int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events); struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); +ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events); struct ring_buffer_iter * ring_buffer_read_start(struct ring_buffer *buffer, int cpu); -- cgit v1.1 From bc21b478425ac73f66a5ec0b375a5e0d12d609ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 31 Mar 2010 19:49:26 -0400 Subject: tracing: Show the lost events in the trace_pipe output Now that the ring buffer can keep track of where events are lost. Use this information to the output of trace_pipe: hackbench-3588 [001] 1326.701660: lock_acquire: ffffffff816591e0 read rcu_read_lock hackbench-3588 [001] 1326.701661: lock_acquire: ffff88003f4091f0 &(&dentry->d_lock)->rlock hackbench-3588 [001] 1326.701664: lock_release: ffff88003f4091f0 &(&dentry->d_lock)->rlock CPU:1 [LOST 673 EVENTS] hackbench-3588 [001] 1326.702711: kmem_cache_free: call_site=ffffffff81102b85 ptr=ffff880026d96738 hackbench-3588 [001] 1326.702712: lock_release: ffff88003e1480a8 &mm->mmap_sem hackbench-3588 [001] 1326.702713: lock_acquire: ffff88003e1480a8 &mm->mmap_sem Even works with the function graph tracer: 2) ! 170.098 us | } 2) 4.036 us | rcu_irq_exit(); 2) 3.657 us | idle_cpu(); 2) ! 190.301 us | } CPU:2 [LOST 2196 EVENTS] 2) 0.853 us | } /* cancel_dirty_page */ 2) | remove_from_page_cache() { 2) 1.578 us | _raw_spin_lock_irq(); 2) | __remove_from_page_cache() { Note, it does not work with the iterator "trace" file, since it requires the use of consuming the page from the ring buffer to determine how many events were lost, which the iterator does not do. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c0f4b36..39e71b0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -58,6 +58,7 @@ struct trace_iterator { /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; + unsigned long lost_events; int leftover; int cpu; u64 ts; -- cgit v1.1 From 897f0b3c3ff40b443c84e271bef19bd6ae885195 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 15 Mar 2010 10:10:03 +0100 Subject: sched: Kill the broken and deadlockable cpuset_lock/cpuset_cpus_allowed_locked code This patch just states the fact the cpusets/cpuhotplug interaction is broken and removes the deadlockable code which only pretends to work. - cpuset_lock() doesn't really work. It is needed for cpuset_cpus_allowed_locked() but we can't take this lock in try_to_wake_up()->select_fallback_rq() path. - cpuset_lock() is deadlockable. Suppose that a task T bound to CPU takes callback_mutex. If cpu_down(CPU) happens before T drops callback_mutex stop_machine() preempts T, then migration_call(CPU_DEAD) tries to take cpuset_lock() and hangs forever because CPU is already dead and thus T can't be scheduled. - cpuset_cpus_allowed_locked() is deadlockable too. It takes task_lock() which is not irq-safe, but try_to_wake_up() can be called from irq. Kill them, and change select_fallback_rq() to use cpu_possible_mask, like we currently do without CONFIG_CPUSETS. Also, with or without this patch, with or without CONFIG_CPUSETS, the callers of select_fallback_rq() can race with each other or with set_cpus_allowed() pathes. The subsequent patches try to to fix these problems. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100315091003.GA9123@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index a5740fc..eeaaee7 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -21,8 +21,6 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); -extern void cpuset_cpus_allowed_locked(struct task_struct *p, - struct cpumask *mask); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -69,9 +67,6 @@ struct seq_file; extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); -extern void cpuset_lock(void); -extern void cpuset_unlock(void); - extern int cpuset_mem_spread_node(void); static inline int cpuset_do_page_mem_spread(void) @@ -105,11 +100,6 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, { cpumask_copy(mask, cpu_possible_mask); } -static inline void cpuset_cpus_allowed_locked(struct task_struct *p, - struct cpumask *mask) -{ - cpumask_copy(mask, cpu_possible_mask); -} static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { @@ -157,9 +147,6 @@ static inline void cpuset_task_status_allowed(struct seq_file *m, { } -static inline void cpuset_lock(void) {} -static inline void cpuset_unlock(void) {} - static inline int cpuset_mem_spread_node(void) { return 0; -- cgit v1.1 From 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 15 Mar 2010 10:10:23 +0100 Subject: sched: _cpu_down(): Don't play with current->cpus_allowed _cpu_down() changes the current task's affinity and then recovers it at the end. The problems are well known: we can't restore old_allowed if it was bound to the now-dead-cpu, and we can race with the userspace which can change cpu-affinity during unplug. _cpu_down() should not play with current->cpus_allowed at all. Instead, take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable() removes the dying cpu from cpu_online_mask. Signed-off-by: Oleg Nesterov Acked-by: Rafael J. Wysocki Signed-off-by: Peter Zijlstra LKML-Reference: <20100315091023.GA9148@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 43c9451..8bea407 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #ifdef CONFIG_HOTPLUG_CPU +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} -- cgit v1.1 From 9084bb8246ea935b98320554229e2f371f7f52fa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 15 Mar 2010 10:10:27 +0100 Subject: sched: Make select_fallback_rq() cpuset friendly Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems with select_fallback_rq(). It can be called from any context and can't use any cpuset locks including task_lock(). It is called when the task doesn't have online cpus in ->cpus_allowed but ttwu/etc must be able to find a suitable cpu. I am not proud of this patch. Everything which needs such a fat comment can't be good even if correct. But I'd prefer to not change the locking rules in the code I hardly understand, and in any case I believe this simple change make the code much more correct compared to deadlocks we currently have. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100315091027.GA9155@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index eeaaee7..a73454a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); +extern int cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_copy(mask, cpu_possible_mask); } +static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) +{ + cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + return cpumask_any(cpu_active_mask); +} + static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; -- cgit v1.1 From 0017d735092844118bef006696a750a0e4ef6ebd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Mar 2010 18:34:10 +0100 Subject: sched: Fix TASK_WAKING vs fork deadlock Oleg noticed a few races with the TASK_WAKING usage on fork. - since TASK_WAKING is basically a spinlock, it should be IRQ safe - since we set TASK_WAKING (*) without holding rq->lock it could be there still is a rq->lock holder, thereby not actually providing full serialization. (*) in fact we clear PF_STARTING, which in effect enables TASK_WAKING. Cure the second issue by not setting TASK_WAKING in sched_fork(), but only temporarily in wake_up_new_task() while calling select_task_rq(). Cure the first by holding rq->lock around the select_task_rq() call, this will disable IRQs, this however requires that we push down the rq->lock release into select_task_rq_fair()'s cgroup stuff. Because select_task_rq_fair() still needs to drop the rq->lock we cannot fully get rid of TASK_WAKING. Reported-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8bea407..fb6c188 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1046,7 +1046,8 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); + int (*select_task_rq)(struct rq *rq, struct task_struct *p, + int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); -- cgit v1.1 From 371fd7e7a56a5c136d31aa980011bd2f131c3ef5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Mar 2010 16:38:48 +0100 Subject: sched: Add enqueue/dequeue flags In order to reduce the dependency on TASK_WAKING rework the enqueue interface to support a proper flags field. Replace the int wakeup, bool head arguments with an int flags argument and create the following flags: ENQUEUE_WAKEUP - the enqueue is a wakeup of a sleeping task, ENQUEUE_WAKING - the enqueue has relative vruntime due to having sched_class::task_waking() called, ENQUEUE_HEAD - the waking task should be places on the head of the priority queue (where appropriate). For symmetry also convert sched_class::dequeue() to a flags scheme. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index fb6c188..e3e900f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1032,12 +1032,17 @@ struct sched_domain; #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ #define WF_FORK 0x02 /* child wakeup after fork */ +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_WAKING 2 +#define ENQUEUE_HEAD 4 + +#define DEQUEUE_SLEEP 1 + struct sched_class { const struct sched_class *next; - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup, - bool head); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); void (*yield_task) (struct rq *rq); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); -- cgit v1.1 From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2010 15:03:07 +0100 Subject: bitops: Optimize hweight() by making use of compile-time evaluation Rename the extisting runtime hweight() implementations to __arch_hweight(), rename the compile-time versions to __const_hweight() and then have hweight() pick between them. Suggested-by: H. Peter Anvin Signed-off-by: Peter Zijlstra LKML-Reference: <20100318111929.GB11152@aftab> Acked-by: H. Peter Anvin LKML-Reference: <1265028224.24455.154.camel@laptop> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 11 ++++++++ include/asm-generic/bitops/const_hweight.h | 42 ++++++++++++++++++++++++++++++ include/asm-generic/bitops/hweight.h | 8 ++---- include/linux/bitops.h | 25 ------------------ 4 files changed, 55 insertions(+), 31 deletions(-) create mode 100644 include/asm-generic/bitops/arch_hweight.h create mode 100644 include/asm-generic/bitops/const_hweight.h (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h new file mode 100644 index 0000000..3a7be84 --- /dev/null +++ b/include/asm-generic/bitops/arch_hweight.h @@ -0,0 +1,11 @@ +#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ + +#include + +extern unsigned int __arch_hweight32(unsigned int w); +extern unsigned int __arch_hweight16(unsigned int w); +extern unsigned int __arch_hweight8(unsigned int w); +extern unsigned long __arch_hweight64(__u64 w); + +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h new file mode 100644 index 0000000..fa2a50b --- /dev/null +++ b/include/asm-generic/bitops/const_hweight.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ + +/* + * Compile time versions of __arch_hweightN() + */ +#define __const_hweight8(w) \ + ( (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))) ) + +#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) +#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) +#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) + +/* + * Generic interface. + */ +#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) +#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) +#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) +#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) + +/* + * Interface for known constant arguments + */ +#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) +#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) +#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) +#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) + +/* + * Type invariant interface to the compile time constant hweight functions. + */ +#define HWEIGHT(w) HWEIGHT64((u64)w) + +#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h index fbbc383..a94d651 100644 --- a/include/asm-generic/bitops/hweight.h +++ b/include/asm-generic/bitops/hweight.h @@ -1,11 +1,7 @@ #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ -#include - -extern unsigned int hweight32(unsigned int w); -extern unsigned int hweight16(unsigned int w); -extern unsigned int hweight8(unsigned int w); -extern unsigned long hweight64(__u64 w); +#include +#include #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b793898..c55d5bc 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -/* - * Clearly slow versions of the hweightN() functions, their benefit is - * of course compile time evaluation of constant arguments. - */ -#define HWEIGHT8(w) \ - ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ - (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ - (!!((w) & (1ULL << 7))) ) - -#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) -#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) -#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) - -/* - * Type invariant version that simply casts things to the - * largest type. - */ -#define HWEIGHT(w) HWEIGHT64((u64)(w)) - /** * rol32 - rotate a 32-bit value left * @word: value to rotate -- cgit v1.1 From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Mar 2010 17:34:46 +0100 Subject: x86: Add optimized popcnt variants Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertize it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on a F10h machine. Signed-off-by: Borislav Petkov LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h index 3a7be84..9a81c1e 100644 --- a/include/asm-generic/bitops/arch_hweight.h +++ b/include/asm-generic/bitops/arch_hweight.h @@ -3,9 +3,23 @@ #include -extern unsigned int __arch_hweight32(unsigned int w); -extern unsigned int __arch_hweight16(unsigned int w); -extern unsigned int __arch_hweight8(unsigned int w); -extern unsigned long __arch_hweight64(__u64 w); +inline unsigned int __arch_hweight32(unsigned int w) +{ + return __sw_hweight32(w); +} +inline unsigned int __arch_hweight16(unsigned int w) +{ + return __sw_hweight16(w); +} + +inline unsigned int __arch_hweight8(unsigned int w) +{ + return __sw_hweight8(w); +} + +inline unsigned long __arch_hweight64(__u64 w) +{ + return __sw_hweight64(w); +} #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ -- cgit v1.1 From 6dad2a29646ce3792c40cfc52d77e9b65a7bb143 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 31 Mar 2010 21:56:46 +0200 Subject: cpufreq: Unify sysfs attribute definition macros Multiple modules used to define those which are with identical functionality and were needlessly replicated among the different cpufreq drivers. Push them into the header and remove duplication. Signed-off-by: Borislav Petkov LKML-Reference: <1270065406-1814-7-git-send-email-bp@amd64.org> Reviewed-by: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/cpufreq.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4de02b1..9f15150 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -278,6 +278,27 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +#define cpufreq_freq_attr_ro(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define cpufreq_freq_attr_ro_perm(_name, _perm) \ +static struct freq_attr _name = \ +__ATTR(_name, _perm, show_##_name, NULL) + +#define cpufreq_freq_attr_ro_old(_name) \ +static struct freq_attr _name##_old = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +#define cpufreq_freq_attr_rw(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +#define cpufreq_freq_attr_rw_old(_name) \ +static struct freq_attr _name##_old = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + + struct global_attr { struct attribute attr; ssize_t (*show)(struct kobject *kobj, @@ -286,6 +307,15 @@ struct global_attr { const char *c, size_t count); }; +#define define_one_global_ro(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define define_one_global_rw(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + + /********************************************************************* * CPUFREQ 2.6. INTERFACE * *********************************************************************/ -- cgit v1.1 From cecbca96da387428e220e307a9c945e37e2f4d9e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Apr 2010 19:08:41 +0200 Subject: tracing: Dump either the oops's cpu source or all cpus buffers The ftrace_dump_on_oops kernel parameter, sysctl and sysrq let one dump every cpu buffers when an oops or panic happens. It's nice when you have few cpus but it may take ages if have many, plus you miss the real origin of the problem in all the cpu traces. Sometimes, all you need is to dump the cpu buffer that triggered the opps, most of the time it is our main interest. This patch modifies ftrace_dump_on_oops to handle this choice. The ftrace_dump_on_oops kernel parameter, when it comes alone, has the same behaviour than before. But ftrace_dump_on_oops=orig_cpu will only dump the buffer of the cpu that oops'ed. Similarly, sysctl kernel.ftrace_dump_on_oops=1 and echo 1 > /proc/sys/kernel/ftrace_dump_on_oops keep their previous behaviour. But setting 2 jumps into cpu origin dump mode. v2: Fix double setup v3: Fix spelling issues reported by Randy Dunlap v4: Also update __ftrace_dump in the selftests Signed-off-by: Frederic Weisbecker Acked-by: David S. Miller Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Li Zefan Cc: Lai Jiangshan --- include/linux/ftrace.h | 4 +++- include/linux/kernel.h | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 01e6ade..ea5b1aa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -492,7 +492,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) return tsk->trace & TSK_TRACE_FL_GRAPH; } -extern int ftrace_dump_on_oops; +enum ftrace_dump_mode; + +extern enum ftrace_dump_mode ftrace_dump_on_oops; #ifdef CONFIG_PREEMPT #define INIT_TRACE_RECURSION .trace_recursion = 0, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9365227..9fb1c12 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -490,6 +490,13 @@ static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } static inline int tracing_is_on(void) { return 0; } #endif + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, +}; + #ifdef CONFIG_TRACING extern void tracing_start(void); extern void tracing_stop(void); @@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); -extern void ftrace_dump(void); +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } @@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap) { return 0; } -static inline void ftrace_dump(void) { } +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* -- cgit v1.1 From 669c55e9f99b90e46eaa0f98a67ec53d46dc969a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Apr 2010 14:59:29 +0200 Subject: sched: Pre-compute cpumask_weight(sched_domain_span(sd)) Dave reported that his large SPARC machines spend lots of time in hweight64(), try and optimize some of those needless cpumask_weight() invocations (esp. with the large offstack cpumasks these are very expensive indeed). Reported-by: David Miller Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e3e900f..dfea405 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -960,6 +960,7 @@ struct sched_domain { char *name; #endif + unsigned int span_weight; /* * Span of all CPUs in this domain. * -- cgit v1.1 From 62b915f1060996a8e1f69be50e3b8e9e43b710cb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 2 Apr 2010 19:01:22 +0200 Subject: tracing: Add graph output support for irqsoff tracer Add function graph output to irqsoff tracer. The graph output is enabled by setting new 'display-graph' trace option. Signed-off-by: Jiri Olsa LKML-Reference: <1270227683-14631-4-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ea5b1aa..8415a52 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -352,6 +352,10 @@ struct ftrace_graph_ret { int depth; }; +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* for init task */ @@ -400,10 +404,6 @@ extern char __irqentry_text_end[]; #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ - extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); @@ -441,6 +441,13 @@ static inline void unpause_graph_tracing(void) static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } +static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) +{ + return -1; +} +static inline void unregister_ftrace_graph(void) { } + static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; -- cgit v1.1 From 72c9ddfd4c5bf54ef03cfdf57026416cb678eeba Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 20 Apr 2010 15:47:11 -0700 Subject: ring-buffer: Make non-consuming read less expensive with lots of cpus. When performing a non-consuming read, a synchronize_sched() is performed once for every cpu which is actively tracing. This is very expensive, and can make it take several seconds to open up the 'trace' file with lots of cpus. Only one synchronize_sched() call is actually necessary. What is desired is for all cpus to see the disabling state change. So we transform the existing sequence: for_each_cpu() { ring_buffer_read_start(); } where each ring_buffer_start() call performs a synchronize_sched(), into the following: for_each_cpu() { ring_buffer_read_prepare(); } ring_buffer_read_prepare_sync(); for_each_cpu() { ring_buffer_read_start(); } wherein only the single ring_buffer_read_prepare_sync() call needs to do the synchronize_sched(). The first phase, via ring_buffer_read_prepare(), allocates the 'iter' memory and increments ->record_disabled. In the second phase, ring_buffer_read_prepare_sync() makes sure this ->record_disabled state is visible fully to all cpus. And in the final third phase, the ring_buffer_read_start() calls reset the 'iter' objects allocated in the first phase since we now know that none of the cpus are adding trace entries any more. This makes openning the 'trace' file nearly instantaneous on a sparc64 Niagara2 box with 128 cpus tracing. Signed-off-by: David S. Miller LKML-Reference: <20100420.154711.11246950.davem@davemloft.net> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c829776..25b4f68 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_prepare_sync(void); +void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * -- cgit v1.1 From 4dbf6bc239c169b032777616806ecc648058f6b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 4 May 2010 11:24:01 -0400 Subject: tracing: Convert nop macros to static inlines The ftrace.h file contains several functions as macros when the functions are disabled due to config options. This patch converts most of them to static inlines. There are two exceptions: register_ftrace_function() and unregister_ftrace_function() This is because their parameter "ops" must not be evaluated since code using the function is allowed to #ifdef out the creation of the parameter. This also fixes an error caused by recent changes: kernel/trace/trace_irqsoff.c: In function 'start_irqsoff_tracer': kernel/trace/trace_irqsoff.c:571: error: expected expression before 'do' Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8415a52..e0ae83b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -82,9 +82,13 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); #else /* !CONFIG_FUNCTION_TRACER */ -# define register_ftrace_function(ops) do { } while (0) -# define unregister_ftrace_function(ops) do { } while (0) -# define clear_ftrace_function(ops) do { } while (0) +/* + * (un)register_ftrace_function must be a macro since the ops parameter + * must not be evaluated. + */ +#define register_ftrace_function(ops) ({ 0; }) +#define unregister_ftrace_function(ops) ({ 0; }) +static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } @@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else -# define skip_trace(ip) ({ 0; }) -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) -# define ftrace_disable_daemon() do { } while (0) -# define ftrace_enable_daemon() do { } while (0) +static inline int skip_trace(unsigned long ip) { return 0; } +static inline int ftrace_force_update(void) { return 0; } +static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ +} +static inline void ftrace_disable_daemon(void) { } +static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { @@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled) extern void time_hardirqs_on(unsigned long a0, unsigned long a1); extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else -# define time_hardirqs_on(a0, a1) do { } while (0) -# define time_hardirqs_off(a0, a1) do { } while (0) + static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { } + static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_PREEMPT_TRACER extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else -# define trace_preempt_on(a0, a1) do { } while (0) -# define trace_preempt_off(a0, a1) do { } while (0) + static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { } + static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { } #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.1 From 1ce7e4ff24fe338438bc7837e02780f202bf202b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 23 Apr 2010 10:35:52 +0800 Subject: cgroup: Check task_lock in task_subsys_state() Expand task_subsys_state()'s rcu_dereference_check() to include the full locking rule as documented in Documentation/cgroups/cgroups.txt by adding a check for task->alloc_lock being held. This fixes an RCU false positive when resuming from suspend. The warning comes from freezer cgroup in cgroup_freezing_or_frozen(). Signed-off-by: Li Zefan Acked-by: Matt Helsley Signed-off-by: Paul E. McKenney --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b8ad1ea..8f78073 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -530,6 +530,7 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || + lockdep_is_held(&task->alloc_lock) || cgroup_lock_is_held()); } -- cgit v1.1 From 4677d4a53e0d565742277e8913e91c821453e63e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 3 May 2010 14:57:11 +0200 Subject: arch, hweight: Fix compilation errors Fix function prototype visibility issues when compiling for non-x86 architectures. Tested with crosstool (ftp://ftp.kernel.org/pub/tools/crosstool/) with alpha, ia64 and sparc targets. Signed-off-by: Borislav Petkov LKML-Reference: <20100503130736.GD26107@aftab> Signed-off-by: H. Peter Anvin --- include/asm-generic/bitops/arch_hweight.h | 8 ++++---- include/linux/bitops.h | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h index 9a81c1e..6a211f4 100644 --- a/include/asm-generic/bitops/arch_hweight.h +++ b/include/asm-generic/bitops/arch_hweight.h @@ -3,22 +3,22 @@ #include -inline unsigned int __arch_hweight32(unsigned int w) +static inline unsigned int __arch_hweight32(unsigned int w) { return __sw_hweight32(w); } -inline unsigned int __arch_hweight16(unsigned int w) +static inline unsigned int __arch_hweight16(unsigned int w) { return __sw_hweight16(w); } -inline unsigned int __arch_hweight8(unsigned int w) +static inline unsigned int __arch_hweight8(unsigned int w) { return __sw_hweight8(w); } -inline unsigned long __arch_hweight64(__u64 w) +static inline unsigned long __arch_hweight64(__u64 w) { return __sw_hweight64(w); } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c55d5bc..26caa60 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -10,6 +10,11 @@ #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif +extern unsigned int __sw_hweight8(unsigned int w); +extern unsigned int __sw_hweight16(unsigned int w); +extern unsigned int __sw_hweight32(unsigned int w); +extern unsigned long __sw_hweight64(__u64 w); + /* * Include this here because some architectures need generic_ffs/fls in * scope -- cgit v1.1 From 2c2df8418ac7908eec4558407b83f16739006c54 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:02 -0700 Subject: x86, acpi/irq: Introduce apci_isa_irq_to_gsi There are a number of cases where the current code makes the assumption that isa irqs identity map to the first 16 acpi global system intereupts. In most instances that assumption is correct as that is the required behaviour in dual i8259 mode and the default behavior in ioapic mode. However there are some systems out there that take advantage of acpis interrupt remapping for the isa irqs to have a completely different mapping of isa_irq to gsi. Introduce acpi_isa_irq_to_gsi to perform this mapping explicitly in the code that needs it. Initially this will be just the current assumed identity mapping to ensure it's introduction does not cause regressions. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-1-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b926afe..7a937da 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -116,6 +116,7 @@ extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); +int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity); -- cgit v1.1 From 9a0a91bb56d2915cdb8585717de38376ad20fef9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:03 -0700 Subject: x86, acpi/irq: Teach acpi_get_override_irq to take a gsi not an isa_irq In perverse acpi implementations the isa irqs are not identity mapped to the first 16 gsi. Furthermore at least the extended interrupt resource capability may return gsi's and not isa irqs. So since what we get from acpi is a gsi teach acpi_get_overrride_irq to operate on a gsi instead of an isa_irq. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-2-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7a937da..3da73f5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,9 +119,9 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); #ifdef CONFIG_X86_IO_APIC -extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity); +extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else -#define acpi_get_override_irq(bus, trigger, polarity) (-1) +#define acpi_get_override_irq(gsi, trigger, polarity) (-1) #endif /* * This function undoes the effect of one call to acpi_register_gsi(). -- cgit v1.1 From 2e26ca7150a4f2ab3e69471dfc65f131e7dd7a05 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 May 2010 10:52:31 -0400 Subject: tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header When more than one header is included under CREATE_TRACE_POINTS the DECLARE_TRACE() macro is not defined back to its original meaning and the second include will fail to initialize the TRACE_EVENT() and DECLARE_TRACE() correctly. To fix this the tracepoint.h file moves the define of DECLARE_TRACE() out of the #ifdef _LINUX_TRACEPOINT_H protection (just like the define of the TRACE_EVENT()). This way the define_trace.h will undef the DECLARE_TRACE() at the end and allow new headers to start from scratch. This patch also requires fixing the include/events/napi.h It currently uses DECLARE_TRACE() and should be converted to a TRACE_EVENT() format. But I'll leave that change to the authors of that file. But since the napi.h file depends on using the CREATE_TRACE_POINTS and does not define its own DEFINE_TRACE() it must use the define_trace.h method instead. Cc: Neil Horman Cc: David S. Miller Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 114 ++++++++++++++++++++++--------------------- include/trace/define_trace.h | 5 ++ include/trace/events/napi.h | 10 +++- 3 files changed, 72 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 78b4bd3..1d85f9a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -33,6 +33,65 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +/* + * Connect a probe to a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_register(const char *name, void *probe); + +/* + * Disconnect a probe from a tracepoint. + * Internal API, should not be used directly. + */ +extern int tracepoint_probe_unregister(const char *name, void *probe); + +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + +struct tracepoint_iter { + struct module *module; + struct tracepoint *tracepoint; +}; + +extern void tracepoint_iter_start(struct tracepoint_iter *iter); +extern void tracepoint_iter_next(struct tracepoint_iter *iter); +extern void tracepoint_iter_stop(struct tracepoint_iter *iter); +extern void tracepoint_iter_reset(struct tracepoint_iter *iter); +extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end); + +/* + * tracepoint_synchronize_unregister must be called between the last tracepoint + * probe unregistration and the end of module exit to make sure there is no + * caller executing a probe when it is freed. + */ +static inline void tracepoint_synchronize_unregister(void) +{ + synchronize_sched(); +} + +#define PARAMS(args...) args + +#ifdef CONFIG_TRACEPOINTS +extern void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end); +#else +static inline void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ } +#endif /* CONFIG_TRACEPOINTS */ + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include + * file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ + #ifndef DECLARE_TRACE #define TP_PROTO(args...) args @@ -96,9 +155,6 @@ struct tracepoint { #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -extern void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end); - #else /* !CONFIG_TRACEPOINTS */ #define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ @@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -static inline void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end) -{ } #endif /* CONFIG_TRACEPOINTS */ #endif /* DECLARE_TRACE */ -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_unregister(const char *name, void *probe); - -extern int tracepoint_probe_register_noupdate(const char *name, void *probe); -extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); -extern void tracepoint_probe_update_all(void); - -struct tracepoint_iter { - struct module *module; - struct tracepoint *tracepoint; -}; - -extern void tracepoint_iter_start(struct tracepoint_iter *iter); -extern void tracepoint_iter_next(struct tracepoint_iter *iter); -extern void tracepoint_iter_stop(struct tracepoint_iter *iter); -extern void tracepoint_iter_reset(struct tracepoint_iter *iter); -extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end); - -/* - * tracepoint_synchronize_unregister must be called between the last tracepoint - * probe unregistration and the end of module exit to make sure there is no - * caller executing a probe when it is freed. - */ -static inline void tracepoint_synchronize_unregister(void) -{ - synchronize_sched(); -} - -#define PARAMS(args...) args - -#endif /* _LINUX_TRACEPOINT_H */ - -/* - * Note: we keep the TRACE_EVENT outside the include file ifdef protection. - * This is due to the way trace events work. If a file includes two - * trace event headers under one "CREATE_TRACE_POINTS" the first include - * will override the TRACE_EVENT and break the second include. - */ - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5acfb1e..1dfab54 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -65,6 +65,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* Make all open coded DECLARE_TRACE nops */ +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) + #ifdef CONFIG_EVENT_TRACING #include #endif @@ -75,6 +79,7 @@ #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ +#undef DECLARE_TRACE /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index a8989c4..188deca 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -1,4 +1,7 @@ -#ifndef _TRACE_NAPI_H_ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM napi + +#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NAPI_H_ #include @@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), TP_ARGS(napi)); -#endif +#endif /* _TRACE_NAPI_H_ */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From 50b5d6ad63821cea324a5a7a19854d4de1a0a819 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 6 May 2010 00:56:07 -0700 Subject: sctp: Fix a race between ICMP protocol unreachable and connect() ICMP protocol unreachable handling completely disregarded the fact that the user may have locked the socket. It proceeded to destroy the association, even though the user may have held the lock and had a ref on the association. This resulted in the following: Attempt to release alive inet socket f6afcc00 ========================= [ BUG: held lock freed! ] ------------------------- somenu/2672 is freeing memory f6afcc00-f6afcfff, with a lock still held there! (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c 1 lock held by somenu/2672: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c stack backtrace: Pid: 2672, comm: somenu Not tainted 2.6.32-telco #55 Call Trace: [] ? printk+0xf/0x11 [] debug_check_no_locks_freed+0xce/0xff [] kmem_cache_free+0x21/0x66 [] __sk_free+0x9d/0xab [] sk_free+0x1c/0x1e [] sctp_association_put+0x32/0x89 [] __sctp_connect+0x36d/0x3f4 [] ? sctp_connect+0x13/0x4c [] ? autoremove_wake_function+0x0/0x33 [] sctp_connect+0x31/0x4c [] inet_dgram_connect+0x4b/0x55 [] sys_connect+0x54/0x71 [] ? lock_release_non_nested+0x88/0x239 [] ? might_fault+0x42/0x7c [] ? might_fault+0x42/0x7c [] sys_socketcall+0x6d/0x178 [] ? trace_hardirqs_on_thunk+0xc/0x10 [] syscall_call+0x7/0xb This was because the sctp_wait_for_connect() would aqcure the socket lock and then proceed to release the last reference count on the association, thus cause the fully destruction path to finish freeing the socket. The simplest solution is to start a very short timer in case the socket is owned by user. When the timer expires, we can do some verification and be able to do the release properly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 1 + include/net/sctp/structs.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 851c813..61d73e3 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -279,6 +279,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); void sctp_generate_heartbeat_event(unsigned long peer); +void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 597f8e2..219043a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1010,6 +1010,9 @@ struct sctp_transport { /* Heartbeat timer is per destination. */ struct timer_list hb_timer; + /* Timer to handle ICMP proto unreachable envets */ + struct timer_list proto_unreach_timer; + /* Since we're using per-destination retransmission timers * (see above), we're also using per-destination "transmitted" * queues. This probably ought to be a private struct -- cgit v1.1 From ee84b8243b07c33a5c8aed42b4b2da60cb16d1d2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 May 2010 09:28:41 -0700 Subject: rcu: create rcu_my_thread_group_empty() wrapper Some RCU-lockdep splat repairs need to know whether they are running in a single-threaded process. Unfortunately, the thread_group_empty() primitive is defined in sched.h, and can induce #include hell. This commit therefore introduces a rcu_my_thread_group_empty() wrapper that is defined in rcupdate.c, thus avoiding the need to include sched.h everywhere. Signed-off-by: "Paul E. McKenney" --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 07db2fe..db266bb 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -190,6 +190,8 @@ static inline int rcu_read_lock_sched_held(void) #ifdef CONFIG_PROVE_RCU +extern int rcu_my_thread_group_empty(void); + /** * rcu_dereference_check - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing -- cgit v1.1 From 1142d810298e694754498dbb4983fcb6cb7fd884 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 May 2010 18:49:20 +0200 Subject: cpu_stop: implement stop_cpu[s]() Implement a simplistic per-cpu maximum priority cpu monopolization mechanism. A non-sleeping callback can be scheduled to run on one or multiple cpus with maximum priority monopolozing those cpus. This is primarily to replace and unify RT workqueue usage in stop_machine and scheduler migration_thread which currently is serving multiple purposes. Four functions are provided - stop_one_cpu(), stop_one_cpu_nowait(), stop_cpus() and try_stop_cpus(). This is to allow clean sharing of resources among stop_cpu and all the migration thread users. One stopper thread per cpu is created which is currently named "stopper/CPU". This will eventually replace the migration thread and take on its name. * This facility was originally named cpuhog and lived in separate files but Peter Zijlstra nacked the name and thus got renamed to cpu_stop and moved into stop_machine.c. * Better reporting of preemption leak as per Peter's suggestion. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Dimitri Sivanich --- include/linux/stop_machine.h | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index baba3a2..efcbd6c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -1,15 +1,46 @@ #ifndef _LINUX_STOP_MACHINE #define _LINUX_STOP_MACHINE -/* "Bogolock": stop the entire machine, disable interrupts. This is a - very heavy lock, which is equivalent to grabbing every spinlock - (and more). So the "read" side to such a lock is anything which - disables preeempt. */ + #include #include +#include #include #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) +/* + * stop_cpu[s]() is simplistic per-cpu maximum priority cpu + * monopolization mechanism. The caller can specify a non-sleeping + * function to be executed on a single or multiple cpus preempting all + * other processes and monopolizing those cpus until it finishes. + * + * Resources for this mechanism are preallocated when a cpu is brought + * up and requests are guaranteed to be served as long as the target + * cpus are online. + */ + +typedef int (*cpu_stop_fn_t)(void *arg); + +struct cpu_stop_work { + struct list_head list; /* cpu_stopper->works */ + cpu_stop_fn_t fn; + void *arg; + struct cpu_stop_done *done; +}; + +int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); +void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, + struct cpu_stop_work *work_buf); +int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); +int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); + +/* + * stop_machine "Bogolock": stop the entire machine, disable + * interrupts. This is a very heavy lock, which is equivalent to + * grabbing every spinlock (and more). So the "read" side to such a + * lock is anything which disables preeempt. + */ + /** * stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run -- cgit v1.1 From 3fc1f1e27a5b807791d72e5d992aa33b668a6626 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 May 2010 18:49:20 +0200 Subject: stop_machine: reimplement using cpu_stop Reimplement stop_machine using cpu_stop. As cpu stoppers are guaranteed to be available for all online cpus, stop_machine_create/destroy() are no longer necessary and removed. With resource management and synchronization handled by cpu_stop, the new implementation is much simpler. Asking the cpu_stop to execute the stop_cpu() state machine on all online cpus with cpu hotplug disabled is enough. stop_machine itself doesn't need to manage any global resources anymore, so all per-instance information is rolled into struct stop_machine_data and the mutex and all static data variables are removed. The previous implementation created and destroyed RT workqueues as necessary which made stop_machine() calls highly expensive on very large machines. According to Dimitri Sivanich, preventing the dynamic creation/destruction makes booting faster more than twice on very large machines. cpu_stop resources are preallocated for all online cpus and should have the same effect. Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Dimitri Sivanich --- include/linux/stop_machine.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index efcbd6c..0e552e7 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); -/** - * stop_machine_create: create all stop_machine threads - * - * Description: This causes all stop_machine threads to be created before - * stop_machine actually gets called. This can be used by subsystems that - * need a non failing stop_machine infrastructure. - */ -int stop_machine_create(void); - -/** - * stop_machine_destroy: destroy all stop_machine threads - * - * Description: This causes all stop_machine threads which were created with - * stop_machine_create to be destroyed again. - */ -void stop_machine_destroy(void); - #else static inline int stop_machine(int (*fn)(void *), void *data, @@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine_create(void) { return 0; } -static inline void stop_machine_destroy(void) { } - #endif /* CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.1 From 969c79215a35b06e5e3efe69b9412f858df7856c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 May 2010 18:49:21 +0200 Subject: sched: replace migration_thread with cpu_stop Currently migration_thread is serving three purposes - migration pusher, context to execute active_load_balance() and forced context switcher for expedited RCU synchronize_sched. All three roles are hardcoded into migration_thread() and determining which job is scheduled is slightly messy. This patch kills migration_thread and replaces all three uses with cpu_stop. The three different roles of migration_thread() are splitted into three separate cpu_stop callbacks - migration_cpu_stop(), active_load_balance_cpu_stop() and synchronize_sched_expedited_cpu_stop() - and each use case now simply asks cpu_stop to execute the callback as necessary. synchronize_sched_expedited() was implemented with private preallocated resources and custom multi-cpu queueing and waiting logic, both of which are provided by cpu_stop. synchronize_sched_expedited_count is made atomic and all other shared resources along with the mutex are dropped. synchronize_sched_expedited() also implemented a check to detect cases where not all the callback got executed on their assigned cpus and fall back to synchronize_sched(). If called with cpu hotplug blocked, cpu_stop already guarantees that and the condition cannot happen; otherwise, stop_machine() would break. However, this patch preserves the paranoid check using a cpumask to record on which cpus the stopper ran so that it can serve as a bisection point if something actually goes wrong theree. Because the internal execution state is no longer visible, rcu_expedited_torture_stats() is removed. This patch also renames cpu_stop threads to from "stopper/%d" to "migration/%d". The names of these threads ultimately don't matter and there's no reason to make unnecessary userland visible changes. With this patch applied, stop_machine() and sched now share the same resources. stop_machine() is faster without wasting any resources and sched migration users are much cleaner. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Ingo Molnar Cc: Dipankar Sarma Cc: Josh Triplett Cc: Paul E. McKenney Cc: Oleg Nesterov Cc: Dimitri Sivanich --- include/linux/rcutiny.h | 2 -- include/linux/rcutree.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a519587..0006b2d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void) return 0; } -extern int rcu_expedited_torture_stats(char *page); - static inline void rcu_force_quiescent_state(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 42cc3a0..24e467e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,7 +35,6 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); -extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.1 From 03b1930efd3c2320b1dcba76c8af15f7e454919d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Mar 2010 19:09:55 -0300 Subject: V4L/DVB: saa7146: fix regression of the av7110/budget-av driver An earlier regression fix for the mxb driver (V4L/DVB: saa7146_vv: fix regression where v4l2_device was registered too late) caused a new regression in the av7110 driver. Reverted the old fix and fixed the problem in the mxb driver instead. Tested on mxb and budget-av cards. The real problem is that the saa7146 framework has separate probe() and attach() driver callbacks which should be rolled into one. This is now done for the mxb driver, but others should do the same. Lack of hardware makes this hard to do, though. I hope to get hold of some hexium cards and then I can try to improve the framework to prevent this from happening again. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/saa7146_vv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h index b9da1f5..4aeff96 100644 --- a/include/media/saa7146_vv.h +++ b/include/media/saa7146_vv.h @@ -188,7 +188,6 @@ void saa7146_buffer_timeout(unsigned long data); void saa7146_dma_free(struct saa7146_dev* dev,struct videobuf_queue *q, struct saa7146_buf *buf); -int saa7146_vv_devinit(struct saa7146_dev *dev); int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv); int saa7146_vv_release(struct saa7146_dev* dev); -- cgit v1.1 From 8cfe92d683a0041ac8e016a0b0a487c99a78f6c1 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 28 Apr 2010 11:33:25 +0200 Subject: drm/ttm: Remove the ttm_bo_block_reservation() function. It's unused and buggy in its current form, since it can place a bo in the reserved state without removing it from lru lists. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_driver.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e929c27..6b9db91 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -789,34 +789,6 @@ extern void ttm_bo_unreserve(struct ttm_buffer_object *bo); extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible); -/** - * ttm_bo_block_reservation - * - * @bo: A pointer to a struct ttm_buffer_object. - * @interruptible: Use interruptible sleep when waiting. - * @no_wait: Don't sleep, but rather return -EBUSY. - * - * Block reservation for validation by simply reserving the buffer. - * This is intended for single buffer use only without eviction, - * and thus needs no deadlock protection. - * - * Returns: - * -EBUSY: If no_wait == 1 and the buffer is already reserved. - * -ERESTARTSYS: If interruptible == 1 and the process received a signal - * while sleeping. - */ -extern int ttm_bo_block_reservation(struct ttm_buffer_object *bo, - bool interruptible, bool no_wait); - -/** - * ttm_bo_unblock_reservation - * - * @bo: A pointer to a struct ttm_buffer_object. - * - * Unblocks reservation leaving lru lists untouched. - */ -extern void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo); - /* * ttm_bo_util.c */ -- cgit v1.1 From 27a9da6538ee18046d7bff8e36a9f783542c54c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2010 20:36:56 +0200 Subject: sched: Remove rq argument to the tracepoints struct rq isn't visible outside of sched.o so its near useless to expose the pointer, also there are no users of it, so remove it. Acked-by: Steven Rostedt Signed-off-by: Peter Zijlstra LKML-Reference: <1272997616.1642.207.camel@laptop> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index cfceb0b..4f733ece 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -51,15 +51,12 @@ TRACE_EVENT(sched_kthread_stop_ret, /* * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_wait_task, - TP_PROTO(struct rq *rq, struct task_struct *p), + TP_PROTO(struct task_struct *p), - TP_ARGS(rq, p), + TP_ARGS(p), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -79,15 +76,12 @@ TRACE_EVENT(sched_wait_task, /* * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ DECLARE_EVENT_CLASS(sched_wakeup_template, - TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_PROTO(struct task_struct *p, int success), - TP_ARGS(rq, p, success), + TP_ARGS(p, success), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -111,31 +105,25 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, ); DEFINE_EVENT(sched_wakeup_template, sched_wakeup, - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - TP_ARGS(rq, p, success)); + TP_PROTO(struct task_struct *p, int success), + TP_ARGS(p, success)); /* * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - TP_ARGS(rq, p, success)); + TP_PROTO(struct task_struct *p, int success), + TP_ARGS(p, success)); /* * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, - TP_PROTO(struct rq *rq, struct task_struct *prev, + TP_PROTO(struct task_struct *prev, struct task_struct *next), - TP_ARGS(rq, prev, next), + TP_ARGS(prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) -- cgit v1.1 From bbf1bb3eee86f2eef2baa14e600be454d09109ee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 8 May 2010 16:20:53 +0200 Subject: cpu_stop: add dummy implementation for UP When !CONFIG_SMP, cpu_stop functions weren't defined at all which could lead to build failures if UP code uses cpu_stop facility. Add dummy cpu_stop implementation for UP. The waiting variants execute the work function directly with preempt disabled and stop_one_cpu_nowait() schedules a workqueue work. Makefile and ifdefs around stop_machine implementation are updated to accomodate CONFIG_SMP && !CONFIG_STOP_MACHINE case. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- include/linux/stop_machine.h | 69 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0e552e7..6b524a0 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -6,8 +6,6 @@ #include #include -#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) - /* * stop_cpu[s]() is simplistic per-cpu maximum priority cpu * monopolization mechanism. The caller can specify a non-sleeping @@ -18,9 +16,10 @@ * up and requests are guaranteed to be served as long as the target * cpus are online. */ - typedef int (*cpu_stop_fn_t)(void *arg); +#ifdef CONFIG_SMP + struct cpu_stop_work { struct list_head list; /* cpu_stopper->works */ cpu_stop_fn_t fn; @@ -34,12 +33,70 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); +#else /* CONFIG_SMP */ + +#include + +struct cpu_stop_work { + struct work_struct work; + cpu_stop_fn_t fn; + void *arg; +}; + +static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) +{ + int ret = -ENOENT; + preempt_disable(); + if (cpu == smp_processor_id()) + ret = fn(arg); + preempt_enable(); + return ret; +} + +static void stop_one_cpu_nowait_workfn(struct work_struct *work) +{ + struct cpu_stop_work *stwork = + container_of(work, struct cpu_stop_work, work); + preempt_disable(); + stwork->fn(stwork->arg); + preempt_enable(); +} + +static inline void stop_one_cpu_nowait(unsigned int cpu, + cpu_stop_fn_t fn, void *arg, + struct cpu_stop_work *work_buf) +{ + if (cpu == smp_processor_id()) { + INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn); + work_buf->fn = fn; + work_buf->arg = arg; + schedule_work(&work_buf->work); + } +} + +static inline int stop_cpus(const struct cpumask *cpumask, + cpu_stop_fn_t fn, void *arg) +{ + if (cpumask_test_cpu(raw_smp_processor_id(), cpumask)) + return stop_one_cpu(raw_smp_processor_id(), fn, arg); + return -ENOENT; +} + +static inline int try_stop_cpus(const struct cpumask *cpumask, + cpu_stop_fn_t fn, void *arg) +{ + return stop_cpus(cpumask, fn, arg); +} + +#endif /* CONFIG_SMP */ + /* * stop_machine "Bogolock": stop the entire machine, disable * interrupts. This is a very heavy lock, which is equivalent to * grabbing every spinlock (and more). So the "read" side to such a * lock is anything which disables preeempt. */ +#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) /** * stop_machine: freeze the machine on all CPUs and run this function @@ -67,7 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); -#else +#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ static inline int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) @@ -79,5 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } -#endif /* CONFIG_SMP */ -#endif /* _LINUX_STOP_MACHINE */ +#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#endif /* _LINUX_STOP_MACHINE */ -- cgit v1.1 From e0e37c200f1357db0dd986edb359c41c57d24f6e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:24:39 -0700 Subject: sched: Eliminate the ts->idle_lastupdate field Now that the only user of ts->idle_lastupdate is update_ts_time_stats(), the entire field can be eliminated. In update_ts_time_stats(), idle_lastupdate is first set to "now", and a few lines later, the only user is an if() statement that assigns a variable either to "now" or to ts->idle_lastupdate, which has the value of "now" at that point. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082439.2fab0b4f@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/tick.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index d2ae79e..0343eed 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -60,7 +60,6 @@ struct tick_sched { ktime_t idle_waketime; ktime_t idle_exittime; ktime_t idle_sleeptime; - ktime_t idle_lastupdate; ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; -- cgit v1.1 From 0224cf4c5ee0d7faec83956b8e21f7d89e3df3bd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:25:23 -0700 Subject: sched: Intoduce get_cpu_iowait_time_us() For the ondemand cpufreq governor, it is desired that the iowait time is microaccounted in a similar way as idle time is. This patch introduces the infrastructure to account and expose this information via the get_cpu_iowait_time_us() function. [akpm@linux-foundation.org: fix CONFIG_NO_HZ=n build] Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082523.284feab6@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/tick.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 0343eed..b232ccc 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -42,6 +42,7 @@ enum tick_nohz_mode { * @idle_waketime: Time when the idle was interrupted * @idle_exittime: Time when the idle state was left * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding * @sleep_length: Duration of the current idle sleep * @do_timer_lst: CPU was the last one doing do_timer before going idle */ @@ -60,6 +61,7 @@ struct tick_sched { ktime_t idle_waketime; ktime_t idle_exittime; ktime_t idle_sleeptime; + ktime_t iowait_sleeptime; ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; @@ -123,6 +125,7 @@ extern void tick_nohz_stop_sched_tick(int inidle); extern void tick_nohz_restart_sched_tick(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); +extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_stop_sched_tick(int inidle) { } static inline void tick_nohz_restart_sched_tick(void) { } @@ -133,6 +136,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } +static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ #endif -- cgit v1.1 From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 Apr 2010 16:23:07 +0800 Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat There is no need to disable lockdep after an RCU lockdep splat, so remove the debug_lockdeps_off() from lockdep_rcu_dereference(). To avoid repeated lockdep splats, use a static variable in the inlined rcu_dereference_check() and rcu_dereference_protected() macros so that a given instance splats only once, but so that multiple instances can be detected per boot. This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY, which is disabled by default. This provides the normal lockdep behavior by default, but permits people who want to find multiple RCU-lockdep splats per boot to easily do so. Requested-by: Eric Paris Signed-off-by: Lai Jiangshan Tested-by: Eric Paris Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index db266bb..4dca275 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -192,6 +192,15 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); +#define __do_rcu_dereference_check(c) \ + do { \ + static bool __warned; \ + if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ + __warned = true; \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + } \ + } while (0) + /** * rcu_dereference_check - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing @@ -221,8 +230,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + __do_rcu_dereference_check(c); \ rcu_dereference_raw(p); \ }) @@ -239,8 +247,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_dereference_protected(p, c) \ ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + __do_rcu_dereference_check(c); \ (p); \ }) -- cgit v1.1 From d20200b591f59847ab6a5c23507084a7d29e23c5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Mar 2010 10:52:21 -0700 Subject: rcu: Fix bogus CONFIG_PROVE_LOCKING in comments to reflect reality. It is CONFIG_DEBUG_LOCK_ALLOC rather than CONFIG_PROVE_LOCKING, so fix it. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 ++++++++------- include/linux/srcu.h | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4dca275..a150af0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -106,8 +106,8 @@ extern int debug_lockdep_rcu_enabled(void); /** * rcu_read_lock_held - might we be in RCU read-side critical section? * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in - * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. * @@ -129,11 +129,12 @@ extern int rcu_read_lock_bh_held(void); /** * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an - * RCU-sched read-side critical section. In absence of CONFIG_PROVE_LOCKING, - * this assumes we are in an RCU-sched read-side critical section unless it - * can prove otherwise. Note that disabling of preemption (including - * disabling irqs) counts as an RCU-sched read-side critical section. + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an + * RCU-sched read-side critical section. In absence of + * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side + * critical section unless it can prove otherwise. Note that disabling + * of preemption (including disabling irqs) counts as an RCU-sched + * read-side critical section. * * Check rcu_scheduler_active to prevent false positives during boot. */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5ecb2..9c01f10 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -84,8 +84,8 @@ long srcu_batches_completed(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in - * an SRCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an SRCU read-side critical section unless it can * prove otherwise. */ -- cgit v1.1 From 32c141a0a1dfa29e0a07d78bec0c0919fc4b9f88 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Mar 2010 10:59:28 -0700 Subject: rcu: fix now-bogus rcu_scheduler_active comments. The rcu_scheduler_active check has been wrapped into the new debug_lockdep_rcu_enabled() function, so update the comments to reflect this new reality. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a150af0..02537a7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -111,7 +111,8 @@ extern int debug_lockdep_rcu_enabled(void); * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. * - * Check rcu_scheduler_active to prevent false positives during boot. + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) { @@ -136,7 +137,8 @@ extern int rcu_read_lock_bh_held(void); * of preemption (including disabling irqs) counts as an RCU-sched * read-side critical section. * - * Check rcu_scheduler_active to prevent false positives during boot. + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. */ #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) -- cgit v1.1 From da848c47bc6e873a54a445ea1960423a495b6b32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Mar 2010 15:46:01 -0700 Subject: rcu: shrink rcutiny by making synchronize_rcu_bh() be inline Because synchronize_rcu_bh() is identical to synchronize_sched(), make the former a static inline invoking the latter, saving the overhead of an EXPORT_SYMBOL_GPL() and the duplicate code. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 -- include/linux/rcutiny.h | 12 +++++++++++- include/linux/rcutree.h | 2 ++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 02537a7..d8fb2ab 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -56,8 +56,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a519587..bbeb55b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -74,7 +74,17 @@ static inline void rcu_sched_force_quiescent_state(void) { } -#define synchronize_rcu synchronize_sched +extern void synchronize_sched(void); + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} static inline void synchronize_rcu_expedited(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 42cc3a0..7484fe6 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -86,6 +86,8 @@ static inline void __rcu_read_unlock_bh(void) extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +extern void synchronize_rcu_bh(void); +extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) -- cgit v1.1 From 25502a6c13745f4650cc59322bd198194f55e796 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 1 Apr 2010 17:37:01 -0700 Subject: rcu: refactor RCU's context-switch handling The addition of preemptible RCU to treercu resulted in a bit of confusion and inefficiency surrounding the handling of context switches for RCU-sched and for RCU-preempt. For RCU-sched, a context switch is a quiescent state, pure and simple, just like it always has been. For RCU-preempt, a context switch is in no way a quiescent state, but special handling is required when a task blocks in an RCU read-side critical section. However, the callout from the scheduler and the outer loop in ksoftirqd still calls something named rcu_sched_qs(), whose name is no longer accurate. Furthermore, when rcu_check_callbacks() notes an RCU-sched quiescent state, it ends up unnecessarily (though harmlessly, aside from the performance hit) enqueuing the current task if it happens to be running in an RCU-preempt read-side critical section. This not only increases the maximum latency of scheduler_tick(), it also needlessly increases the overhead of the next outermost rcu_read_unlock() invocation. This patch addresses this situation by separating the notion of RCU's context-switch handling from that of RCU-sched's quiescent states. The context-switch handling is covered by rcu_note_context_switch() in general and by rcu_preempt_note_context_switch() for preemptible RCU. This permits rcu_sched_qs() to handle quiescent states and only quiescent states. It also reduces the maximum latency of scheduler_tick(), though probably by much less than a microsecond. Finally, it means that tasks within preemptible-RCU read-side critical sections avoid incurring the overhead of queuing unless there really is a context switch. Suggested-by: Lai Jiangshan Acked-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index bbeb55b..ff22b97 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,6 +29,10 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); +} #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 7484fe6..b9f7460 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,6 +34,7 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); +extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern int rcu_expedited_torture_stats(char *page); -- cgit v1.1 From bbad937983147c017c25406860287cb94da9af7c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 2 Apr 2010 16:17:17 -0700 Subject: rcu: slim down rcutiny by removing rcu_scheduler_active and friends TINY_RCU does not need rcu_scheduler_active unless CONFIG_DEBUG_LOCK_ALLOC. So conditionally compile rcu_scheduler_active in order to slim down rcutiny a bit more. Also gets rid of an EXPORT_SYMBOL_GPL, which is responsible for most of the slimming. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 +--- include/linux/rcutiny.h | 13 +++++++++++++ include/linux/rcutree.h | 3 +++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d8fb2ab..23be3a7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -64,8 +64,6 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern int rcu_scheduler_active; -extern void rcu_scheduler_starting(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -178,7 +176,7 @@ static inline int rcu_read_lock_bh_held(void) #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { - return !rcu_scheduler_active || preempt_count() != 0 || irqs_disabled(); + return preempt_count() != 0 || irqs_disabled(); } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff22b97..14e5a76 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -128,4 +128,17 @@ static inline int rcu_preempt_depth(void) return 0; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +extern int rcu_scheduler_active __read_mostly; +extern void rcu_scheduler_starting(void); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +static inline void rcu_scheduler_starting(void) +{ +} + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index b9f7460..4828205 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -123,4 +123,7 @@ static inline int rcu_blocking_is_gp(void) return num_online_cpus() == 1; } +extern void rcu_scheduler_starting(void); +extern int rcu_scheduler_active __read_mostly; + #endif /* __LINUX_RCUTREE_H */ -- cgit v1.1 From d14aada8e20bdf81ffd43f433b123972cf575b32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 19 Apr 2010 22:24:22 -0700 Subject: rcu: make SRCU usable in modules Add a #include for mutex.h to allow SRCU to be more easily used in kernel modules. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9c01f10..4d5d2f5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -27,6 +27,8 @@ #ifndef _LINUX_SRCU_H #define _LINUX_SRCU_H +#include + struct srcu_struct_array { int c[2]; }; -- cgit v1.1 From a5d8e467f83f6672104f276223a88e3b50cbd375 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:38 -0400 Subject: Debugobjects transition check Implement a basic state machine checker in the debugobjects. This state machine checker detects races and inconsistencies within the "active" life of a debugobject. The checker only keeps track of the current state; all the state machine logic is kept at the object instance level. The checker works by adding a supplementary "unsigned int astate" field to the debug_obj structure. It keeps track of the current "active state" of the object. The only constraints that are imposed on the states by the debugobjects system is that: - activation of an object sets the current active state to 0, - deactivation of an object expects the current active state to be 0. For the rest of the states, the state mapping is determined by the specific object instance. Therefore, the logic keeping track of the state machine is within the specialized instance, without any need to know about it at the debugobject level. The current object active state is changed by calling: debug_object_active_state(addr, descr, expect, next) where "expect" is the expected state and "next" is the next state to move to if the expected state is found. A warning is generated if the expected is not found. Signed-off-by: Mathieu Desnoyers Reviewed-by: Thomas Gleixner Acked-by: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney --- include/linux/debugobjects.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 8c243aa..597692f 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -20,12 +20,14 @@ struct debug_obj_descr; * struct debug_obj - representaion of an tracked object * @node: hlist node to link the object into the tracker list * @state: tracked object state + * @astate: current active state * @object: pointer to the real object * @descr: pointer to an object type specific debug description structure */ struct debug_obj { struct hlist_node node; enum debug_obj_state state; + unsigned int astate; void *object; struct debug_obj_descr *descr; }; @@ -60,6 +62,15 @@ extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); extern void debug_object_free (void *addr, struct debug_obj_descr *descr); +/* + * Active state: + * - Set at 0 upon initialization. + * - Must return to 0 before deactivation. + */ +extern void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next); + extern void debug_objects_early_init(void); extern void debug_objects_mem_init(void); #else -- cgit v1.1 From 4376030a54860dedab9d848dfa7cc700a6025c0b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:39 -0400 Subject: rcu head introduce rcu head init on stack PEM: o Would it be possible to make this bisectable as follows? a. Insert a new patch after current patch 4/6 that defines destroy_rcu_head_on_stack(), init_rcu_head_on_stack(), and init_rcu_head() with their !CONFIG_DEBUG_OBJECTS_RCU_HEAD definitions. This patch performs this transition. Signed-off-by: Mathieu Desnoyers CC: "Paul E. McKenney" CC: David S. Miller CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: tglx@linutronix.de CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 23be3a7..b653b4a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -79,6 +79,14 @@ extern void rcu_init(void); (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) +static inline void init_rcu_head_on_stack(struct rcu_head *head) +{ +} + +static inline void destroy_rcu_head_on_stack(struct rcu_head *head) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern struct lockdep_map rcu_lock_map; -- cgit v1.1 From a93d2f1744206827ccf416e2cdc5018aa503314e Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 7 May 2010 14:33:26 +0800 Subject: sched, wait: Use wrapper functions epoll should not touch flags in wait_queue_t. This patch introduces a new function __add_wait_queue_exclusive(), for the users, who use wait queue as a LIFO queue. __add_wait_queue_tail_exclusive() is introduced too instead of add_wait_queue_exclusive_locked(). remove_wait_queue_locked() is removed, as it is a duplicate of __remove_wait_queue(), disliked by users, and with less users. Signed-off-by: Changli Gao Signed-off-by: Peter Zijlstra Cc: Alexander Viro Cc: Paul Menage Cc: Li Zefan Cc: Davide Libenzi Cc: LKML-Reference: <1273214006-2979-1-git-send-email-xiaosuo@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/wait.h | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index a48e16b..76d96d0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -127,12 +127,26 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) /* * Used for wake-one threads: */ +static inline void __add_wait_queue_exclusive(wait_queue_head_t *q, + wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wait); +} + static inline void __add_wait_queue_tail(wait_queue_head_t *head, - wait_queue_t *new) + wait_queue_t *new) { list_add_tail(&new->task_list, &head->task_list); } +static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q, + wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_tail(q, wait); +} + static inline void __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) { @@ -404,25 +418,6 @@ do { \ }) /* - * Must be called with the spinlock in the wait_queue_head_t held. - */ -static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q, - wait_queue_t * wait) -{ - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_tail(q, wait); -} - -/* - * Must be called with the spinlock in the wait_queue_head_t held. - */ -static inline void remove_wait_queue_locked(wait_queue_head_t *q, - wait_queue_t * wait) -{ - __remove_wait_queue(q, wait); -} - -/* * These are the old interfaces to sleep waiting for an event. * They are racy. DO NOT use them, use the wait_event* interfaces above. * We plan to remove these interfaces. -- cgit v1.1 From 72d5a9f7a9542f88397558c65bcfc3b115a65e34 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 May 2010 17:12:17 -0700 Subject: rcu: remove all rcu head initializations, except on_stack initializations Remove all rcu head inits. We don't care about the RCU head state before passing it to call_rcu() anyway. Only leave the "on_stack" variants so debugobjects can keep track of objects on stack. Signed-off-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b1ed1cd..7996fc2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -49,7 +49,6 @@ extern struct group_info init_groups; { .first = &init_task.pids[PIDTYPE_PGID].node }, \ { .first = &init_task.pids[PIDTYPE_SID].node }, \ }, \ - .rcu = RCU_HEAD_INIT, \ .level = 0, \ .numbers = { { \ .nr = 0, \ -- cgit v1.1 From f33d7e2d2d113a63772bbc993cdec3b5327f0ef1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 11 May 2010 14:06:43 -0700 Subject: dma-mapping: fix dma_sync_single_range_* dma_sync_single_range_for_cpu() and dma_sync_single_range_for_device() use a wrong address with a partial synchronization. Signed-off-by: FUJITA Tomonori Reviewed-by: Konrad Rzeszutek Wilk Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/dma-mapping-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index e694263..6920695 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -131,7 +131,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } else - dma_sync_single_for_cpu(dev, addr, size, dir); + dma_sync_single_for_cpu(dev, addr + offset, size, dir); } static inline void dma_sync_single_range_for_device(struct device *dev, @@ -148,7 +148,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev, debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); } else - dma_sync_single_for_device(dev, addr, size, dir); + dma_sync_single_for_device(dev, addr + offset, size, dir); } static inline void -- cgit v1.1 From 34441427aab4bdb3069a4ffcda69a99357abcb2e Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 11 May 2010 14:06:46 -0700 Subject: revert "procfs: provide stack information for threads" and its fixup commits Originally, commit d899bf7b ("procfs: provide stack information for threads") attempted to introduce a new feature for showing where the threadstack was located and how many pages are being utilized by the stack. Commit c44972f1 ("procfs: disable per-task stack usage on NOMMU") was applied to fix the NO_MMU case. Commit 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on 64-bit") was applied to fix a bug in ia32 executables being loaded. Commit 9ebd4eba7 ("procfs: fix /proc//stat stack pointer for kernel threads") was applied to fix a bug which had kernel threads printing a userland stack address. Commit 1306d603f ('proc: partially revert "procfs: provide stack information for threads"') was then applied to revert the stack pages being used to solve a significant performance regression. This patch nearly undoes the effect of all these patches. The reason for reverting these is it provides an unusable value in field 28. For x86_64, a fork will result in the task->stack_start value being updated to the current user top of stack and not the stack start address. This unpredictability of the stack_start value makes it worthless. That includes the intended use of showing how much stack space a thread has. Other architectures will get different values. As an example, ia64 gets 0. The do_fork() and copy_process() functions appear to treat the stack_start and stack_size parameters as architecture specific. I only partially reverted c44972f1 ("procfs: disable per-task stack usage on NOMMU") . If I had completely reverted it, I would have had to change mm/Makefile only build pagewalk.o when CONFIG_PROC_PAGE_MONITOR is configured. Since I could not test the builds without significant effort, I decided to not change mm/Makefile. I only partially reverted 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on 64-bit") . I left the KSTK_ESP() change in place as that seemed worthwhile. Signed-off-by: Robin Holt Cc: Stefani Seibold Cc: KOSAKI Motohiro Cc: Michal Simek Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f66..2b7b81d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1497,7 +1497,6 @@ struct task_struct { /* bitmask of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ - unsigned long stack_start; #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ -- cgit v1.1 From d83c49f3e36cecd2e8823b6c48ffba083b8a5704 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Apr 2010 17:17:09 -0400 Subject: Fix the regression created by "set S_DEAD on unlink()..." commit 1) i_flags simply doesn't work for mount/unlink race prevention; we may have many links to file and rm on one of those obviously shouldn't prevent bind on top of another later on. To fix it right way we need to mark _dentry_ as unsuitable for mounting upon; new flag (DCACHE_CANT_MOUNT) is protected by d_flags and i_mutex on the inode in question. Set it (with dont_mount(dentry)) in unlink/rmdir/etc., check (with cant_mount(dentry)) in places in namespace.c that used to check for S_DEAD. Setting S_DEAD is still needed in places where we used to set it (for directories getting killed), since we rely on it for readdir/rmdir race prevention. 2) rename()/mount() protection has another bogosity - we unhash the target before we'd checked that it's not a mountpoint. Fixed. 3) ancient bogosity in pivot_root() - we locked i_mutex on the right directory, but checked S_DEAD on the different (and wrong) one. Noticed and fixed. Signed-off-by: Al Viro --- include/linux/dcache.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 30b93b2..eebb617 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -186,6 +186,8 @@ d_iput: no no no yes #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_CANT_MOUNT 0x0100 + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; @@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry) return d_unhashed(dentry) && !IS_ROOT(dentry); } +static inline int cant_mount(struct dentry *dentry) +{ + return (dentry->d_flags & DCACHE_CANT_MOUNT); +} + +static inline void dont_mount(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_CANT_MOUNT; + spin_unlock(&dentry->d_lock); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; -- cgit v1.1 From 35790c0421121364883a167bab8a2e37e1f67f78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 May 2010 00:34:04 -0700 Subject: tcp: fix MD5 (RFC2385) support TCP MD5 support uses percpu data for temporary storage. It currently disables preemption so that same storage cannot be reclaimed by another thread on same cpu. We also have to make sure a softirq handler wont try to use also same context. Various bug reports demonstrated corruptions. Fix is to disable preemption and BH. Reported-by: Bhaskar Dutta Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 75be5a2..aa04b9a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1197,30 +1197,15 @@ extern int tcp_v4_md5_do_del(struct sock *sk, extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); -extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); -extern void __tcp_put_md5sig_pool(void); +extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +extern void tcp_put_md5sig_pool(void); + extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key); -static inline -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) -{ - int cpu = get_cpu(); - struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); - if (!ret) - put_cpu(); - return ret; -} - -static inline void tcp_put_md5sig_pool(void) -{ - __tcp_put_md5sig_pool(); - put_cpu(); -} - /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { -- cgit v1.1 From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sun, 16 May 2010 01:05:45 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric Now we have a set of nested attributes: IFLA_VFINFO_LIST (NESTED) IFLA_VF_INFO (NESTED) IFLA_VF_MAC IFLA_VF_VLAN IFLA_VF_TX_RATE This allows a single set to operate on multiple attributes if desired. Among other things, it means a dump can be replayed to set state. The current interface has yet to be released, so this seems like something to consider for 2.6.34. Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- include/linux/if_link.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c9bf92c..d94963b 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,10 +79,7 @@ enum { IFLA_NET_NS_PID, IFLA_IFALIAS, IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ - IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ - IFLA_VFINFO, + IFLA_VFINFO_LIST, __IFLA_MAX }; @@ -203,6 +200,24 @@ enum macvlan_mode { /* SR-IOV virtual function managment section */ +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, + IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + struct ifla_vf_mac { __u32 vf; __u8 mac[32]; /* MAX_ADDR_LEN */ -- cgit v1.1 From f3d46f9d3194e0329216002a8724d4c0957abc79 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 17 May 2010 14:33:53 +1000 Subject: atomic_t: Cast to volatile when accessing atomic variables In preparation for removing volatile from the atomic_t definition, this patch adds a volatile cast to all the atomic read functions. Signed-off-by: Anton Blanchard Signed-off-by: Linus Torvalds --- include/asm-generic/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index c99c64d..c33749f 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -33,7 +33,7 @@ * Atomically reads the value of @v. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) (*(volatile int *)&(v)->counter) /** * atomic_set - set atomic variable -- cgit v1.1 From 81880d603d00c645e0890d0a44d50711c503b72b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 17 May 2010 14:34:57 +1000 Subject: atomic_t: Remove volatile from atomic_t definition When looking at a performance problem on PowerPC, I noticed some awful code generation: c00000000051fc98: 3b 60 00 01 li r27,1 ... c00000000051fca0: 3b 80 00 00 li r28,0 ... c00000000051fcdc: 93 61 00 70 stw r27,112(r1) c00000000051fce0: 93 81 00 74 stw r28,116(r1) c00000000051fce4: 81 21 00 70 lwz r9,112(r1) c00000000051fce8: 80 01 00 74 lwz r0,116(r1) c00000000051fcec: 7d 29 07 b4 extsw r9,r9 c00000000051fcf0: 7c 00 07 b4 extsw r0,r0 c00000000051fcf4: 7c 20 04 ac lwsync c00000000051fcf8: 7d 60 f8 28 lwarx r11,0,r31 c00000000051fcfc: 7c 0b 48 00 cmpw r11,r9 c00000000051fd00: 40 c2 00 10 bne- c00000000051fd10 c00000000051fd04: 7c 00 f9 2d stwcx. r0,0,r31 c00000000051fd08: 40 c2 ff f0 bne+ c00000000051fcf8 c00000000051fd0c: 4c 00 01 2c isync We create two constants, write them out to the stack, read them straight back in and sign extend them. What a mess. It turns out this bad code is a result of us defining atomic_t as a volatile int. We removed the volatile attribute from the powerpc atomic_t definition years ago, but commit ea435467500612636f8f4fb639ff6e76b2496e4b (atomic_t: unify all arch definitions) added it back in. To dig up an old quote from Linus: > The fact is, volatile on data structures is a bug. It's a wart in the C > language. It shouldn't be used. > > Volatile accesses in *code* can be ok, and if we have "atomic_read()" > expand to a "*(volatile int *)&(x)->value", then I'd be ok with that. > > But marking data structures volatile just makes the compiler screw up > totally, and makes code for initialization sequences etc much worse. And screw up it does :) With the volatile removed, we see much more reasonable code generation: c00000000051f5b8: 3b 60 00 01 li r27,1 ... c00000000051f5c0: 3b 80 00 00 li r28,0 ... c00000000051fc7c: 7c 20 04 ac lwsync c00000000051fc80: 7c 00 f8 28 lwarx r0,0,r31 c00000000051fc84: 7c 00 d8 00 cmpw r0,r27 c00000000051fc88: 40 c2 00 10 bne- c00000000051fc98 c00000000051fc8c: 7f 80 f9 2d stwcx. r28,0,r31 c00000000051fc90: 40 c2 ff f0 bne+ c00000000051fc80 c00000000051fc94: 4c 00 01 2c isync Six instructions less. Signed-off-by: Anton Blanchard Signed-off-by: Linus Torvalds --- include/linux/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index c42724f..23d237a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -188,12 +188,12 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; typedef struct { - volatile int counter; + int counter; } atomic_t; #ifdef CONFIG_64BIT typedef struct { - volatile long counter; + long counter; } atomic64_t; #endif -- cgit v1.1 From 0b7f1a7efb38b551f5948a13d0b36e876ba536db Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 28 Jan 2009 21:01:02 +0100 Subject: platform: Make platform resource input parameters const Make the platform resource input parameters of platform_device_add_resources() and platform_device_register_simple() const, as the resources are copied and never modified. Signed-off-by: Geert Uytterhoeven Acked-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 212da17..5417944 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -44,12 +44,14 @@ extern int platform_get_irq_byname(struct platform_device *, const char *); extern int platform_add_devices(struct platform_device **, int); extern struct platform_device *platform_device_register_simple(const char *, int id, - struct resource *, unsigned int); + const struct resource *, unsigned int); extern struct platform_device *platform_device_register_data(struct device *, const char *, int, const void *, size_t); extern struct platform_device *platform_device_alloc(const char *name, int id); -extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num); +extern int platform_device_add_resources(struct platform_device *pdev, + const struct resource *res, + unsigned int num); extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size); extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); -- cgit v1.1 From bf54a2b3c0dbf76136f00ff785bf6d8f6291311d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 18 Nov 2008 21:13:53 +0100 Subject: m68k: amiga - Zorro bus modalias support Add Amiga Zorro bus modalias and uevent support Signed-off-by: Geert Uytterhoeven --- include/linux/mod_devicetable.h | 9 +++++++++ include/linux/zorro.h | 13 +------------ 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f58e9d8..56fde43 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -474,4 +474,13 @@ struct platform_device_id { __attribute__((aligned(sizeof(kernel_ulong_t)))); }; +struct zorro_device_id { + __u32 id; /* Device ID or ZORRO_WILDCARD */ + kernel_ulong_t driver_data; /* Data private to the driver */ +}; + +#define ZORRO_WILDCARD (0xffffffff) /* not official */ + +#define ZORRO_DEVICE_MODALIAS_FMT "zorro:i%08X" + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/zorro.h b/include/linux/zorro.h index 913bfc2..908db1b 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -38,8 +38,6 @@ typedef __u32 zorro_id; -#define ZORRO_WILDCARD (0xffffffff) /* not official */ - /* Include the ID list */ #include @@ -116,6 +114,7 @@ struct ConfigDev { #include #include +#include #include @@ -155,16 +154,6 @@ extern struct bus_type zorro_bus_type; /* - * Zorro device IDs - */ - -struct zorro_device_id { - zorro_id id; /* Device ID or ZORRO_WILDCARD */ - unsigned long driver_data; /* Data private to the driver */ -}; - - - /* * Zorro device drivers */ -- cgit v1.1 From 0d305464aefff342c85b4db8b3d7a4345246e5a1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 5 Apr 2009 12:40:41 +0200 Subject: m68k: amiga - Zorro host bridge platform device conversion Signed-off-by: Geert Uytterhoeven --- include/linux/zorro.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index 908db1b..7bf9db5 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -141,15 +141,6 @@ struct zorro_dev { * Zorro bus */ -struct zorro_bus { - struct list_head devices; /* list of devices on this bus */ - unsigned int num_resources; /* number of resources */ - struct resource resources[4]; /* address space routed to this bus */ - struct device dev; - char name[10]; -}; - -extern struct zorro_bus zorro_bus; /* single Zorro bus */ extern struct bus_type zorro_bus_type; -- cgit v1.1