From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Jun 2010 08:45:58 +0000 Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH struct ethtool_rxnfc was originally defined in 2.6.27 for the ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data fields. It was then extended in 2.6.30 to support various additional commands. These commands should have been defined to use a new structure, but it is too late to change that now. Since user-space may still be using the old structure definition for the ETHTOOL_{G,S}RXFH commands, and since they do not need the additional fields, only copy the originally defined fields to and from user-space. Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 276b40a..b4207ca 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -379,6 +379,8 @@ struct ethtool_rxnfc { __u32 flow_type; /* The rx flow hash value or the rule DB size */ __u64 data; + /* The following fields are not valid and must not be used for + * the ETHTOOL_{G,X}RXFH commands. */ struct ethtool_rx_flow_spec fs; __u32 rule_cnt; __u32 rule_locs[0]; -- cgit v1.1 From c59690fa484c04ab96fe932241b569a09755a4d2 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Jun 2010 00:53:53 -0700 Subject: Input: i8042 - mark stubs in i8042.h "static inline" Otherwise we may run into following: drivers/platform/built-in.o: In function `i8042_lock_chip': /home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: multiple definition of `i8042_lock_chip' drivers/input/serio/built-in.o:/home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: first defined here ... make[1]: *** [drivers/built-in.o] Error 1 make: *** [drivers] Error 2 Signed-off-by: Feng Tang Signed-off-by: Dmitry Torokhov --- include/linux/i8042.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 9bf6870..a986ff5 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -46,31 +46,31 @@ int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, #else -void i8042_lock_chip(void) +static inline void i8042_lock_chip(void) { } -void i8042_unlock_chip(void) +static inline void i8042_unlock_chip(void) { } -int i8042_command(unsigned char *param, int command) +static inline int i8042_command(unsigned char *param, int command) { return -ENODEV; } -bool i8042_check_port_owner(const struct serio *serio) +static inline bool i8042_check_port_owner(const struct serio *serio) { return false; } -int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, +static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, struct serio *serio)) { return -ENODEV; } -int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, +static inline int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, struct serio *serio)) { return -ENODEV; -- cgit v1.1 From 4efd7e833591721bec21cc4730a7f6261417840f Mon Sep 17 00:00:00 2001 From: Andreas Steffen Date: Wed, 30 Jun 2010 10:41:15 -0700 Subject: xfrm: fix XFRMA_MARK extraction in xfrm_mark_get Determine the size of the xfrm_mark struct, not of its pointer. Signed-off-by: Andreas Steffen Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1913af6..fc8f36d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1586,7 +1586,7 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_MARK]) - memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m)); + memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark)); else m->v = m->m = 0; -- cgit v1.1 From 9b2c2ff7a1c04e69842254dd4afe0f8ad4efa439 Mon Sep 17 00:00:00 2001 From: Saeed Bishara Date: Sun, 27 Jun 2010 00:26:43 +0000 Subject: mv643xx_eth: use sw csum for big packets Some controllers (KW, Dove) limits the TX IP/layer4 checksum offloading to a max size. Signed-off-by: Saeed Bishara Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller --- include/linux/mv643xx_eth.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index cbbbe9b..30b0c4e 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -19,6 +19,11 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; struct platform_device *shared_smi; unsigned int t_clk; + /* + * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default + * limit of 9KiB will be used. + */ + int tx_csum_limit; }; #define MV643XX_ETH_PHY_ADDR_DEFAULT 0 -- cgit v1.1 From 4ef6acff83222f4496ceef7d1f0ee9e50a5bb403 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Jul 2010 13:21:35 +0000 Subject: sched: qdisc_reset_all_tx is calling qdisc_reset without qdisc_lock When calling qdisc_reset() the qdisc lock needs to be held. In this case there is at least one driver i4l which is using this without holding the lock. Add the locking here. Signed-off-by: John Fastabend Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/net/sch_generic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 03ca5d8..ba749be 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -317,8 +317,16 @@ extern void tcf_destroy_chain(struct tcf_proto **fl); static inline void qdisc_reset_all_tx(struct net_device *dev) { unsigned int i; - for (i = 0; i < dev->num_tx_queues; i++) - qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc); + struct Qdisc *qdisc; + + for (i = 0; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } } /* Are all TX queues of the device empty? */ -- cgit v1.1 From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Jul 2010 13:21:57 +0000 Subject: net: decreasing real_num_tx_queues needs to flush qdisc Reducing real_num_queues needs to flush the qdisc otherwise skbs with queue_mappings greater then real_num_tx_queues can be sent to the underlying driver. The flow for this is, dev_queue_xmit() dev_pick_tx() skb_tx_hash() => hash using real_num_tx_queues skb_set_queue_mapping() ... qdisc_enqueue_root() => enqueue skb on txq from hash ... dev->real_num_tx_queues -= n ... sch_direct_xmit() dev_hard_start_xmit() ndo_start_xmit(skb,dev) => skb queue set with old hash skbs are enqueued on the qdisc with skb->queue_mapping set 0 < queue_mappings < real_num_tx_queues. When the driver decreases real_num_tx_queues skb's may be dequeued from the qdisc with a queue_mapping greater then real_num_tx_queues. This fixes a case in ixgbe where this was occurring with DCB and FCoE. Because the driver is using queue_mapping to map skbs to tx descriptor rings we can potentially map skbs to rings that no longer exist. Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/net/sch_generic.h | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 40291f3..5e6188d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev) return (dev->num_tx_queues > 1); } +extern void netif_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); + /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ba749be..433604b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb, extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); -/* Reset all TX qdiscs of a device. */ -static inline void qdisc_reset_all_tx(struct net_device *dev) +/* Reset all TX qdiscs greater then index of a device. */ +static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { - unsigned int i; struct Qdisc *qdisc; - for (i = 0; i < dev->num_tx_queues; i++) { + for (; i < dev->num_tx_queues; i++) { qdisc = netdev_get_tx_queue(dev, i)->qdisc; if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev) } } +static inline void qdisc_reset_all_tx(struct net_device *dev) +{ + qdisc_reset_all_tx_gt(dev, 0); +} + /* Are all TX queues of the device empty? */ static inline bool qdisc_all_tx_empty(const struct net_device *dev) { -- cgit v1.1 From e2aec372ff4b7e78e79c308104a860ae0ed20950 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 1 Jul 2010 13:18:58 +0000 Subject: linux/net.h: fix kernel-doc warnings Fix kernel-doc warnings in linux/net.h: Warning(include/linux/net.h:151): No description found for parameter 'wq' Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'fasync_list' description in 'socket' Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'wait' description in 'socket' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 2b4deee..dee0b11 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -129,10 +129,9 @@ struct socket_wq { * @type: socket type (%SOCK_STREAM, etc) * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) * @ops: protocol specific socket operations - * @fasync_list: Asynchronous wake up list * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation - * @wait: wait queue for several uses + * @wq: wait queue for several uses */ struct socket { socket_state state; -- cgit v1.1 From b945d6b2554d550fe95caadc61e521c0ad71fb9c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 May 2010 15:31:43 +0200 Subject: rbtree: Undo augmented trees performance damage and regression Reimplement augmented RB-trees without sprinkling extra branches all over the RB-tree code (which lives in the scheduler hot path). This approach is 'borrowed' from Fabio's BFQ implementation and relies on traversing the rebalance path after the RB-tree-op to correct the heap property for insertion/removal and make up for the damage done by the tree rotations. For insertion the rebalance path is trivially that from the new node upwards to the root, for removal it is that from the deepest node in the path from the to be removed node that will still be around after the removal. [ This patch also fixes a video driver regression reported by Ali Gholami Rudi - the memtype->subtree_max_end was updated incorrectly. ] Acked-by: Suresh Siddha Acked-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra Tested-by: Ali Gholami Rudi Cc: Fabio Checconi Cc: "H. Peter Anvin" Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: <1275414172.27810.27961.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/rbtree.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fe1872e..7066acb 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -110,7 +110,6 @@ struct rb_node struct rb_root { struct rb_node *rb_node; - void (*augment_cb)(struct rb_node *node); }; @@ -130,9 +129,7 @@ static inline void rb_set_color(struct rb_node *rb, int color) rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; } -#define RB_ROOT (struct rb_root) { NULL, NULL, } -#define RB_AUGMENT_ROOT(x) (struct rb_root) { NULL, x} - +#define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) @@ -142,6 +139,14 @@ static inline void rb_set_color(struct rb_node *rb, int color) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); +typedef void (*rb_augment_f)(struct rb_node *node, void *data); + +extern void rb_augment_insert(struct rb_node *node, + rb_augment_f func, void *data); +extern struct rb_node *rb_augment_erase_begin(struct rb_node *node); +extern void rb_augment_erase_end(struct rb_node *node, + rb_augment_f func, void *data); + /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); -- cgit v1.1 From bcfcc450baaaa44afc1d3c51ef96a53338ff0eb2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 2 Jul 2010 07:08:44 +0000 Subject: net: Fix definition of netif_vdbg() when VERBOSE_DEBUG is defined netif_vdbg() was originally defined as entirely equivalent to netdev_vdbg(), but I assume that it was intended to take the same parameters as netif_dbg() etc. (Currently it is only used by the sfc driver, in which I worked on that assumption.) In commit a4ed89c I changed the definition used when VERBOSE_DEBUG is not defined, but I failed to notice that the definition used when VERBOSE_DEBUG is defined was also not as I expected. Change that to match netif_dbg() as well. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e6188d..b21e405 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2332,7 +2332,7 @@ do { \ #endif #if defined(VERBOSE_DEBUG) -#define netif_vdbg netdev_dbg +#define netif_vdbg netif_dbg #else #define netif_vdbg(priv, type, dev, format, args...) \ ({ \ -- cgit v1.1 From 9c3a8ee8a1d72c5c0d7fbdf426d80e270ddfa54c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:27 +0200 Subject: writeback: remove writeback_inodes_wbc This was just an odd wrapper around writeback_inodes_wb. Removing this also allows to get rid of the bdi member of struct writeback_control which was rather out of place there. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d63ef8f..f6756f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct backing_dev_info *bdi; /* If !NULL, only write back this - queue */ struct super_block *sb; /* if !NULL, only write inodes from this super_block */ enum writeback_sync_modes sync_mode; @@ -66,7 +64,8 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); -void writeback_inodes_wbc(struct writeback_control *wbc); +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); -- cgit v1.1 From edadfb10ba35da7253541e4155aa92eff758ebe6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:54 +0200 Subject: writeback: split writeback_inodes_wb The case where we have a superblock doesn't require a loop here as we scan over all inodes in writeback_sb_inodes. Split it out into a separate helper to make the code simpler. This also allows to get rid of the sb member in struct writeback_control, which was rather out of place there. Also update the comments in writeback_sb_inodes that explain the handling of inodes from wrong superblocks. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f6756f6..c24eca7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct super_block *sb; /* if !NULL, only write inodes from - this super_block */ enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ -- cgit v1.1 From 83ba7b071f30f7c01f72518ad72d5cd203c27502 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Jul 2010 08:59:53 +0200 Subject: writeback: simplify the write back thread queue First remove items from work_list as soon as we start working on them. This means we don't have to track any pending or visited state and can get rid of all the RCU magic freeing the work items - we can simply free them once the operation has finished. Second use a real completion for tracking synchronous requests - if the caller sets the completion pointer we complete it, otherwise use it as a boolean indicator that we can free the work item directly. Third unify struct wb_writeback_args and struct bdi_work into a single data structure, wb_writeback_work. Previous we set all parameters into a struct wb_writeback_args, copied it into struct bdi_work, copied it again on the stack to use it there. Instead of just allocate one structure dynamically or on the stack and use it all the way through the stack. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9ae2889..e9aec0d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -82,8 +82,6 @@ struct backing_dev_info { struct bdi_writeback wb; /* default writeback info for this bdi */ spinlock_t wb_lock; /* protects update side of wb_list */ struct list_head wb_list; /* the flusher threads hanging off this bdi */ - unsigned long wb_mask; /* bitmask of registered tasks */ - unsigned int wb_cnt; /* number of registered tasks */ struct list_head work_list; -- cgit v1.1 From 140236b4b1c749c9b795ea3d11558a0eb5a3a080 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 10 Jun 2010 13:56:33 +0300 Subject: VFS: introduce s_dirty accessors This patch introduces 3 VFS accessors: 'sb_mark_dirty()', 'sb_mark_clean()', and 'sb_is_dirty()'. They simply set 'sb->s_dirt' or test 'sb->s_dirt'. The plan is to make every FS use these accessors later instead of manipulating the 'sb->s_dirt' flag directly. Ultimately, this change is a preparation for the periodic superblock synchronization optimization which is about preventing the "sync_supers" kernel thread from waking up even if there is nothing to synchronize. This patch does not do any functional change, just adds accessor functions. Signed-off-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 471e1ff..68ca1b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1783,6 +1783,19 @@ extern int get_sb_pseudo(struct file_system_type *, char *, struct vfsmount *mnt); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); +static inline void sb_mark_dirty(struct super_block *sb) +{ + sb->s_dirt = 1; +} +static inline void sb_mark_clean(struct super_block *sb) +{ + sb->s_dirt = 0; +} +static inline int sb_is_dirty(struct super_block *sb) +{ + return sb->s_dirt; +} + /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) -- cgit v1.1 From 095c24710aa508a303edff86709637007113fbbf Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Sat, 12 Jun 2010 20:20:36 -0300 Subject: V4L/DVB: tuner: Add a definition for the Philips FQ1236 MK5 NTSC tuner Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- include/media/tuner.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/tuner.h b/include/media/tuner.h index 5505c53..51811eac 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -130,6 +130,7 @@ #define TUNER_PHILIPS_CU1216L 82 #define TUNER_NXP_TDA18271 83 #define TUNER_SONY_BTF_PXN01Z 84 +#define TUNER_PHILIPS_FQ1236_MK5 85 /* NTSC, TDA9885, no FM radio */ /* tv card specific */ #define TDA9887_PRESENT (1<<0) -- cgit v1.1 From 44a54f787c0abcf75a2ed49b8ec8b2b512468f73 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Jul 2010 15:41:44 -0400 Subject: tracing: Add alignment to syscall metadata declarations For some reason if we declare a static variable and then assign it later, and the assignment contains a __attribute__((__aligned__(#))), some versions of gcc will ignore it. This caused the syscall meta data to not be compact in its section and caused a kernel oops when the section was being read. The fix for these versions of gcc seems to be to add the aligned attribute to the declaration as well. This fixes the BZ regression: https://bugzilla.kernel.org/show_bug.cgi?id=16353 Reported-by: Zeev Tarantov Tested-by: Zeev Tarantov Acked-by: Frederic Weisbecker LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7f614ce..13ebb54 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -124,7 +124,8 @@ extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ @@ -138,7 +139,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ -- cgit v1.1 From 95f72d1ed41a66f1c1c29c24d479de81a0bea36f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jul 2010 14:36:09 +1000 Subject: lmb: rename to memblock via following scripts FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/lmb/memblock/g' \ -e 's/LMB/MEMBLOCK/g' \ $FILES for N in $(find . -name lmb.[ch]); do M=$(echo $N | sed 's/lmb/memblock/g') mv $N $M done and remove some wrong change like lmbench and dlmb etc. also move memblock.c from lib/ to mm/ Suggested-by: Ingo Molnar Acked-by: "H. Peter Anvin" Acked-by: Benjamin Herrenschmidt Acked-by: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/lmb.h | 89 ------------------------------------------------ include/linux/memblock.h | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 89 deletions(-) delete mode 100644 include/linux/lmb.h create mode 100644 include/linux/memblock.h (limited to 'include') diff --git a/include/linux/lmb.h b/include/linux/lmb.h deleted file mode 100644 index f3d1433..0000000 --- a/include/linux/lmb.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _LINUX_LMB_H -#define _LINUX_LMB_H -#ifdef __KERNEL__ - -/* - * Logical memory blocks. - * - * Copyright (C) 2001 Peter Bergner, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#define MAX_LMB_REGIONS 128 - -struct lmb_property { - u64 base; - u64 size; -}; - -struct lmb_region { - unsigned long cnt; - u64 size; - struct lmb_property region[MAX_LMB_REGIONS+1]; -}; - -struct lmb { - unsigned long debug; - u64 rmo_size; - struct lmb_region memory; - struct lmb_region reserved; -}; - -extern struct lmb lmb; - -extern void __init lmb_init(void); -extern void __init lmb_analyze(void); -extern long lmb_add(u64 base, u64 size); -extern long lmb_remove(u64 base, u64 size); -extern long __init lmb_free(u64 base, u64 size); -extern long __init lmb_reserve(u64 base, u64 size); -extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); -extern u64 __init lmb_alloc(u64 size, u64 align); -extern u64 __init lmb_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __lmb_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init lmb_phys_mem_size(void); -extern u64 lmb_end_of_DRAM(void); -extern void __init lmb_enforce_memory_limit(u64 memory_limit); -extern int __init lmb_is_reserved(u64 addr); -extern int lmb_is_region_reserved(u64 base, u64 size); -extern int lmb_find(struct lmb_property *res); - -extern void lmb_dump_all(void); - -static inline u64 -lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].size; -} -static inline u64 -lmb_size_pages(struct lmb_region *type, unsigned long region_nr) -{ - return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) -{ - return lmb_start_pfn(type, region_nr) + - lmb_size_pages(type, region_nr); -} - -#include - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_LMB_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h new file mode 100644 index 0000000..a59faf2 --- /dev/null +++ b/include/linux/memblock.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_MEMBLOCK_H +#define _LINUX_MEMBLOCK_H +#ifdef __KERNEL__ + +/* + * Logical memory blocks. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#define MAX_MEMBLOCK_REGIONS 128 + +struct memblock_property { + u64 base; + u64 size; +}; + +struct memblock_region { + unsigned long cnt; + u64 size; + struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; +}; + +struct memblock { + unsigned long debug; + u64 rmo_size; + struct memblock_region memory; + struct memblock_region reserved; +}; + +extern struct memblock memblock; + +extern void __init memblock_init(void); +extern void __init memblock_analyze(void); +extern long memblock_add(u64 base, u64 size); +extern long memblock_remove(u64 base, u64 size); +extern long __init memblock_free(u64 base, u64 size); +extern long __init memblock_reserve(u64 base, u64 size); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, + u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc(u64 size, u64 align); +extern u64 __init memblock_alloc_base(u64 size, + u64, u64 max_addr); +extern u64 __init __memblock_alloc_base(u64 size, + u64 align, u64 max_addr); +extern u64 __init memblock_phys_mem_size(void); +extern u64 memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int __init memblock_is_reserved(u64 addr); +extern int memblock_is_region_reserved(u64 base, u64 size); +extern int memblock_find(struct memblock_property *res); + +extern void memblock_dump_all(void); + +static inline u64 +memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} +static inline u64 +memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +{ + return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; +} +static inline u64 +memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} +static inline u64 +memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +{ + return memblock_start_pfn(type, region_nr) + + memblock_size_pages(type, region_nr); +} + +#include + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.1 From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Jul 2010 20:50:29 -0700 Subject: net: fix problem in reading sock TX queue Fix problem in reading the tx_queue recorded in a socket. In dev_pick_tx, the TX queue is read by doing a check with sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get. The problem is that there is not mutual exclusion across these calls in the socket so it it is possible that the queue in the sock can be invalidated after sk_tx_queue_recorded is called so that sk_tx_queue get returns -1, which sets 65535 in queue_index and thus dev_pick_tx returns 65536 which is a bogus queue and can cause crash in dev_queue_xmit. We fix this by only calling sk_tx_queue_get which does the proper checks. The interface is that sk_tx_queue_get returns the TX queue if the sock argument is non-NULL and TX queue is recorded, else it returns -1. sk_tx_queue_recorded is no longer used so it can be completely removed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 731150d5..0a691ea 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk) static inline int sk_tx_queue_get(const struct sock *sk) { - return sk->sk_tx_queue_mapping; -} - -static inline bool sk_tx_queue_recorded(const struct sock *sk) -{ - return (sk && sk->sk_tx_queue_mapping >= 0); + return sk ? sk->sk_tx_queue_mapping : -1; } static inline void sk_set_socket(struct sock *sk, struct socket *sock) -- cgit v1.1 From 13ceef099edd2b70c5a6f3a9ef5d6d97cda2e096 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Jul 2010 07:56:33 +0200 Subject: jbd2/ocfs2: Fix block checksumming when a buffer is used in several transactions OCFS2 uses t_commit trigger to compute and store checksum of the just committed blocks. When a buffer has b_frozen_data, checksum is computed for it instead of b_data but this can result in an old checksum being written to the filesystem in the following scenario: 1) transaction1 is opened 2) handle1 is opened 3) journal_access(handle1, bh) - This sets jh->b_transaction to transaction1 4) modify(bh) 5) journal_dirty(handle1, bh) 6) handle1 is closed 7) start committing transaction1, opening transaction2 8) handle2 is opened 9) journal_access(handle2, bh) - This copies off b_frozen_data to make it safe for transaction1 to commit. jh->b_next_transaction is set to transaction2. 10) jbd2_journal_write_metadata() checksums b_frozen_data 11) the journal correctly writes b_frozen_data to the disk journal 12) handle2 is closed - There was no dirty call for the bh on handle2, so it is never queued for any more journal operation 13) Checkpointing finally happens, and it just spools the bh via normal buffer writeback. This will write b_data, which was never triggered on and thus contains a wrong (old) checksum. This patch fixes the problem by calling the trigger at the moment data is frozen for journal commit - i.e., either when b_frozen_data is created by do_get_write_access or just before we write a buffer to the log if b_frozen_data does not exist. We also rename the trigger to t_frozen as that better describes when it is called. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- include/linux/jbd2.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a4d2e9f..adf832d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); struct jbd2_buffer_trigger_type { /* - * Fired just before a buffer is written to the journal. - * mapped_data is a mapped buffer that is the frozen data for - * commit. + * Fired a the moment data to write to the journal are known to be + * stable - so either at the moment b_frozen_data is created or just + * before a buffer is written to the journal. mapped_data is a mapped + * buffer that is the frozen data for commit. */ - void (*t_commit)(struct jbd2_buffer_trigger_type *type, + void (*t_frozen)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh, void *mapped_data, size_t size); @@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type { struct buffer_head *bh); }; -extern void jbd2_buffer_commit_trigger(struct journal_head *jh, +extern void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers); extern void jbd2_buffer_abort_trigger(struct journal_head *jh, -- cgit v1.1 From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 15 Jul 2010 09:41:42 -0600 Subject: PCI: fall back to original BIOS BAR addresses If we fail to assign resources to a PCI BAR, this patch makes us try the original address from BIOS rather than leaving it disabled. Linux tries to make sure all PCI device BARs are inside the upstream PCI host bridge or P2P bridge apertures, reassigning BARs if necessary. Windows does similar reassignment. Before this patch, if we could not move a BAR into an aperture, we left the resource unassigned, i.e., at address zero. Windows leaves such BARs at the original BIOS addresses, and this patch makes Linux do the same. This is a bit ugly because we disable the resource long before we try to reassign it, so we have to keep track of the BIOS BAR address somewhere. For lack of a better place, I put it in the struct pci_dev. I think it would be cleaner to attempt the assignment immediately when the claim fails, so we could easily remember the original address. But we currently claim motherboard resources in the middle, after attempting to claim PCI resources and before assigning new PCI resources, and changing that is a fairly big job. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263 Reported-by: Andrew Tested-by: Andrew Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cb0084..f26fda7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -288,6 +288,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */ /* These fields are used by common fixups */ unsigned int transparent:1; /* Transparent PCI bridge */ -- cgit v1.1 From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 19 Jul 2010 14:56:17 +1000 Subject: mm: add context argument to shrinker callback The current shrinker implementation requires the registered callback to have global state to work from. This makes it difficult to shrink caches that are not global (e.g. per-filesystem caches). Pass the shrinker structure to the callback so that users can embed the shrinker structure in the context the shrinker needs to operate on and get back to it in the callback via container_of(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b969efb..a2b4804 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ -- cgit v1.1 From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Jul 2010 10:39:47 +0200 Subject: fb: handle allocation failure in alloc_apertures() If the kzalloc() fails we should return NULL. All the places that call alloc_apertures() check for this already. Signed-off-by: Dan Carpenter Acked-by: James Simmons Acked-by: Marcin Slusarz Signed-off-by: Dave Airlie --- include/linux/fb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8e5a9df..e7445df 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -873,6 +873,8 @@ struct fb_info { static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct) + max_num * sizeof(struct aperture), GFP_KERNEL); + if (!a) + return NULL; a->count = max_num; return a; } -- cgit v1.1 From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Jul 2010 13:24:34 -0700 Subject: vfs: fix RCU-lockdep false positive due to /proc If a single-threaded process does a file-descriptor operation, and some other process accesses that same file descriptor via /proc, the current rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep splat due to the reference count being increased by the /proc access after the reference-count check in fget_light() but before the check in rcu_dereference_check_fdtable(). This commit prevents this false positive by checking for a single-threaded process. To avoid #include hell, this commit uses the wrapper for thread_group_empty(current) defined by rcu_my_thread_group_empty() provided in a separate commit. Located-by: Miles Lane Located-by: Eric Dumazet Signed-off-by: Paul E. McKenney Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fdtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 013dc52..d147461 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -61,7 +61,8 @@ struct files_struct { (rcu_dereference_check((fdtfd), \ rcu_read_lock_held() || \ lockdep_is_held(&(files)->file_lock) || \ - atomic_read(&(files)->count) == 1)) + atomic_read(&(files)->count) == 1 || \ + rcu_my_thread_group_empty())) #define files_fdtable(files) \ (rcu_dereference_check_fdtable((files), (files)->fdt)) -- cgit v1.1 From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Tue, 20 Jul 2010 15:22:25 -0700 Subject: include/linux/vgaarb.h: add missing part of include guard vgaarb.h was missing the #define of the #ifndef at the top for the guard to prevent multiple #include's from causing re-define errors Signed-off-by: Doug Goldstein Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index c9a9759..814f294 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -29,6 +29,7 @@ */ #ifndef LINUX_VGA_H +#define LINUX_VGA_H #include -- cgit v1.1 From f8324e20f8289dffc646d64366332e05eaacab25 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Tue, 20 Jul 2010 18:45:14 -0700 Subject: math-emu: correct test for downshifting fraction in _FP_FROM_INT() The kernel's math-emu code contains a macro _FP_FROM_INT() which is used to convert an integer to a raw normalized floating-point value. It does this basically in three steps: 1. Compute the exponent from the number of leading zero bits. 2. Downshift large fractions to put the MSB in the right position for normalized fractions. 3. Upshift small fractions to put the MSB in the right position. There is an boundary error in step 2, causing a fraction with its MSB exactly one bit above the normalized MSB position to not be downshifted. This results in a non-normalized raw float, which when packed becomes a massively inaccurate representation for that input. The impact of this depends on a number of arch-specific factors, but it is known to have broken emulation of FXTOD instructions on UltraSPARC III, which was originally reported as GCC bug 44631 . Any arch which uses math-emu to emulate conversions from integers to same-size floats may be affected. The fix is simple: the exponent comparison used to determine if the fraction should be downshifted must be "<=" not "<". I'm sending a kernel module to test this as a reply to this message. There are also SPARC user-space test cases in the GCC bug entry. Signed-off-by: Mikael Pettersson Signed-off-by: David S. Miller --- include/math-emu/op-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index fd88226..9696a5e 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -799,7 +799,7 @@ do { \ X##_e -= (_FP_W_TYPE_SIZE - rsize); \ X##_e = rsize - X##_e - 1; \ \ - if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e) \ + if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs <= X##_e) \ __FP_FRAC_SRS_1(ur_, (X##_e - _FP_WFRACBITS_##fs + 1), rsize);\ _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize); \ if ((_FP_WFRACBITS_##fs - X##_e - 1) > 0) \ -- cgit v1.1 From edd63cb6b91024332d6983fc51058ac1ef0c081e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 21 Jul 2010 19:27:07 -0500 Subject: sysrq,kdb: Use __handle_sysrq() for kdb's sysrq function The kdb code should not toggle the sysrq state in case an end user wants to try and resume the normal kernel execution. Signed-off-by: Jason Wessel Acked-by: Dmitry Torokhov --- include/linux/sysrq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4496322..609e8ca 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -45,6 +45,7 @@ struct sysrq_key_op { */ void handle_sysrq(int key, struct tty_struct *tty); +void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); -- cgit v1.1