From 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Oct 2008 12:28:25 -0700 Subject: net: implement emergency route cache rebulds when gc_elasticity is exceeded This is a patch to provide on demand route cache rebuilding. Currently, our route cache is rebulid periodically regardless of need. This introduced unneeded periodic latency. This patch offers a better approach. Using code provided by Eric Dumazet, we compute the standard deviation of the average hash bucket chain length while running rt_check_expire. Should any given chain length grow to larger that average plus 4 standard deviations, we trigger an emergency hash table rebuild for that net namespace. This allows for the common case in which chains are well behaved and do not grow unevenly to not incur any latency at all, while those systems (which may be being maliciously attacked), only rebuild when the attack is detected. This patch take 2 other factors into account: 1) chains with multiple entries that differ by attributes that do not affect the hash value are only counted once, so as not to unduly bias system to rebuilding if features like QOS are heavily used 2) if rebuilding crosses a certain threshold (which is adjustable via the added sysctl in this patch), route caching is disabled entirely for that net namespace, since constant rebuilding is less efficient that no caching at all Tested successfully by me. Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ece1c92..977f482 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,8 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; + int sysctl_rt_cache_rebuild_count; + int current_rt_cache_rebuild_count; struct timer_list rt_secret_timer; atomic_t rt_genid; -- cgit v1.1 From b057efd4d226fcc3a92b0dc6d8ea8e8185ecb260 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 28 Oct 2008 11:59:11 -0700 Subject: netlink: constify struct nlattr * arg to parsing functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netlink.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 3643bbb..46b7764 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -233,7 +233,7 @@ extern int nla_parse(struct nlattr *tb[], int maxtype, extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); -extern int nla_memcpy(void *dest, struct nlattr *src, int count); +extern int nla_memcpy(void *dest, const struct nlattr *src, int count); extern int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); extern int nla_strcmp(const struct nlattr *nla, const char *str); @@ -741,7 +741,7 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, + const struct nlattr *nla, const struct nla_policy *policy) { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); @@ -875,7 +875,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, * nla_get_u32 - return payload of u32 attribute * @nla: u32 netlink attribute */ -static inline u32 nla_get_u32(struct nlattr *nla) +static inline u32 nla_get_u32(const struct nlattr *nla) { return *(u32 *) nla_data(nla); } @@ -884,7 +884,7 @@ static inline u32 nla_get_u32(struct nlattr *nla) * nla_get_be32 - return payload of __be32 attribute * @nla: __be32 netlink attribute */ -static inline __be32 nla_get_be32(struct nlattr *nla) +static inline __be32 nla_get_be32(const struct nlattr *nla) { return *(__be32 *) nla_data(nla); } @@ -893,7 +893,7 @@ static inline __be32 nla_get_be32(struct nlattr *nla) * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute */ -static inline u16 nla_get_u16(struct nlattr *nla) +static inline u16 nla_get_u16(const struct nlattr *nla) { return *(u16 *) nla_data(nla); } @@ -902,7 +902,7 @@ static inline u16 nla_get_u16(struct nlattr *nla) * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute */ -static inline __be16 nla_get_be16(struct nlattr *nla) +static inline __be16 nla_get_be16(const struct nlattr *nla) { return *(__be16 *) nla_data(nla); } @@ -911,7 +911,7 @@ static inline __be16 nla_get_be16(struct nlattr *nla) * nla_get_le16 - return payload of __le16 attribute * @nla: __le16 netlink attribute */ -static inline __le16 nla_get_le16(struct nlattr *nla) +static inline __le16 nla_get_le16(const struct nlattr *nla) { return *(__le16 *) nla_data(nla); } @@ -920,7 +920,7 @@ static inline __le16 nla_get_le16(struct nlattr *nla) * nla_get_u8 - return payload of u8 attribute * @nla: u8 netlink attribute */ -static inline u8 nla_get_u8(struct nlattr *nla) +static inline u8 nla_get_u8(const struct nlattr *nla) { return *(u8 *) nla_data(nla); } @@ -929,7 +929,7 @@ static inline u8 nla_get_u8(struct nlattr *nla) * nla_get_u64 - return payload of u64 attribute * @nla: u64 netlink attribute */ -static inline u64 nla_get_u64(struct nlattr *nla) +static inline u64 nla_get_u64(const struct nlattr *nla) { u64 tmp; @@ -942,7 +942,7 @@ static inline u64 nla_get_u64(struct nlattr *nla) * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ -static inline int nla_get_flag(struct nlattr *nla) +static inline int nla_get_flag(const struct nlattr *nla) { return !!nla; } @@ -953,7 +953,7 @@ static inline int nla_get_flag(struct nlattr *nla) * * Returns the number of milliseconds in jiffies. */ -static inline unsigned long nla_get_msecs(struct nlattr *nla) +static inline unsigned long nla_get_msecs(const struct nlattr *nla) { u64 msecs = nla_get_u64(nla); -- cgit v1.1 From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++++++++- include/net/dst.h | 3 ++- include/net/sock.h | 2 ++ include/net/xfrm.h | 4 ++++ 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2725f4e..487e345 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -269,8 +269,9 @@ struct sk_buff { struct dst_entry *dst; struct rtable *rtable; }; +#ifdef CONFIG_XFRM struct sec_path *sp; - +#endif /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu to->queue_mapping = from->queue_mapping; } +#ifdef CONFIG_XFRM +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return skb->sp; +} +#else +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return NULL; +} +#endif + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/dst.h b/include/net/dst.h index 8a8b71e..f96c4ba 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,9 @@ struct dst_entry struct neighbour *neighbour; struct hh_cache *hh; +#ifdef CONFIG_XFRM struct xfrm_state *xfrm; - +#endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); diff --git a/include/net/sock.h b/include/net/sock.h index ada50c0..d6b750a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,7 +229,9 @@ struct sock { } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; +#ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; +#endif rwlock_t sk_dst_lock; atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 11c890a..f2c5ba2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -882,6 +882,7 @@ struct xfrm_dst u32 path_cookie; }; +#ifdef CONFIG_XFRM static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { dst_release(xdst->route); @@ -894,6 +895,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) xdst->partner = NULL; #endif } +#endif extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); @@ -1536,9 +1538,11 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) } #endif +#ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif #endif /* _NET_XFRM_H */ -- cgit v1.1 From 3a2dfbe8acb154905fdc2fd03ec56df42e6c4cc4 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 28 Oct 2008 16:01:07 -0700 Subject: xfrm: Notify changes in UDP encapsulation via netlink Add new_mapping() implementation to the netlink xfrm_mgr to notify address/port changes detected in UDP encapsulated ESP packets. Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- include/linux/xfrm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4bc1e6b..52f3abd 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -199,6 +199,9 @@ enum { #define XFRM_MSG_NEWSPDINFO XFRM_MSG_NEWSPDINFO XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -438,6 +441,15 @@ struct xfrm_user_migrate { __u16 new_family; }; +struct xfrm_user_mapping { + struct xfrm_usersa_id id; + __u32 reqid; + xfrm_address_t old_saddr; + xfrm_address_t new_saddr; + __be16 old_sport; + __be16 new_sport; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 @@ -464,6 +476,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_REPORT XFRMNLGRP_REPORT XFRMNLGRP_MIGRATE, #define XFRMNLGRP_MIGRATE XFRMNLGRP_MIGRATE + XFRMNLGRP_MAPPING, +#define XFRMNLGRP_MAPPING XFRMNLGRP_MAPPING __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) -- cgit v1.1 From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:09:23 -0700 Subject: net: replace uses of NIP6_FMT with %p6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 4 ++-- include/net/ip_vs.h | 4 ++-- include/net/netfilter/nf_conntrack_tuple.h | 6 +++--- include/net/sctp/sctp.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 6fd7b01..42e01c9 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,8 +145,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", - NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + snprintf(buf, len, "%p6, port=%u", + &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe9fcf7..6a66920 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,8 +87,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", - NIP6(addr->in6)) + 1; + len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + &addr->in6) + 1; else #endif len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index a6874ba2..303efaf 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,10 +122,10 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", + printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", t, t->dst.protonum, - NIP6(*(struct in6_addr *)t->src.u3.all), ntohs(t->src.u.all), - NIP6(*(struct in6_addr *)t->dst.u3.all), ntohs(t->dst.u.all)); + t->src.u3.all, ntohs(t->src.u.all), + t->dst.u3.all, ntohs(t->dst.u.all)); #endif } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ed71b11..a84c397 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,9 +285,9 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead NIP6_FMT trail, \ + lead "%p6" trail, \ leadparm, \ - NIP6(saddr->v6.sin6_addr), \ + &saddr->v6.sin6_addr, \ otherparms); \ } else { \ printk(KERN_DEBUG \ -- cgit v1.1 From b189db5d299c6824780af5590564ff608adb3dea Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 22:38:52 -0700 Subject: net: remove NIP6(), NIP6_FMT, NIP6_SEQFMT and final users Open code NIP6_FMT in the one call inside sscanf and one user of NIP6() that could use %p6 in the netfilter code. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/kernel.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 396a350..77777c4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,18 +357,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) ((unsigned char *)&addr)[3] #define NIPQUAD_FMT "%u.%u.%u.%u" -#define NIP6(addr) \ - ntohs((addr).s6_addr16[0]), \ - ntohs((addr).s6_addr16[1]), \ - ntohs((addr).s6_addr16[2]), \ - ntohs((addr).s6_addr16[3]), \ - ntohs((addr).s6_addr16[4]), \ - ntohs((addr).s6_addr16[5]), \ - ntohs((addr).s6_addr16[6]), \ - ntohs((addr).s6_addr16[7]) -#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" -#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x" - #if defined(__LITTLE_ENDIAN) #define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ -- cgit v1.1 From 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 01:41:45 -0700 Subject: udp: introduce struct udp_table and multiple spinlocks UDP sockets are hashed in a 128 slots hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in hash table (bind time), or deleted from this table (close time) This is not scalable on SMP machines : 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on central rwlock. Introducing one spinlock per chain reduces latencies, for port randomization on heavily loaded UDP servers. This also speedup bindings to specific ports. udp_lib_unhash() was uninlined, becoming to big. Some cleanups were done to ease review of following patch (RCUification of UDP Unicast lookups) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/udp.h | 25 ++++++++++++------------- include/net/udplite.h | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d6b750a..d200dfb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -599,7 +599,7 @@ struct proto { union { struct inet_hashinfo *hashinfo; - struct hlist_head *udp_hash; + struct udp_table *udp_table; struct raw_hashinfo *raw_hash; } h; diff --git a/include/net/udp.h b/include/net/udp.h index 1e20509..df2bfe5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,8 +50,15 @@ struct udp_skb_cb { }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) -extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -extern rwlock_t udp_hash_lock; +struct udp_hslot { + struct hlist_head head; + spinlock_t lock; +} __attribute__((aligned(2 * sizeof(long)))); +struct udp_table { + struct udp_hslot hash[UDP_HTABLE_SIZE]; +}; +extern struct udp_table udp_table; +extern void udp_table_init(struct udp_table *); /* Note: this must match 'valbool' in sock_setsockopt */ @@ -110,15 +117,7 @@ static inline void udp_lib_hash(struct sock *sk) BUG(); } -static inline void udp_lib_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - } - write_unlock_bh(&udp_hash_lock); -} +extern void udp_lib_unhash(struct sock *sk); static inline void udp_lib_close(struct sock *sk, long timeout) { @@ -187,7 +186,7 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct udp_seq_afinfo { char *name; sa_family_t family; - struct hlist_head *hashtable; + struct udp_table *udp_table; struct file_operations seq_fops; struct seq_operations seq_ops; }; @@ -196,7 +195,7 @@ struct udp_iter_state { struct seq_net_private p; sa_family_t family; int bucket; - struct hlist_head *hashtable; + struct udp_table *udp_table; }; #ifdef CONFIG_PROC_FS diff --git a/include/net/udplite.h b/include/net/udplite.h index b76b2e3..afdffe6 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -11,7 +11,7 @@ #define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ extern struct proto udplite_prot; -extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +extern struct udp_table udplite_table; /* * Checksum computation is all in software, hence simpler getfrag. -- cgit v1.1 From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:11:14 -0700 Subject: udp: RCU handling for Unicast packets. Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts still will need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more important, forcing us to use call_rcu() calls, that have the bad property of making sockets structure cold. (rcu grace period between socket freeing and its potential reuse make this socket being cold in CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Cristopher Lameter : "Right. That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special attention must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or in worst case in the same chain while readers could do lookups in the same time. In order to avoid loops, a reader must check each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, lookup must start again at the begining. This *restart* loop is the reason we had to use rdlock for the multicast case, because we dont want to send same message several times to the same socket. We use RCU only for fast path. Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d200dfb..0bea25d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } +static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +{ + if (sk_hashed(sk)) { + hlist_del_init_rcu(&sk->sk_node); + return 1; + } + return 0; +} + +static __inline__ int sk_del_node_init_rcu(struct sock *sk) +{ + int rc = __sk_del_node_init_rcu(sk); + + if (rc) { + /* paranoid for a while -acme */ + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + return rc; +} + static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head_rcu(&sk->sk_node, list); +} + +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + __sk_add_node_rcu(sk, list); +} + static __inline__ void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); @@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) @@ -589,8 +623,9 @@ struct proto { int *sysctl_rmem; int max_header; - struct kmem_cache *slab; + struct kmem_cache *slab; unsigned int obj_size; + int slab_flags; atomic_t *orphan_count; -- cgit v1.1 From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:58 -0700 Subject: udp: introduce sk_for_each_rcu_safenext() Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.) "If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the "next" field (with proper barriers) before checking the hash." This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 +++++++++++++++++ include/net/sock.h | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3..3ba2998 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) +/** + * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * @next: the &struct hlist_node to use as a next cursor + * + * Special version of hlist_for_each_entry_rcu that make sure + * each next pointer is fetched before each iteration. + */ +#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(next)) + #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 0bea25d..a4f6d3f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,8 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each_rcu_safenext(__sk, node, list, next) \ + hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) -- cgit v1.1 From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:52:50 -0700 Subject: net: replace %p6 with %pI6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 2 +- include/net/ip_vs.h | 2 +- include/net/netfilter/nf_conntrack_tuple.h | 2 +- include/net/sctp/sctp.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e01c9..51cb75e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,7 +145,7 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%p6, port=%u", + snprintf(buf, len, "%pI6, port=%u", &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6a66920..af48cad 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,7 +87,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]", &addr->in6) + 1; else #endif diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 303efaf..42f1fc9 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,7 +122,7 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", + printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", t, t->dst.protonum, t->src.u3.all, ntohs(t->src.u.all), t->dst.u3.all, ntohs(t->dst.u.all)); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a84c397..e71b0f7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,7 +285,7 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead "%p6" trail, \ + lead "%pI6" trail, \ leadparm, \ &saddr->v6.sin6_addr, \ otherparms); \ -- cgit v1.1 From cc0fe83525d734bdd9c883b45eca6bb22f286daa Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 31 Oct 2008 00:42:25 -0700 Subject: xfrm: remove unused struct xfrm_policy::next Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f2c5ba2..45e11b3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -467,7 +467,6 @@ struct xfrm_policy_walk { struct xfrm_policy { - struct xfrm_policy *next; struct hlist_node bydst; struct hlist_node byidx; -- cgit v1.1 From 90d841fd0a5e02affd4e2bbdde4f710c61599281 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 31 Oct 2008 00:43:45 -0700 Subject: pkt_sched: sch_generic: Add Qdisc_ops peek() method. Add Qdisc_ops peek() method in order to replace requeuing. Based on ideas and patches of Herbert Xu, Patrick McHardy and David S. Miller. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3fe49d8..f81f7c4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -111,6 +111,7 @@ struct Qdisc_ops int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); + struct sk_buff * (*peek)(struct Qdisc *); int (*requeue)(struct sk_buff *, struct Qdisc *); unsigned int (*drop)(struct Qdisc *); -- cgit v1.1 From 48a8f519e0fe22a5c98523286b2a120841a11dd5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 31 Oct 2008 00:44:18 -0700 Subject: pkt_sched: Add ->peek() methods for fifo, prio and SFQ qdiscs. From: Patrick McHardy Just as a demonstration how easy adding a peek operation to the work-conserving qdiscs actually is. It doesn't need to keep or change any internal state in many cases thanks to the guarantee that the packet will either be dequeued or, if another packet arrives, the upper qdisc will immediately ->peek again to reevaluate the state. (This is only slightly modified Patrick's patch.) Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f81f7c4..da6839a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -433,6 +433,11 @@ static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) return __qdisc_dequeue_tail(sch, &sch->q); } +static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) +{ + return skb_peek(&sch->q); +} + static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) { -- cgit v1.1 From 77be155cba4e163e8bba9fd27222a8b6189ec4f7 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 31 Oct 2008 00:47:01 -0700 Subject: pkt_sched: Add peek emulation for non-work-conserving qdiscs. This patch adds qdisc_peek_dequeued() wrapper to emulate peek method with qdisc->dequeue() and storing "peeked" skb in qdisc->gso_skb until dequeuing. This is mainly for compatibility reasons not to break some strange configs because peeking is expected for non-work-conserving parent qdiscs to query work-conserving child qdiscs. This implementation requires using qdisc_dequeue_peeked() wrapper instead of directly calling qdisc->dequeue() for all qdiscs ever querried with qdisc->ops->peek() or qdisc_peek_dequeued(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index da6839a..9dcb5bf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -438,6 +438,29 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) return skb_peek(&sch->q); } +/* generic pseudo peek method for non-work-conserving qdisc */ +static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) +{ + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ + if (!sch->gso_skb) + sch->gso_skb = sch->dequeue(sch); + + return sch->gso_skb; +} + +/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ +static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) +{ + struct sk_buff *skb = sch->gso_skb; + + if (skb) + sch->gso_skb = NULL; + else + skb = sch->dequeue(sch); + + return skb; +} + static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) { -- cgit v1.1 From 3685f25de1b0447fff381c420de1e25bd57c9efb Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:56:49 -0700 Subject: misc: replace NIPQUAD() Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 4 ++-- include/net/ip_vs.h | 4 ++-- include/net/netfilter/nf_conntrack_tuple.h | 6 +++--- include/net/sctp/sctp.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 51cb75e..0127dac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -139,8 +139,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, { switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%u.%u.%u.%u, port=%u", - NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), + snprintf(buf, len, "%pI4, port=%u", + &((struct sockaddr_in *)addr)->sin_addr, ntohs(((struct sockaddr_in *) addr)->sin_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af48cad..fc63353 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -91,8 +91,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, &addr->in6) + 1; else #endif - len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, - NIPQUAD(addr->ip)) + 1; + len = snprintf(&buf[*idx], buf_len - *idx, "%pI4", + &addr->ip) + 1; *idx += len; BUG_ON(*idx > buf_len + 1); diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 42f1fc9..f2f6aa7 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -112,10 +112,10 @@ struct nf_conntrack_tuple_mask static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u " NIPQUAD_FMT ":%hu -> " NIPQUAD_FMT ":%hu\n", + printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n", t, t->dst.protonum, - NIPQUAD(t->src.u3.ip), ntohs(t->src.u.all), - NIPQUAD(t->dst.u3.ip), ntohs(t->dst.u.all)); + &t->src.u3.ip, ntohs(t->src.u.all), + &t->dst.u3.ip, ntohs(t->dst.u.all)); #endif } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e71b0f7..2379750 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -291,9 +291,9 @@ extern int sctp_debug_flag; otherparms); \ } else { \ printk(KERN_DEBUG \ - lead NIPQUAD_FMT trail, \ + lead "%pI4" trail, \ leadparm, \ - NIPQUAD(saddr->v4.sin_addr.s_addr), \ + &saddr->v4.sin_addr.s_addr, \ otherparms); \ } \ } -- cgit v1.1 From 3db594380b8452eda4d88b12844077809607caaa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2008 12:04:33 +0200 Subject: mac80211: remove wiphy_to_hw This isn't used by anyone, if we ever need it we can add it back, until then it's useless. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8856e2d..e41b9a8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -868,8 +868,6 @@ struct ieee80211_hw { u8 max_altrate_tries; }; -struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); - /** * SET_IEEE80211_DEV - set device for 802.11 hardware * -- cgit v1.1 From e87a2feea75e3cba7af43ed9317b56b282d87742 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2008 12:04:35 +0200 Subject: mac80211: remove max_antenna_gain config The antenna gain isn't exactly configurable, despite the belief of some unnamed individual who thinks that the EEPROM might influence it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e41b9a8..2fab5a7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -470,7 +470,6 @@ enum ieee80211_conf_flags { * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @max_antenna_gain: maximum antenna gain (in dBi) * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, * 1/2: antenna 0/1 * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx @@ -485,7 +484,6 @@ struct ieee80211_conf { u16 listen_interval; u32 flags; int power_level; - int max_antenna_gain; u8 antenna_sel_tx; u8 antenna_sel_rx; -- cgit v1.1 From 7a5158ef8da70fdedeb0530faaa8128aa645be3c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Oct 2008 10:59:33 +0200 Subject: mac80211: fix short slot handling This patch makes mac80211 handle short slot requests from the AP properly. Also warn about uses of IEEE80211_CONF_SHORT_SLOT_TIME and optimise out the code since it cannot ever be hit anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2fab5a7..d1466e7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -180,8 +180,12 @@ enum ieee80211_bss_change { * @assoc: association status * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection - * @use_short_preamble: use 802.11b short preamble - * @use_short_slot: use short slot time (only relevant for ERP) + * @use_short_preamble: use 802.11b short preamble; + * if the hardware cannot handle this it must set the + * IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE hardware flag + * @use_short_slot: use short slot time (only relevant for ERP); + * if the hardware cannot handle this it must set the + * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval @@ -442,23 +446,23 @@ struct ieee80211_rx_status { * * Flags to define PHY configuration options * - * @IEEE80211_CONF_SHORT_SLOT_TIME: use 802.11g short slot time * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported) * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { - /* - * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers - * have been converted to use bss_info_changed() for slot time - * configuration - */ - IEEE80211_CONF_SHORT_SLOT_TIME = (1<<0), - IEEE80211_CONF_RADIOTAP = (1<<1), - IEEE80211_CONF_SUPPORT_HT_MODE = (1<<2), - IEEE80211_CONF_PS = (1<<3), + IEEE80211_CONF_RADIOTAP = (1<<0), + IEEE80211_CONF_SUPPORT_HT_MODE = (1<<1), + IEEE80211_CONF_PS = (1<<2), }; +/* XXX: remove all this once drivers stop trying to use it */ +static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) +{ + return 0; +} +#define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) + /** * struct ieee80211_conf - configuration of the device * -- cgit v1.1 From d9fe60dea7779d412b34679f1177c5ca1940ea8d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:13:49 +0200 Subject: 802.11: clean up/fix HT support This patch cleans up a number of things: * the unusable definition of the HT capabilities/HT information information elements * variable names that are hard to understand * mac80211: move ieee80211_handle_ht to ht.c and remove the unused enable_ht parameter * mac80211: fix bug with MCS rate 32 in ieee80211_handle_ht * mac80211: fix bug with casting the result of ieee80211_bss_get_ie to an information element _contents_ rather than the whole element, add size checking (another out-of-bounds access bug fixed!) * mac80211: remove some unused return values in favour of BUG_ON checking * a few minor other things Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 133 +++++++++++++++++++++++++++++++++------------- include/net/mac80211.h | 12 ++--- include/net/wireless.h | 15 +++--- 3 files changed, 109 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 14126bc..64a4abc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -685,28 +685,88 @@ struct ieee80211_bar { #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA 0x0004 + +#define IEEE80211_HT_MCS_MASK_LEN 10 + +/** + * struct ieee80211_mcs_info - MCS information + * @rx_mask: RX mask + * @rx_highest: highest supported RX rate + * @tx_params: TX parameters + */ +struct ieee80211_mcs_info { + u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; + __le16 rx_highest; + u8 tx_params; + u8 reserved[3]; +} __attribute__((packed)); + +/* 802.11n HT capability MSC set */ +#define IEEE80211_HT_MCS_RX_HIGHEST_MASK 0x3ff +#define IEEE80211_HT_MCS_TX_DEFINED 0x01 +#define IEEE80211_HT_MCS_TX_RX_DIFF 0x02 +/* value 0 == 1 stream etc */ +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK 0x0C +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT 2 +#define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 +#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 + +/* + * 802.11n D5.0 20.3.5 / 20.6 says: + * - indices 0 to 7 and 32 are single spatial stream + * - 8 to 31 are multiple spatial streams using equal modulation + * [8..15 for two streams, 16..23 for three and 24..31 for four] + * - remainder are multiple spatial streams using unequal modulation + */ +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33 +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \ + (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) + /** * struct ieee80211_ht_cap - HT capabilities * - * This structure refers to "HT capabilities element" as - * described in 802.11n draft section 7.3.2.52 + * This structure is the "HT capabilities element" as + * described in 802.11n D5.0 7.3.2.57 */ struct ieee80211_ht_cap { __le16 cap_info; u8 ampdu_params_info; - u8 supp_mcs_set[16]; + + /* 16 bytes MCS information */ + struct ieee80211_mcs_info mcs; + __le16 extended_ht_cap_info; __le32 tx_BF_cap_info; u8 antenna_selection_info; } __attribute__ ((packed)); +/* 802.11n HT capabilities masks (for cap_info) */ +#define IEEE80211_HT_CAP_LDPC_CODING 0x0001 +#define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 +#define IEEE80211_HT_CAP_SM_PS 0x000C +#define IEEE80211_HT_CAP_GRN_FLD 0x0010 +#define IEEE80211_HT_CAP_SGI_20 0x0020 +#define IEEE80211_HT_CAP_SGI_40 0x0040 +#define IEEE80211_HT_CAP_TX_STBC 0x0080 +#define IEEE80211_HT_CAP_RX_STBC 0x0300 +#define IEEE80211_HT_CAP_DELAY_BA 0x0400 +#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 +#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 +#define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 + +/* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ +#define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 +#define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C + /** - * struct ieee80211_ht_cap - HT additional information + * struct ieee80211_ht_info - HT information * - * This structure refers to "HT information element" as - * described in 802.11n draft section 7.3.2.53 + * This structure is the "HT information element" as + * described in 802.11n D5.0 7.3.2.58 */ -struct ieee80211_ht_addt_info { +struct ieee80211_ht_info { u8 control_chan; u8 ht_param; __le16 operation_mode; @@ -714,36 +774,33 @@ struct ieee80211_ht_addt_info { u8 basic_set[16]; } __attribute__ ((packed)); -/* 802.11n HT capabilities masks */ -#define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_SM_PS 0x000C -#define IEEE80211_HT_CAP_GRN_FLD 0x0010 -#define IEEE80211_HT_CAP_SGI_20 0x0020 -#define IEEE80211_HT_CAP_SGI_40 0x0040 -#define IEEE80211_HT_CAP_DELAY_BA 0x0400 -#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 -#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -/* 802.11n HT capability AMPDU settings */ -#define IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 -#define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -/* 802.11n HT capability MSC set */ -#define IEEE80211_SUPP_MCS_SET_UEQM 4 -#define IEEE80211_HT_CAP_MAX_STREAMS 4 -#define IEEE80211_SUPP_MCS_SET_LEN 10 -/* maximum streams the spec allows */ -#define IEEE80211_HT_CAP_MCS_TX_DEFINED 0x01 -#define IEEE80211_HT_CAP_MCS_TX_RX_DIFF 0x02 -#define IEEE80211_HT_CAP_MCS_TX_STREAMS 0x0C -#define IEEE80211_HT_CAP_MCS_TX_UEQM 0x10 -/* 802.11n HT IE masks */ -#define IEEE80211_HT_IE_CHA_SEC_OFFSET 0x03 -#define IEEE80211_HT_IE_CHA_SEC_NONE 0x00 -#define IEEE80211_HT_IE_CHA_SEC_ABOVE 0x01 -#define IEEE80211_HT_IE_CHA_SEC_BELOW 0x03 -#define IEEE80211_HT_IE_CHA_WIDTH 0x04 -#define IEEE80211_HT_IE_HT_PROTECTION 0x0003 -#define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 -#define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* for ht_param */ +#define IEEE80211_HT_PARAM_CHA_SEC_OFFSET 0x03 +#define IEEE80211_HT_PARAM_CHA_SEC_NONE 0x00 +#define IEEE80211_HT_PARAM_CHA_SEC_ABOVE 0x01 +#define IEEE80211_HT_PARAM_CHA_SEC_BELOW 0x03 +#define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY 0x04 +#define IEEE80211_HT_PARAM_RIFS_MODE 0x08 +#define IEEE80211_HT_PARAM_SPSMP_SUPPORT 0x10 +#define IEEE80211_HT_PARAM_SERV_INTERVAL_GRAN 0xE0 + +/* for operation_mode */ +#define IEEE80211_HT_OP_MODE_PROTECTION 0x0003 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONE 0 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER 1 +#define IEEE80211_HT_OP_MODE_PROTECTION_20MHZ 2 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 +#define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 +#define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 + +/* for stbc_param */ +#define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 +#define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT 0x0080 +#define IEEE80211_HT_STBC_PARAM_STBC_BEACON 0x0100 +#define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT 0x0200 +#define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE 0x0400 +#define IEEE80211_HT_STBC_PARAM_PCO_PHASE 0x0800 + /* block-ack parameters */ #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 @@ -949,7 +1006,7 @@ enum ieee80211_eid { WLAN_EID_EXT_SUPP_RATES = 50, /* 802.11n */ WLAN_EID_HT_CAPABILITY = 45, - WLAN_EID_HT_EXTRA_INFO = 61, + WLAN_EID_HT_INFORMATION = 61, /* 802.11i */ WLAN_EID_RSN = 48, WLAN_EID_WPA = 221, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d1466e7..2870f39 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -191,7 +191,7 @@ enum ieee80211_bss_change { * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp * @assoc_ht: association in HT mode - * @ht_conf: ht capabilities + * @ht_cap: ht capabilities * @ht_bss_conf: ht extended capabilities * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in @@ -212,7 +212,7 @@ struct ieee80211_bss_conf { u64 basic_rates; /* ht related data */ bool assoc_ht; - struct ieee80211_ht_info *ht_conf; + struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_ht_bss_info *ht_bss_conf; }; @@ -477,7 +477,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, * 1/2: antenna 0/1 * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx - * @ht_conf: describes current self configuration of 802.11n HT capabilies + * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to */ @@ -493,7 +493,7 @@ struct ieee80211_conf { struct ieee80211_channel *channel; - struct ieee80211_ht_info ht_conf; + struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_ht_bss_info ht_bss_conf; }; @@ -687,7 +687,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_info: HT capabilities of this STA + * @ht_cap: HT capabilities of this STA * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ @@ -695,7 +695,7 @@ struct ieee80211_sta { u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; - struct ieee80211_ht_info ht_info; + struct ieee80211_sta_ht_cap ht_cap; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); diff --git a/include/net/wireless.h b/include/net/wireless.h index 721efb3..c0aa0fb 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -133,23 +134,23 @@ struct ieee80211_rate { }; /** - * struct ieee80211_ht_info - describing STA's HT capabilities + * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed * to describe 802.11n HT capabilities for an STA. * - * @ht_supported: is HT supported by STA, 0: no, 1: yes + * @ht_supported: is HT supported by the STA * @cap: HT capabilities map as described in 802.11n spec * @ampdu_factor: Maximum A-MPDU length factor * @ampdu_density: Minimum A-MPDU spacing - * @supp_mcs_set: Supported MCS set as described in 802.11n spec + * @mcs: Supported MCS rates */ -struct ieee80211_ht_info { +struct ieee80211_sta_ht_cap { u16 cap; /* use IEEE80211_HT_CAP_ */ - u8 ht_supported; + bool ht_supported; u8 ampdu_factor; u8 ampdu_density; - u8 supp_mcs_set[16]; + struct ieee80211_mcs_info mcs; }; /** @@ -173,7 +174,7 @@ struct ieee80211_supported_band { enum ieee80211_band band; int n_channels; int n_bitrates; - struct ieee80211_ht_info ht_info; + struct ieee80211_sta_ht_cap ht_cap; }; /** -- cgit v1.1 From 0f4ac38b5999c3d51adad52d61c56c1b99c247ec Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:18:04 +0200 Subject: mac80211: kill hw.conf.antenna_sel_{rx,tx} Never actually used. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2870f39..5f28b7f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -324,7 +324,7 @@ struct ieee80211_tx_altrate { * @flags: transmit info flags, defined above * @band: TBD * @tx_rate_idx: TBD - * @antenna_sel_tx: TBD + * @antenna_sel_tx: antenna to use, 0 for automatic diversity * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers @@ -474,9 +474,6 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, - * 1/2: antenna 0/1 - * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to @@ -488,8 +485,6 @@ struct ieee80211_conf { u16 listen_interval; u32 flags; int power_level; - u8 antenna_sel_tx; - u8 antenna_sel_rx; struct ieee80211_channel *channel; -- cgit v1.1 From e8975581f63870be42ff4662b293d1b0c8c21350 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:18:51 +0200 Subject: mac80211: introduce hw config change flags This makes mac80211 notify the driver which configuration actually changed, e.g. channel etc. No driver changes, this is just plumbing, driver authors are expected to act on this if they want to. Also remove the HW CONFIG debug printk, it's incorrect, often we configure something else. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5f28b7f..34e8569 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -464,12 +464,32 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) /** + * enum ieee80211_conf_changed - denotes which configuration changed + * + * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed + * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed + * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed + * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed + * @IEEE80211_CONF_CHANGE_PS: the PS flag changed + * @IEEE80211_CONF_CHANGE_POWER: the TX power changed + * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed + */ +enum ieee80211_conf_changed { + IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), + IEEE80211_CONF_CHANGE_BEACON_INTERVAL = BIT(1), + IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), + IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), + IEEE80211_CONF_CHANGE_PS = BIT(4), + IEEE80211_CONF_CHANGE_POWER = BIT(5), + IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), +}; + +/** * struct ieee80211_conf - configuration of the device * * This struct indicates how the driver shall configure the hardware. * * @radio_enabled: when zero, driver is required to switch off the radio. - * TODO make a flag * @beacon_int: beacon interval (TODO make interface config) * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above @@ -479,13 +499,13 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @channel: the channel to tune to */ struct ieee80211_conf { - int radio_enabled; - int beacon_int; - u16 listen_interval; u32 flags; int power_level; + u16 listen_interval; + bool radio_enabled; + struct ieee80211_channel *channel; struct ieee80211_sta_ht_cap ht_cap; @@ -1214,7 +1234,7 @@ struct ieee80211_ops { struct ieee80211_if_init_conf *conf); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); - int (*config)(struct ieee80211_hw *hw, struct ieee80211_conf *conf); + int (*config)(struct ieee80211_hw *hw, u32 changed); int (*config_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_if_conf *conf); -- cgit v1.1 From d51626df5747efaa8d2c00678f64cb503845effe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:20:13 +0200 Subject: nl80211: export HT capabilities This exports the local HT capabilities in nl80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9bad6540..41720d4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -452,17 +452,29 @@ enum nl80211_mpath_info { * an array of nested frequency attributes * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, * an array of nested bitrate attributes + * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as + * defined in 802.11n + * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n + * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, NL80211_BAND_ATTR_FREQS, NL80211_BAND_ATTR_RATES, + NL80211_BAND_ATTR_HT_MCS_SET, + NL80211_BAND_ATTR_HT_CAPA, + NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 }; +#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA + /** * enum nl80211_frequency_attr - frequency attributes * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz -- cgit v1.1 From 9124b07740c51cbc6e358dd0c4abc6ee8ded084d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Oct 2008 19:17:54 +0200 Subject: mac80211: make retry limits part of hw config Instead of having a separate callback, use the HW config callback with a new flag to change retry limits. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 34e8569..fd52300 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -473,6 +473,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @IEEE80211_CONF_CHANGE_PS: the PS flag changed * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed + * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), @@ -482,6 +483,7 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_PS = BIT(4), IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), + IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), }; /** @@ -497,6 +499,12 @@ enum ieee80211_conf_changed { * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to + * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame + * (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11, + * but actually means the number of transmissions not the number of retries + * @short_frame_max_tx_count: Maximum number of transmissions for a "short" + * frame, called "dot11ShortRetryLimit" in 802.11, but actually means the + * number of transmissions not the number of retries */ struct ieee80211_conf { int beacon_int; @@ -506,6 +514,8 @@ struct ieee80211_conf { u16 listen_interval; bool radio_enabled; + u8 long_frame_max_tx_count, short_frame_max_tx_count; + struct ieee80211_channel *channel; struct ieee80211_sta_ht_cap ht_cap; @@ -1190,8 +1200,6 @@ enum ieee80211_ampdu_mlme_action { * the device does fragmentation by itself; if this method is assigned then * the stack will not do fragmentation. * - * @set_retry_limit: Configuration of retry limits (if device needs it) - * * @sta_notify: Notifies low level driver about addition or removal * of assocaited station or AP. * @@ -1261,8 +1269,6 @@ struct ieee80211_ops { u32 *iv32, u16 *iv16); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); - int (*set_retry_limit)(struct ieee80211_hw *hw, - u32 short_retry, u32 long_retr); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, -- cgit v1.1 From bda3933a8aceedd03e0dd410844bd310033ca756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 11 Oct 2008 01:51:51 +0200 Subject: mac80211: move bss_conf into vif Move bss_conf into the vif struct so that drivers can access it during ->tx without having to store it in the private data or similar. No driver updates because this is only for when they want to start using it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fd52300..94ff3ef 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -529,11 +529,14 @@ struct ieee80211_conf { * use during the life of a virtual interface. * * @type: type of this virtual interface + * @bss_conf: BSS configuration for this interface, either our own + * or the BSS we're associated to * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ struct ieee80211_vif { enum nl80211_iftype type; + struct ieee80211_bss_conf bss_conf; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; -- cgit v1.1 From ae5eb02641233a4e9d1b92d22090f1b1afa14466 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Oct 2008 16:58:37 +0200 Subject: mac80211: rewrite HT handling The HT handling has the following deficiencies, which I've (partially) fixed: * it always uses the AP info even if there is no AP, hence has no chance of working as an AP * it pretends to be HW config, but really is per-BSS * channel sanity checking is left to the drivers * it generally lets the driver control too much HT enabling is still wrong with this patch if you have more than one virtual STA mode interface, but that never happens currently. Once WDS, IBSS or AP/VLAN gets HT capabilities, it will also be wrong, see the comment in ieee80211_enable_ht(). Additionally, this fixes a number of bugs: * mac80211: ieee80211_set_disassoc doesn't notify the driver any more since the refactoring * iwl-agn-rs: always uses the HT capabilities from the wrong stuff mac80211 gives it rather than the actual peer STA * ath9k: a number of bugs resulting from the broken HT API I'm not entirely happy with putting the HT capabilities into struct ieee80211_sta as restricted to our own HT TX capabilities, but I see no cleaner solution for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 59 ++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 94ff3ef..9801afb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3,7 +3,7 @@ * * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007 Johannes Berg + * Copyright 2007-2008 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -82,22 +82,6 @@ enum ieee80211_notification_types { }; /** - * struct ieee80211_ht_bss_info - describing BSS's HT characteristics - * - * This structure describes most essential parameters needed - * to describe 802.11n HT characteristics in a BSS. - * - * @primary_channel: channel number of primery channel - * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width) - * @bss_op_mode: 802.11n's BSS operation modes (e.g. HT protection) - */ -struct ieee80211_ht_bss_info { - u8 primary_channel; - u8 bss_cap; /* use IEEE80211_HT_IE_CHA_ */ - u8 bss_op_mode; /* use IEEE80211_HT_IE_ */ -}; - -/** * enum ieee80211_max_queues - maximum number of queues * * @IEEE80211_MAX_QUEUES: Maximum number of regular device queues. @@ -172,6 +156,19 @@ enum ieee80211_bss_change { }; /** + * struct ieee80211_bss_ht_conf - BSS's changing HT configuration + * @secondary_channel_offset: secondary channel offset, uses + * %IEEE80211_HT_PARAM_CHA_SEC_ values + * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX + * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info) + */ +struct ieee80211_bss_ht_conf { + u8 secondary_channel_offset; + bool width_40_ok; + u16 operation_mode; +}; + +/** * struct ieee80211_bss_conf - holds the BSS's changing parameters * * This structure keeps information about a BSS (and an association @@ -190,9 +187,7 @@ enum ieee80211_bss_change { * @timestamp: beacon timestamp * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp - * @assoc_ht: association in HT mode - * @ht_cap: ht capabilities - * @ht_bss_conf: ht extended capabilities + * @ht: BSS's HT configuration * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. @@ -210,10 +205,7 @@ struct ieee80211_bss_conf { u16 assoc_capability; u64 timestamp; u64 basic_rates; - /* ht related data */ - bool assoc_ht; - struct ieee80211_sta_ht_cap *ht_cap; - struct ieee80211_ht_bss_info *ht_bss_conf; + struct ieee80211_bss_ht_conf ht; }; /** @@ -447,13 +439,11 @@ struct ieee80211_rx_status { * Flags to define PHY configuration options * * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) - * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported) * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { IEEE80211_CONF_RADIOTAP = (1<<0), - IEEE80211_CONF_SUPPORT_HT_MODE = (1<<1), - IEEE80211_CONF_PS = (1<<2), + IEEE80211_CONF_PS = (1<<1), }; /* XXX: remove all this once drivers stop trying to use it */ @@ -463,6 +453,10 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) } #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) +struct ieee80211_ht_conf { + bool enabled; +}; + /** * enum ieee80211_conf_changed - denotes which configuration changed * @@ -474,6 +468,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed + * @IEEE80211_CONF_CHANGE_HT: HT configuration changed */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), @@ -484,6 +479,7 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), + IEEE80211_CONF_CHANGE_HT = BIT(8), }; /** @@ -496,9 +492,8 @@ enum ieee80211_conf_changed { * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @ht_cap: describes current self configuration of 802.11n HT capabilities - * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to + * @ht: the HT configuration for the device * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame * (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11, * but actually means the number of transmissions not the number of retries @@ -517,9 +512,7 @@ struct ieee80211_conf { u8 long_frame_max_tx_count, short_frame_max_tx_count; struct ieee80211_channel *channel; - - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_ht_bss_info ht_bss_conf; + struct ieee80211_ht_conf ht; }; /** @@ -715,7 +708,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA + * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ -- cgit v1.1 From e6a9854b05c1a6af1308fe2b8c68f35abf28a3ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2008 12:40:02 +0200 Subject: mac80211/drivers: rewrite the rate control API So after the previous changes we were still unhappy with how convoluted the API is and decided to make things simpler for everybody. This completely changes the rate control API, now taking into account 802.11n with MCS rates and more control, most drivers don't support that though. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 254 ++++++++++++++++++++++++++++++------------------- 1 file changed, 156 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9801afb..3741c0a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -214,29 +214,24 @@ struct ieee80211_bss_conf { * These flags are used with the @flags member of &ieee80211_tx_info. * * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. - * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame - * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g., - * for combined 802.11g / 802.11b networks) + * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence + * number to this frame, taking care of not overwriting the fragment + * number and increasing the sequence number only when the + * IEEE80211_TX_CTL_FIRST_FRAGMENT flag is set. mac80211 will properly + * assign sequence numbers to QoS-data frames but cannot do so correctly + * for non-QoS-data and management frames because beacons need them from + * that counter as well and mac80211 cannot guarantee proper sequencing. + * If this flag is set, the driver should instruct the hardware to + * assign a sequence number to the frame or assign one itself. Cf. IEEE + * 802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for + * beacons and always be clear for frames without a sequence number field. * @IEEE80211_TX_CTL_NO_ACK: tell the low level not to wait for an ack - * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: TBD * @IEEE80211_TX_CTL_CLEAR_PS_FILT: clear powersave filter for destination * station - * @IEEE80211_TX_CTL_REQUEUE: TBD * @IEEE80211_TX_CTL_FIRST_FRAGMENT: this is a first fragment of the frame - * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD - * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the - * through set_retry_limit configured long retry value * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU - * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number - * of streams when this flag is on can be extracted from antenna_sel_tx, - * so if 1 antenna is marked use SISO, 2 antennas marked use MIMO, n - * antennas marked use MIMO_n. - * @IEEE80211_TX_CTL_GREEN_FIELD: use green field protection for this frame - * @IEEE80211_TX_CTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width - * @IEEE80211_TX_CTL_DUP_DATA: duplicate data frame on both 20 Mhz channels - * @IEEE80211_TX_CTL_SHORT_GI: send this frame using short guard interval - * @IEEE80211_TX_CTL_INJECTED: TBD + * @IEEE80211_TX_CTL_INJECTED: Frame was injected, internal to mac80211. * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted * because the destination STA was in powersave mode. * @IEEE80211_TX_STAT_ACK: Frame was acknowledged @@ -244,62 +239,70 @@ struct ieee80211_bss_conf { * is for the whole aggregation. * @IEEE80211_TX_STAT_AMPDU_NO_BACK: no block ack was returned, * so consider using block ack request (BAR). - * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence - * number to this frame, taking care of not overwriting the fragment - * number and increasing the sequence number only when the - * IEEE80211_TX_CTL_FIRST_FRAGMENT flags is set. mac80211 will properly - * assign sequence numbers to QoS-data frames but cannot do so correctly - * for non-QoS-data and management frames because beacons need them from - * that counter as well and mac80211 cannot guarantee proper sequencing. - * If this flag is set, the driver should instruct the hardware to - * assign a sequence number to the frame or assign one itself. Cf. IEEE - * 802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for - * beacons always be clear for frames without a sequence number field. + * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be + * set by rate control algorithms to indicate probe rate, will + * be cleared for fragmented frames (except on the last fragment) + * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), - IEEE80211_TX_CTL_USE_RTS_CTS = BIT(2), - IEEE80211_TX_CTL_USE_CTS_PROTECT = BIT(3), - IEEE80211_TX_CTL_NO_ACK = BIT(4), - IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(5), - IEEE80211_TX_CTL_CLEAR_PS_FILT = BIT(6), - IEEE80211_TX_CTL_REQUEUE = BIT(7), - IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(8), - IEEE80211_TX_CTL_SHORT_PREAMBLE = BIT(9), - IEEE80211_TX_CTL_LONG_RETRY_LIMIT = BIT(10), - IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(12), - IEEE80211_TX_CTL_AMPDU = BIT(13), - IEEE80211_TX_CTL_OFDM_HT = BIT(14), - IEEE80211_TX_CTL_GREEN_FIELD = BIT(15), - IEEE80211_TX_CTL_40_MHZ_WIDTH = BIT(16), - IEEE80211_TX_CTL_DUP_DATA = BIT(17), - IEEE80211_TX_CTL_SHORT_GI = BIT(18), - IEEE80211_TX_CTL_INJECTED = BIT(19), - IEEE80211_TX_STAT_TX_FILTERED = BIT(20), - IEEE80211_TX_STAT_ACK = BIT(21), - IEEE80211_TX_STAT_AMPDU = BIT(22), - IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(23), - IEEE80211_TX_CTL_ASSIGN_SEQ = BIT(24), + IEEE80211_TX_CTL_ASSIGN_SEQ = BIT(1), + IEEE80211_TX_CTL_NO_ACK = BIT(2), + IEEE80211_TX_CTL_CLEAR_PS_FILT = BIT(3), + IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(4), + IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(5), + IEEE80211_TX_CTL_AMPDU = BIT(6), + IEEE80211_TX_CTL_INJECTED = BIT(7), + IEEE80211_TX_STAT_TX_FILTERED = BIT(8), + IEEE80211_TX_STAT_ACK = BIT(9), + IEEE80211_TX_STAT_AMPDU = BIT(10), + IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), + IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), + + /* XXX: remove this */ + IEEE80211_TX_CTL_REQUEUE = BIT(13), }; +enum mac80211_rate_control_flags { + IEEE80211_TX_RC_USE_RTS_CTS = BIT(0), + IEEE80211_TX_RC_USE_CTS_PROTECT = BIT(1), + IEEE80211_TX_RC_USE_SHORT_PREAMBLE = BIT(2), + + /* rate index is an MCS rate number instead of an index */ + IEEE80211_TX_RC_MCS = BIT(3), + IEEE80211_TX_RC_GREEN_FIELD = BIT(4), + IEEE80211_TX_RC_40_MHZ_WIDTH = BIT(5), + IEEE80211_TX_RC_DUP_DATA = BIT(6), + IEEE80211_TX_RC_SHORT_GI = BIT(7), +}; + + +/* there are 40 bytes if you don't need the rateset to be kept */ +#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE 40 -#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE \ - (sizeof(((struct sk_buff *)0)->cb) - 8) -#define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \ - (IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)) +/* if you do need the rateset, then you have less space */ +#define IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE 24 -/* maximum number of alternate rate retry stages */ -#define IEEE80211_TX_MAX_ALTRATE 3 +/* maximum number of rate stages */ +#define IEEE80211_TX_MAX_RATES 5 /** - * struct ieee80211_tx_altrate - alternate rate selection/status + * struct ieee80211_tx_rate - rate selection/status * - * @rate_idx: rate index to attempt to send with + * @idx: rate index to attempt to send with + * @flags: rate control flags (&enum mac80211_rate_control_flags) * @limit: number of retries before fallback + * + * A value of -1 for @idx indicates an invalid rate and, if used + * in an array of retry rates, that no more rates should be tried. + * + * When used for transmit status reporting, the driver should + * always report the rate along with the flags it used. */ -struct ieee80211_tx_altrate { - s8 rate_idx; - u8 limit; +struct ieee80211_tx_rate { + s8 idx; + u8 count; + u8 flags; }; /** @@ -314,15 +317,12 @@ struct ieee80211_tx_altrate { * it may be NULL. * * @flags: transmit info flags, defined above - * @band: TBD - * @tx_rate_idx: TBD + * @band: the band to transmit on (use for checking for races) * @antenna_sel_tx: antenna to use, 0 for automatic diversity * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers * @retry_count: number of retries - * @excessive_retries: set to 1 if the frame was retried many times - * but not acknowledged * @ampdu_ack_len: number of aggregated frames. * relevant only if IEEE80211_TX_STATUS_AMPDU was set. * @ampdu_ack_map: block ack bit map for the aggregation. @@ -333,31 +333,43 @@ struct ieee80211_tx_info { /* common information */ u32 flags; u8 band; - s8 tx_rate_idx; + u8 antenna_sel_tx; - /* 1 byte hole */ + /* 2 byte hole */ union { struct { + union { + /* rate control */ + struct { + struct ieee80211_tx_rate rates[ + IEEE80211_TX_MAX_RATES]; + s8 rts_cts_rate_idx; + }; + /* only needed before rate control */ + unsigned long jiffies; + }; /* NB: vif can be NULL for injected frames */ struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; - unsigned long jiffies; - s8 rts_cts_rate_idx; - u8 retry_limit; - struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE]; } control; struct { + struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; + u8 ampdu_ack_len; u64 ampdu_ack_map; int ack_signal; - struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1]; - u8 retry_count; - bool excessive_retries; - u8 ampdu_ack_len; + /* 8 bytes free */ } status; - void *driver_data[IEEE80211_TX_INFO_DRIVER_DATA_PTRS]; + struct { + struct ieee80211_tx_rate driver_rates[ + IEEE80211_TX_MAX_RATES]; + void *rate_driver_data[ + IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE / sizeof(void *)]; + }; + void *driver_data[ + IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)]; }; }; @@ -366,6 +378,41 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb) return (struct ieee80211_tx_info *)skb->cb; } +/** + * ieee80211_tx_info_clear_status - clear TX status + * + * @info: The &struct ieee80211_tx_info to be cleared. + * + * When the driver passes an skb back to mac80211, it must report + * a number of things in TX status. This function clears everything + * in the TX status but the rate control information (it does clear + * the count since you need to fill that in anyway). + * + * NOTE: You can only use this function if you do NOT use + * info->driver_data! Use info->rate_driver_data + * instead if you need only the less space that allows. + */ +static inline void +ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) +{ + int i; + + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != + offsetof(struct ieee80211_tx_info, control.rates)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != + offsetof(struct ieee80211_tx_info, driver_rates)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != 8); + /* clear the rate counts */ + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) + info->status.rates[i].count = 0; + + BUILD_BUG_ON( + offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + memset(&info->status.ampdu_ack_len, 0, + sizeof(struct ieee80211_tx_info) - + offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); +} + /** * enum mac80211_rx_flags - receive flags @@ -869,8 +916,8 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_altrates: maximum number of alternate rate retry stages - * @max_altrate_tries: maximum number of tries for each stage + * @max_rates: maximum number of alternate rate retry stages + * @max_rate_tries: maximum number of tries for each stage */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -887,8 +934,8 @@ struct ieee80211_hw { u16 ampdu_queues; u16 max_listen_interval; s8 max_signal; - u8 max_altrates; - u8 max_altrate_tries; + u8 max_rates; + u8 max_rate_tries; }; /** @@ -927,9 +974,9 @@ static inline struct ieee80211_rate * ieee80211_get_tx_rate(const struct ieee80211_hw *hw, const struct ieee80211_tx_info *c) { - if (WARN_ON(c->tx_rate_idx < 0)) + if (WARN_ON(c->control.rates[0].idx < 0)) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->tx_rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[0].idx]; } static inline struct ieee80211_rate * @@ -945,9 +992,9 @@ static inline struct ieee80211_rate * ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, const struct ieee80211_tx_info *c, int idx) { - if (c->control.retries[idx].rate_idx < 0) + if (c->control.rates[idx + 1].idx < 0) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[idx + 1].idx]; } /** @@ -1840,17 +1887,30 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, /* Rate control API */ + /** - * struct rate_selection - rate information for/from rate control algorithms - * - * @rate_idx: selected transmission rate index - * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used - * @probe_idx: rate for probing (or -1) - * @max_rate_idx: maximum rate index that can be used, this is - * input to the algorithm and will be enforced - */ -struct rate_selection { - s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx; + * struct ieee80211_tx_rate_control - rate control information for/from RC algo + * + * @hw: The hardware the algorithm is invoked for. + * @sband: The band this frame is being transmitted on. + * @bss_conf: the current BSS configuration + * @reported_rate: The rate control algorithm can fill this in to indicate + * which rate should be reported to userspace as the current rate and + * used for rate calculations in the mesh network. + * @rts: whether RTS will be used for this frame because it is longer than the + * RTS threshold + * @short_preamble: whether mac80211 will request short-preamble transmission + * if the selected rate supports it + * @max_rate_idx: user-requested maximum rate (not MCS for now) + */ +struct ieee80211_tx_rate_control { + struct ieee80211_hw *hw; + struct ieee80211_supported_band *sband; + struct ieee80211_bss_conf *bss_conf; + struct sk_buff *skb; + struct ieee80211_tx_rate reported_rate; + bool rts, short_preamble; + u8 max_rate_idx; }; struct rate_control_ops { @@ -1869,10 +1929,8 @@ struct rate_control_ops { void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb); - void (*get_rate)(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, - struct rate_selection *sel); + void (*get_rate)(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc); void (*add_sta_debugfs)(void *priv, void *priv_sta, struct dentry *dir); -- cgit v1.1 From 50fb2e4572141770380f5919793c6e575fa3474b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Oct 2008 11:21:49 +0200 Subject: mac80211: remove rate_control_clear "Clearing" the rate control algorithm is pointless, none of the algorithms actually uses this operation and it's not even invoked properly for all channel switching. Also, there's no need to since rate control algorithms work per station. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3741c0a..e0b1ff9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1917,7 +1917,6 @@ struct rate_control_ops { struct module *module; const char *name; void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); - void (*clear)(void *priv); void (*free)(void *priv); void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); -- cgit v1.1 From 93da9cc17c5ae8a751886fd4732db89ad5e9bdb9 Mon Sep 17 00:00:00 2001 From: "colin@cozybit.com" Date: Tue, 21 Oct 2008 12:03:48 -0700 Subject: Add nl80211 commands to get and set o11s mesh networking parameters The two new commands are NL80211_CMD_GET_MESH_PARAMS and NL80211_CMD_SET_MESH_PARAMS. There is a new attribute enum, NL80211_ATTR_MESH_PARAMS, which enumerates the various mesh configuration parameters. Moved struct mesh_config from mac80211/ieee80211_i.h to net/cfg80211.h. nl80211_get_mesh_params and nl80211_set_mesh_params unpack the netlink messages and ask the driver to get or set the configuration. This is done via two new function stubs, get_mesh_params and set_mesh_params, in struct cfg80211_ops. Signed-off-by: Colin McCabe Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ include/net/cfg80211.h | 32 +++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 41720d4..e4cc786 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -106,6 +106,12 @@ * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will * store this as a valid request and then query userspace for it. * + * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -148,6 +154,9 @@ enum nl80211_commands { NL80211_CMD_SET_REG, NL80211_CMD_REQ_SET_REG, + NL80211_CMD_GET_MESH_PARAMS, + NL80211_CMD_SET_MESH_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -296,6 +305,8 @@ enum nl80211_attrs { NL80211_ATTR_REG_ALPHA2, NL80211_ATTR_REG_RULES, + NL80211_ATTR_MESH_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -606,4 +617,79 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_meshconf_params - mesh configuration parameters + * + * Mesh configuration parameters + * + * @__NL80211_MESHCONF_INVALID: internal use + * + * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in + * millisecond units, used by the Peer Link Open message + * + * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the inital confirm timeout, in + * millisecond units, used by the peer link management to close a peer link + * + * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in + * millisecond units + * + * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed + * on this mesh interface + * + * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link + * open retries that can be sent to establish a new peer link instance in a + * mesh + * + * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh + * point. + * + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically + * open peer links when we detect compatible mesh peers. + * + * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames + * containing a PREQ that an MP can send to a particular destination (path + * target) + * + * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths + * (in milliseconds) + * + * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait + * until giving up on a path discovery (in milliseconds) + * + * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh + * points receiving a PREQ shall consider the forwarding information from the + * root to be valid. (TU = time unit) + * + * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element + * + * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) + * that it takes for an HWMP information element to propagate across the mesh + * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * + * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use + */ +enum nl80211_meshconf_params { + __NL80211_MESHCONF_INVALID, + NL80211_MESHCONF_RETRY_TIMEOUT, + NL80211_MESHCONF_CONFIRM_TIMEOUT, + NL80211_MESHCONF_HOLDING_TIMEOUT, + NL80211_MESHCONF_MAX_PEER_LINKS, + NL80211_MESHCONF_MAX_RETRIES, + NL80211_MESHCONF_TTL, + NL80211_MESHCONF_AUTO_OPEN_PLINKS, + NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + NL80211_MESHCONF_PATH_REFRESH_TIME, + NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + + /* keep last */ + __NL80211_MESHCONF_ATTR_AFTER_LAST, + NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0e85ec3..03e1e88 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -347,6 +347,25 @@ struct ieee80211_regdomain { .flags = reg_flags, \ } +struct mesh_config { + /* Timeouts in ms */ + /* Mesh plink management parameters */ + u16 dot11MeshRetryTimeout; + u16 dot11MeshConfirmTimeout; + u16 dot11MeshHoldingTimeout; + u16 dot11MeshMaxPeerLinks; + u8 dot11MeshMaxRetries; + u8 dot11MeshTTL; + bool auto_open_plinks; + /* HWMP parameters */ + u8 dot11MeshHWMPmaxPREQretries; + u32 path_refresh_time; + u16 min_discovery_timeout; + u32 dot11MeshHWMPactivePathTimeout; + u16 dot11MeshHWMPpreqMinInterval; + u16 dot11MeshHWMPnetDiameterTraversalTime; +}; + /* from net/wireless.h */ struct wiphy; @@ -397,6 +416,12 @@ struct wiphy; * * @change_station: Modify a given station. * + * @get_mesh_params: Put the current mesh parameters into *params + * + * @set_mesh_params: Set mesh parameters. + * The mask is a bitfield which tells us which parameters to + * set, and which to leave alone. + * * @set_mesh_cfg: set mesh parameters (by now, just mesh id) * * @change_bss: Modify parameters for a given BSS. @@ -452,7 +477,12 @@ struct cfg80211_ops { int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); - + int (*get_mesh_params)(struct wiphy *wiphy, + struct net_device *dev, + struct mesh_config *conf); + int (*set_mesh_params)(struct wiphy *wiphy, + struct net_device *dev, + const struct mesh_config *nconf, u32 mask); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); }; -- cgit v1.1 From e37d4dffdffb7f834bd28d4ae8e3dcdf07fce508 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 20 Oct 2008 21:20:27 -0400 Subject: mac80211: fix a few typos in mac80211 kernel doc Correct a handful of errors found while reading the mac80211 book. Signed-off-by: Bob Copeland Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e0b1ff9..16c8959 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -99,7 +99,7 @@ enum ieee80211_max_queues { * The information provided in this structure is required for QoS * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29. * - * @aifs: arbitration interface space [0..255] + * @aifs: arbitration interframe space [0..255] * @cw_min: minimum contention window [a value of the form * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] @@ -950,7 +950,7 @@ static inline void SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev } /** - * SET_IEEE80211_PERM_ADDR - set the permanenet MAC address for 802.11 hardware + * SET_IEEE80211_PERM_ADDR - set the permanent MAC address for 802.11 hardware * * @hw: the &struct ieee80211_hw to set the MAC address for * @addr: the address to set @@ -1043,7 +1043,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * This happens everytime the iv16 wraps around (every 65536 packets). The * set_key() call will happen only once for each key (unless the AP did * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is - * provided by udpate_tkip_key only. The trigger that makes mac80211 call this + * provided by update_tkip_key only. The trigger that makes mac80211 call this * handler is software decryption with wrap around of iv16. */ @@ -1177,7 +1177,7 @@ enum ieee80211_ampdu_mlme_action { * Must be implemented. * * @add_interface: Called when a netdevice attached to the hardware is - * enabled. Because it is not called for monitor mode devices, @open + * enabled. Because it is not called for monitor mode devices, @start * and @stop must be implemented. * The driver should perform any initialization it needs before * the device can be enabled. The initial configuration for the @@ -1244,7 +1244,7 @@ enum ieee80211_ampdu_mlme_action { * the stack will not do fragmentation. * * @sta_notify: Notifies low level driver about addition or removal - * of assocaited station or AP. + * of associated station or AP. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. @@ -1544,7 +1544,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, * the next beacon frame from the 802.11 code. The low-level is responsible * for calling this function before beacon data is needed (e.g., based on * hardware interrupt). Returned skb is used only once and low-level driver - * is responsible of freeing it. + * is responsible for freeing it. */ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif); -- cgit v1.1 From cf03268e6ed6cfacaa5e32db41ea832c4d10438b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2008 09:42:38 +0200 Subject: wireless: don't publish __regulatory_hint This function requires an internal lock to be held, so it cannot be published to other modules in the kernel. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index c0aa0fb..061fe50 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -341,33 +341,6 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) } /** - * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if a driver is providing the hint this is the driver's very - * own &struct wiphy - * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain - * should be in. If @rd is set this should be NULL - * @rd: a complete regulatory domain, if passed the caller need not worry - * about freeing it - * - * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by - * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in or by providing a completely build regulatory domain. - * - * Returns -EALREADY if *a regulatory domain* has already been set. Note that - * this could be by another driver. It is safe for drivers to continue if - * -EALREADY is returned, if drivers are not capable of world roaming they - * should not register more channels than they support. Right now we only - * support listening to the first driver hint. If the driver is capable - * of world roaming but wants to respect its own EEPROM mappings for - * specific regulatory domains it should register the @reg_notifier callback - * on the &struct wiphy. Returns 0 if the hint went through fine or through an - * intersection operation. Otherwise a standard error code is returned. - * - */ -extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, struct ieee80211_regdomain *rd); -/** * regulatory_hint - driver hint to the wireless core a regulatory domain * @wiphy: the driver's very own &struct wiphy * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain @@ -388,7 +361,15 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, * the wireless core it is unknown. If you pass a built regulatory domain * and we return non zero you are in charge of kfree()'ing the structure. * - * See __regulatory_hint() documentation for possible return values. + * Returns -EALREADY if *a regulatory domain* has already been set. Note that + * this could be by another driver. It is safe for drivers to continue if + * -EALREADY is returned, if drivers are not capable of world roaming they + * should not register more channels than they support. Right now we only + * support listening to the first driver hint. If the driver is capable + * of world roaming but wants to respect its own EEPROM mappings for + * specific regulatory domains it should register the @reg_notifier callback + * on the &struct wiphy. Returns 0 if the hint went through fine or through an + * intersection operation. Otherwise a standard error code is returned. */ extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2, struct ieee80211_regdomain *rd); -- cgit v1.1 From 7e272fcff6f0a32a3d46e600ea5895f6058f4e2d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 24 Sep 2008 18:13:14 -0400 Subject: wireless: consolidate on a single escape_essid implementation Signed-off-by: John W. Linville --- include/net/ieee80211.h | 20 -------------------- include/net/lib80211.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 include/net/lib80211.h (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 93a56de..dec10c4 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -127,10 +127,6 @@ static inline bool ieee80211_ratelimit_debug(u32 level) } #endif /* CONFIG_IEEE80211_DEBUG */ -/* escape_essid() is intended to be used in debug (and possibly error) - * messages. It should never be used for passing essid to user space. */ -const char *escape_essid(const char *essid, u8 essid_len); - /* * To use the debug system: * @@ -1135,22 +1131,6 @@ static inline void *ieee80211_priv(struct net_device *dev) return ((struct ieee80211_device *)netdev_priv(dev))->priv; } -static inline int ieee80211_is_empty_essid(const char *essid, int essid_len) -{ - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') - return 1; - - /* Otherwise, if the entire essid is 0, we assume it is hidden */ - while (essid_len) { - essid_len--; - if (essid[essid_len] != '\0') - return 0; - } - - return 1; -} - static inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mode) { diff --git a/include/net/lib80211.h b/include/net/lib80211.h new file mode 100644 index 0000000..91a64f3 --- /dev/null +++ b/include/net/lib80211.h @@ -0,0 +1,31 @@ +/* + * lib80211.h -- common bits for IEEE802.11 wireless drivers + * + * Copyright (c) 2008, John W. Linville + * + */ + +#ifndef LIB80211_H +#define LIB80211_H + +/* escape_ssid() is intended to be used in debug (and possibly error) + * messages. It should never be used for passing ssid to user space. */ +const char *escape_ssid(const char *ssid, u8 ssid_len); + +static inline int is_empty_ssid(const char *ssid, int ssid_len) +{ + /* Single white space is for Linksys APs */ + if (ssid_len == 1 && ssid[0] == ' ') + return 1; + + /* Otherwise, if the entire ssid is 0, we assume it is hidden */ + while (ssid_len) { + ssid_len--; + if (ssid[ssid_len] != '\0') + return 0; + } + + return 1; +} + +#endif /* LIB80211_H */ -- cgit v1.1 From c5d3dce875ef055ed9b14f169cc967cc2c8faf1f Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 17:17:26 -0400 Subject: wireless: remove NETWORK_EMPTY_ESSID flag It is unnecessary and of questionable value. Also remove is_empty_ssid, as it is also unnecessary. Signed-off-by: John W. Linville --- include/net/ieee80211.h | 1 - include/net/lib80211.h | 16 ---------------- 2 files changed, 17 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index dec10c4..afa34d3 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -729,7 +729,6 @@ struct ieee80211_txb { #define MAX_WPA_IE_LEN 64 -#define NETWORK_EMPTY_ESSID (1<<0) #define NETWORK_HAS_OFDM (1<<1) #define NETWORK_HAS_CCK (1<<2) diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 91a64f3..ce49a30 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -12,20 +12,4 @@ * messages. It should never be used for passing ssid to user space. */ const char *escape_ssid(const char *ssid, u8 ssid_len); -static inline int is_empty_ssid(const char *ssid, int ssid_len) -{ - /* Single white space is for Linksys APs */ - if (ssid_len == 1 && ssid[0] == ' ') - return 1; - - /* Otherwise, if the entire ssid is 0, we assume it is hidden */ - while (ssid_len) { - ssid_len--; - if (ssid[ssid_len] != '\0') - return 0; - } - - return 1; -} - #endif /* LIB80211_H */ -- cgit v1.1 From 9387b7caf3049168fc97a8a9111af8fe2143af18 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 20:59:05 -0400 Subject: wireless: use individual buffers for printing ssid values Also change escape_ssid to print_ssid to match print_mac semantics. Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 6 +++--- include/net/lib80211.h | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 64a4abc..b0726e2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -12,8 +12,8 @@ * published by the Free Software Foundation. */ -#ifndef IEEE80211_H -#define IEEE80211_H +#ifndef LINUX_IEEE80211_H +#define LINUX_IEEE80211_H #include #include @@ -1114,4 +1114,4 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) return hdr->addr1; } -#endif /* IEEE80211_H */ +#endif /* LINUX_IEEE80211_H */ diff --git a/include/net/lib80211.h b/include/net/lib80211.h index ce49a30..906d96f 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -8,8 +8,9 @@ #ifndef LIB80211_H #define LIB80211_H -/* escape_ssid() is intended to be used in debug (and possibly error) +/* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ -const char *escape_ssid(const char *ssid, u8 ssid_len); +const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); +#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused #endif /* LIB80211_H */ -- cgit v1.1 From 72118015271e6d3852cb9f647efe0987d131adaa Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 21:43:03 -0400 Subject: wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts There is quite a lot of overlap in definitions between these headers... Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 3 +- include/net/ieee80211.h | 112 +--------------------------------------------- include/net/lib80211.h | 4 +- 3 files changed, 6 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b0726e2..aad9919 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -826,8 +826,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_FAST_BSS_TRANSITION 2 -#define WLAN_AUTH_LEAP 128 +#define WLAN_AUTH_LEAP 2 #define WLAN_AUTH_CHALLENGE_LEN 128 diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index afa34d3..738734a 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -28,6 +28,7 @@ #include /* ETH_ALEN */ #include /* ARRAY_SIZE */ #include +#include #define IEEE80211_VERSION "git-1.1.13" @@ -214,94 +215,6 @@ struct ieee80211_snap_hdr { #define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG) #define WLAN_GET_SEQ_SEQ(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -/* Authentication algorithms */ -#define WLAN_AUTH_OPEN 0 -#define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_LEAP 2 - -#define WLAN_AUTH_CHALLENGE_LEN 128 - -#define WLAN_CAPABILITY_ESS (1<<0) -#define WLAN_CAPABILITY_IBSS (1<<1) -#define WLAN_CAPABILITY_CF_POLLABLE (1<<2) -#define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) -#define WLAN_CAPABILITY_PRIVACY (1<<4) -#define WLAN_CAPABILITY_SHORT_PREAMBLE (1<<5) -#define WLAN_CAPABILITY_PBCC (1<<6) -#define WLAN_CAPABILITY_CHANNEL_AGILITY (1<<7) -#define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8) -#define WLAN_CAPABILITY_QOS (1<<9) -#define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) -#define WLAN_CAPABILITY_DSSS_OFDM (1<<13) - -/* 802.11g ERP information element */ -#define WLAN_ERP_NON_ERP_PRESENT (1<<0) -#define WLAN_ERP_USE_PROTECTION (1<<1) -#define WLAN_ERP_BARKER_PREAMBLE (1<<2) - -/* Status codes */ -enum ieee80211_statuscode { - WLAN_STATUS_SUCCESS = 0, - WLAN_STATUS_UNSPECIFIED_FAILURE = 1, - WLAN_STATUS_CAPS_UNSUPPORTED = 10, - WLAN_STATUS_REASSOC_NO_ASSOC = 11, - WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12, - WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13, - WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14, - WLAN_STATUS_CHALLENGE_FAIL = 15, - WLAN_STATUS_AUTH_TIMEOUT = 16, - WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17, - WLAN_STATUS_ASSOC_DENIED_RATES = 18, - /* 802.11b */ - WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19, - WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20, - WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21, - /* 802.11h */ - WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22, - WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23, - WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24, - /* 802.11g */ - WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25, - WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26, - /* 802.11i */ - WLAN_STATUS_INVALID_IE = 40, - WLAN_STATUS_INVALID_GROUP_CIPHER = 41, - WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42, - WLAN_STATUS_INVALID_AKMP = 43, - WLAN_STATUS_UNSUPP_RSN_VERSION = 44, - WLAN_STATUS_INVALID_RSN_IE_CAP = 45, - WLAN_STATUS_CIPHER_SUITE_REJECTED = 46, -}; - -/* Reason codes */ -enum ieee80211_reasoncode { - WLAN_REASON_UNSPECIFIED = 1, - WLAN_REASON_PREV_AUTH_NOT_VALID = 2, - WLAN_REASON_DEAUTH_LEAVING = 3, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4, - WLAN_REASON_DISASSOC_AP_BUSY = 5, - WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6, - WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7, - WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8, - WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9, - /* 802.11h */ - WLAN_REASON_DISASSOC_BAD_POWER = 10, - WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11, - /* 802.11i */ - WLAN_REASON_INVALID_IE = 13, - WLAN_REASON_MIC_FAILURE = 14, - WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15, - WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16, - WLAN_REASON_IE_DIFFERENT = 17, - WLAN_REASON_INVALID_GROUP_CIPHER = 18, - WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19, - WLAN_REASON_INVALID_AKMP = 20, - WLAN_REASON_UNSUPP_RSN_VERSION = 21, - WLAN_REASON_INVALID_RSN_IE_CAP = 22, - WLAN_REASON_IEEE8021X_FAILED = 23, - WLAN_REASON_CIPHER_SUITE_REJECTED = 24, -}; - /* Action categories - 802.11h */ enum ieee80211_actioncategories { WLAN_ACTION_SPECTRUM_MGMT = 0, @@ -530,15 +443,6 @@ enum ieee80211_mfie { MFIE_TYPE_QOS_PARAMETER = 222, }; -/* Minimal header; can be used for passing 802.11 frames with sufficient - * information to determine what type of underlying data type is actually - * stored in the data. */ -struct ieee80211_hdr { - __le16 frame_ctl; - __le16 duration_id; - u8 payload[0]; -} __attribute__ ((packed)); - struct ieee80211_hdr_1addr { __le16 frame_ctl; __le16 duration_id; @@ -586,18 +490,6 @@ struct ieee80211_hdr_3addrqos { __le16 qos_ctl; } __attribute__ ((packed)); -struct ieee80211_hdr_4addrqos { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; - __le16 seq_ctl; - u8 addr4[ETH_ALEN]; - u8 payload[0]; - __le16 qos_ctl; -} __attribute__ ((packed)); - struct ieee80211_info_element { u8 id; u8 len; @@ -1187,7 +1079,7 @@ static inline int ieee80211_get_hdrlen(u16 fc) static inline u8 *ieee80211_get_payload(struct ieee80211_hdr *hdr) { - switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl))) { + switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control))) { case IEEE80211_1ADDR_LEN: return ((struct ieee80211_hdr_1addr *)hdr)->payload; case IEEE80211_2ADDR_LEN: diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 906d96f..e1558a1 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -8,9 +8,11 @@ #ifndef LIB80211_H #define LIB80211_H +#include + /* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); -#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused +#define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused #endif /* LIB80211_H */ -- cgit v1.1 From 8b30b1fe368ab03049435884c11c5c50e4c4ef0b Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 24 Oct 2008 09:55:27 +0530 Subject: mac80211: Re-enable aggregation Wireless HW without any dedicated queues for aggregation do not need the ampdu_queues mechanism present right now in mac80211. Since mac80211 is still incomplete wrt TX MQ changes, do not allow aggregation sessions for drivers that set ampdu_queues. This is only an interim hack until Intel fixes the requeue issue. Signed-off-by: Sujith Signed-off-by: Luis Rodriguez Signed-off-by: John W. Linville --- include/linux/skbuff.h | 4 ++++ include/net/mac80211.h | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 487e345..a01b6f8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,6 +250,9 @@ typedef unsigned char *sk_buff_data_t; * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) * @do_not_encrypt: set to prevent encryption of this frame + * @requeue: set to indicate that the wireless core should attempt + * a software retry on this frame if we failed to + * receive an ACK for it * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -326,6 +329,7 @@ struct sk_buff { #endif #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) __u8 do_not_encrypt:1; + __u8 requeue:1; #endif /* 0/13/14 bit hole */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 16c8959..bba96a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -242,7 +242,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be * set by rate control algorithms to indicate probe rate, will * be cleared for fragmented frames (except on the last fragment) - * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -258,9 +257,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_STAT_AMPDU = BIT(10), IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), - - /* XXX: remove this */ - IEEE80211_TX_CTL_REQUEUE = BIT(13), }; enum mac80211_rate_control_flags { @@ -847,6 +843,9 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_SPECTRUM_MGMT: * Hardware supports spectrum management defined in 802.11h * Measurement, Channel Switch, Quieting, TPC + * + * @IEEE80211_HW_AMPDU_AGGREGATION: + * Hardware supports 11n A-MPDU aggregation. */ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -858,6 +857,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SIGNAL_DBM = 1<<7, IEEE80211_HW_NOISE_DBM = 1<<8, IEEE80211_HW_SPECTRUM_MGMT = 1<<9, + IEEE80211_HW_AMPDU_AGGREGATION = 1<<10, }; /** -- cgit v1.1 From d2372b315289aec9f565a855023c40654a5bff68 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Oct 2008 20:32:20 +0200 Subject: wireless: make regdom passing semantics simpler The regdom struct is given to the core, so it might as well free it in error conditions. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index 061fe50..6e3ea01 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -358,8 +358,7 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) * for a regulatory domain structure for the respective country. If * a regulatory domain is build and passed you should set the alpha2 * if possible, otherwise set it to the special value of "99" which tells - * the wireless core it is unknown. If you pass a built regulatory domain - * and we return non zero you are in charge of kfree()'ing the structure. + * the wireless core it is unknown. * * Returns -EALREADY if *a regulatory domain* has already been set. Note that * this could be by another driver. It is safe for drivers to continue if -- cgit v1.1 From be3d48106c1e5d075784e5e67928a6b5ffc0f3b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Oct 2008 20:32:21 +0200 Subject: wireless: remove struct regdom hinting The code needs to be split out and cleaned up, so as a first step remove the capability, to add it back in a subsequent patch as a separate function. Also remove the publically facing return value of the function and the wiphy argument. A number of internal functions go from being generic helpers to just being used for alpha2 setting. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index 6e3ea01..41294c5 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -342,34 +342,19 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) /** * regulatory_hint - driver hint to the wireless core a regulatory domain - * @wiphy: the driver's very own &struct wiphy + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain * should be in. If @rd is set this should be NULL. Note that if you * set this to NULL you should still set rd->alpha2 to some accepted * alpha2. - * @rd: a complete regulatory domain provided by the driver. If passed - * the driver does not need to worry about freeing it. * * Wireless drivers can use this function to hint to the wireless core * what it believes should be the current regulatory domain by * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory * domain should be in or by providing a completely build regulatory domain. * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried - * for a regulatory domain structure for the respective country. If - * a regulatory domain is build and passed you should set the alpha2 - * if possible, otherwise set it to the special value of "99" which tells - * the wireless core it is unknown. - * - * Returns -EALREADY if *a regulatory domain* has already been set. Note that - * this could be by another driver. It is safe for drivers to continue if - * -EALREADY is returned, if drivers are not capable of world roaming they - * should not register more channels than they support. Right now we only - * support listening to the first driver hint. If the driver is capable - * of world roaming but wants to respect its own EEPROM mappings for - * specific regulatory domains it should register the @reg_notifier callback - * on the &struct wiphy. Returns 0 if the hint went through fine or through an - * intersection operation. Otherwise a standard error code is returned. + * for a regulatory domain structure for the respective country. */ -extern int regulatory_hint(struct wiphy *wiphy, - const char *alpha2, struct ieee80211_regdomain *rd); +extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2); #endif /* __NET_WIRELESS_H */ -- cgit v1.1 From e25cf4a6945e0f859186231be7164ba565412e0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Oct 2008 08:51:20 +0200 Subject: mac80211: fix two kernel-doc warnings One parameter wasn't described and one I forgot to update when renaming it; also update TBDs in sta_info. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bba96a2..0b983be 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -287,7 +287,7 @@ enum mac80211_rate_control_flags { * * @idx: rate index to attempt to send with * @flags: rate control flags (&enum mac80211_rate_control_flags) - * @limit: number of retries before fallback + * @count: number of tries in this rate before going to the next rate * * A value of -1 for @idx indicates an invalid rate and, if used * in an array of retry rates, that no more rates should be tried. @@ -1902,6 +1902,8 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it * @max_rate_idx: user-requested maximum rate (not MCS for now) + * @skb: the skb that will be transmitted, the control information in it needs + * to be filled in */ struct ieee80211_tx_rate_control { struct ieee80211_hw *hw; -- cgit v1.1 From 48148938b494cd57029a43c758e9972307a31d2a Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 3 Nov 2008 17:08:56 -0800 Subject: IPVS: Remove supports_ipv6 scheduler flag Remove the 'supports_ipv6' scheduler flag since all schedulers now support IPv6. Signed-off-by: Julius Volz Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fc63353..8f6abf4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -503,9 +503,6 @@ struct ip_vs_scheduler { char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ struct module *module; /* THIS_MODULE/NULL */ -#ifdef CONFIG_IP_VS_IPV6 - int supports_ipv6; /* scheduler has IPv6 support */ -#endif /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); -- cgit v1.1 From 6cf3f41e6c08bca6641a695449791c38a25f35ff Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Mon, 3 Nov 2008 18:16:50 -0800 Subject: bonding, net: Move last_rx update into bonding recv logic The only user of the net_device->last_rx field is bonding. This patch adds a conditional update of last_rx to the bonding special logic in skb_bond_should_drop, causing last_rx to only be updated when the ARP monitor is running. This frees network device drivers from the necessity of updating last_rx, which can have cache line thrash issues. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- include/linux/if.h | 1 + include/linux/netdevice.h | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 6524684..2a6e296 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -65,6 +65,7 @@ #define IFF_BONDING 0x20 /* bonding master or slave */ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ +#define IFF_MASTER_ARPMON 0x100 /* bonding master, ARP mon in use */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d..f1b0dbe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1742,22 +1742,26 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net_device *master = dev->master; - if (master && - (dev->priv_flags & IFF_SLAVE_INACTIVE)) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __constant_htons(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) + if (master) { + if (master->priv_flags & IFF_MASTER_ARPMON) + dev->last_rx = jiffies; + + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __constant_htons(ETH_P_ARP)) return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - return 0; - return 1; + if (master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return 0; + } + if (master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + return 0; + + return 1; + } } return 0; } -- cgit v1.1 From 5f7340eff8f68f41b7e5c7ad47ec4cd1ea1afb40 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 4 Nov 2008 14:21:08 +0100 Subject: netfilter: xt_NFLOG: don't call nf_log_packet in NFLOG module. This patch modifies xt_NFLOG to suppress the call to nf_log_packet() function. The call of this wrapper in xt_NFLOG was causing NFLOG to use the first initialized module. Thus, if ipt_ULOG is loaded before nfnetlink_log all NFLOG rules are treated as plain LOG rules. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- include/net/netfilter/nfnetlink_log.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/net/netfilter/nfnetlink_log.h (limited to 'include') diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h new file mode 100644 index 0000000..9b67f94 --- /dev/null +++ b/include/net/netfilter/nfnetlink_log.h @@ -0,0 +1,14 @@ +#ifndef _KER_NFNETLINK_LOG_H +#define _KER_NFNETLINK_LOG_H + +void +nfulnl_log_packet(unsigned int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li_user, + const char *prefix); + +#endif /* _KER_NFNETLINK_LOG_H */ + -- cgit v1.1 From 511061e2dd1b84bb21bb97c9216a19606c29ac02 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:22:55 +0100 Subject: netfilter: netns ebtables: part 1 * propagate netns from userspace, register table in passed netns * remporarily register every ebt_table in init_net P. S.: one needs to add ".netns_ok = 1" to igmp_protocol to test with ebtables(8) in netns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d45e29c..624e788 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct ebt_table *table); +extern int ebt_register_table(struct net *net, struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.1 From 6beceee5aa2cb94c4ae9f0784c7d3135d343f5b5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:27:15 +0100 Subject: netfilter: netns ebtables: part 2 * return ebt_table from ebt_register_table(), module code will save it into per-netns data for unregistration * duplicate ebt_table at the very beginning of registration -- it's added into list, so one ebt_table wouldn't end up in many lists (and each netns has different one) * introduce underscored tables in individial modules, this is temporary to not break bisection. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 624e788..e40ddb9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,8 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct net *net, struct ebt_table *table); +extern struct ebt_table *ebt_register_table(struct net *net, + struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.1 From 8157e6d16af86e4a8d31a035db7be02a8a171c26 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:29:03 +0100 Subject: netfilter: netns ebtables: ebtable_broute in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index b809397..055e684 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -4,7 +4,10 @@ #include #include +struct ebt_table; + struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; + struct ebt_table *broute_table; }; #endif -- cgit v1.1 From 4aad10938d4e4e8364b664cd5420c3bfeb9b679b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:29:58 +0100 Subject: netfilter: netns ebtables: ebtable_filter in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 055e684..d258e16 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -9,5 +9,6 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; struct ebt_table *broute_table; + struct ebt_table *frame_filter; }; #endif -- cgit v1.1 From b71b30a626fd0e43c825a05036e7a2c3f377a563 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:30:46 +0100 Subject: netfilter: netns ebtables: ebtable_nat in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index d258e16..9554a64 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -10,5 +10,6 @@ struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; struct ebt_table *broute_table; struct ebt_table *frame_filter; + struct ebt_table *frame_nat; }; #endif -- cgit v1.1 From d5f642384e9da75393160350f75bbb9a527f7c58 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:45:58 -0800 Subject: net: #ifdef ->sk_security Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 941ad7c..08291c1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -271,7 +271,9 @@ struct sock { struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; int sk_write_pending; +#ifdef CONFIG_SECURITY void *sk_security; +#endif __u32 sk_mark; /* XXX 4 bytes hole on 64 bit */ void (*sk_state_change)(struct sock *sk); -- cgit v1.1 From 7d43d1a0f2cf535167ec7247f110a1f85cecac43 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:43:47 -0800 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449..3978aff 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -- cgit v1.1 From ac75773c2742d82cbcb078708df406e9017224b7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:55:49 -0800 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff..484b8a1 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; -- cgit v1.1 From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:02 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe..12d7f44 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.1 From 61c9eaf90081cbe6dc4f389e0056bff76eca19ec Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 5 Nov 2008 16:02:34 -0800 Subject: pkt_sched: Fix qdisc len in qdisc_peek_dequeued() A packet dequeued and stored as gso_skb in qdisc_peek_dequeued() should be seen as part of the queue for sch->q.qlen queries until it's really dequeued with qdisc_dequeue_peeked(), so qlen needs additional updating in these functions. (Updating qstats.backlog shouldn't matter here.) Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9dcb5bf..64ae1ba 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -442,8 +442,12 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ - if (!sch->gso_skb) + if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); + if (sch->gso_skb) + /* it's still part of the queue */ + sch->q.qlen++; + } return sch->gso_skb; } @@ -453,10 +457,12 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { struct sk_buff *skb = sch->gso_skb; - if (skb) + if (skb) { sch->gso_skb = NULL; - else + sch->q.qlen--; + } else { skb = sch->dequeue(sch); + } return skb; } -- cgit v1.1 From 305d552accae6afb859c493ebc7d98ca3371dae2 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 4 Nov 2008 17:51:14 -0800 Subject: bonding: send IPv6 neighbor advertisement on failover This patch adds better IPv6 failover support for bonding devices, especially when in active-backup mode and there are only IPv6 addresses configured, as reported by Alex Sidorenko. - Creates a new file, net/drivers/bonding/bond_ipv6.c, for the IPv6-specific routines. Both regular bonds and VLANs over bonds are supported. - Adds a new tunable, num_unsol_na, to limit the number of unsolicited IPv6 Neighbor Advertisements that are sent on a failover event. Default is 1. - Creates two new IPv6 neighbor discovery functions: ndisc_build_skb() ndisc_send_skb() These were required to support VLANs since we have to be able to add the VLAN id to the skb since ndisc_send_na() and friends shouldn't be asked to do this. These two routines are basically __ndisc_send() split into two pieces, in a slightly different order. - Updates Documentation/networking/bonding.txt and bumps the rev of bond support to 3.4.0. On failover, this new code will generate one packet: - An unsolicited IPv6 Neighbor Advertisement, which helps the switch learn that the address has moved to the new slave. Testing has shown that sending just the NA results in pretty good behavior when in active-back mode, I saw no lost ping packets for example. Signed-off-by: Brian Haley Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- include/net/ndisc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 11dd013..ce532f2 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -108,6 +108,20 @@ extern void ndisc_send_redirect(struct sk_buff *skb, extern int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +extern struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo); + +extern void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h); + /* -- cgit v1.1 From fd9abb3d97c2ab883e4732ec1214fe64190236e7 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:37 +0000 Subject: SMSC LAN911x and LAN921x vendor driver Attached is a driver for SMSC's LAN911x and LAN921x families of embedded ethernet controllers. There is an existing smc911x driver in the tree; this is intended to replace it. Dustin McIntire (the author of the smc911x driver) has expressed his support for switching to this driver. This driver contains workarounds for all known hardware issues, and has been tested on all flavours of the chip on multiple architectures. This driver now uses phylib, so this patch also adds support for the device's internal phy Signed-off-by: Steve Glendinning Signed-off-by: Bahadir Balban Signed-off-by: Dustin Mcintire Signed-off-by: Bill Gatliff Signed-off-by: Jeff Garzik --- include/linux/smsc911x.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/smsc911x.h (limited to 'include') diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h new file mode 100644 index 0000000..47c4ffd --- /dev/null +++ b/include/linux/smsc911x.h @@ -0,0 +1,42 @@ +/*************************************************************************** + * + * Copyright (C) 2004-2008 SMSC + * Copyright (C) 2005-2008 ARM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + ***************************************************************************/ +#ifndef __LINUX_SMSC911X_H__ +#define __LINUX_SMSC911X_H__ + +#include + +/* platform_device configuration data, should be assigned to + * the platform_device's dev.platform_data */ +struct smsc911x_platform_config { + unsigned int irq_polarity; + unsigned int irq_type; + phy_interface_t phy_interface; +}; + +/* Constants for platform_device irq polarity configuration */ +#define SMSC911X_IRQ_POLARITY_ACTIVE_LOW 0 +#define SMSC911X_IRQ_POLARITY_ACTIVE_HIGH 1 + +/* Constants for platform_device irq type configuration */ +#define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 +#define SMSC911X_IRQ_TYPE_PUSH_PULL 1 + +#endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.1 From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 7 Nov 2008 22:52:14 -0800 Subject: Revert "net: Guaranetee the proper ordering of the loopback device." This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695. --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f44..f1b0dbe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } -extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.1 From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Nov 2008 22:54:20 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. v2 I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. But do it carefully so we register the loopback device after we clear dev_boot_phase. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe..12d7f44 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ -- cgit v1.1 From f400923735ecbb67cbe4a3606c9479f694754f51 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 7 Nov 2008 22:56:00 -0800 Subject: pkt_sched: Control group classifier The classifier should cover the most common use case and will work without any special configuration. The principle of the classifier is to directly access the task_struct via get_current(). In order for this to work, classification requests from softirqs must be ignored. This is not a problem because the vast majority of packets in softirq context are not assigned to a task anyway. For this to work, a mechanism is needed to trace softirq context. This repost goes back to the method of relying on the number of nested bh disable calls for the sake of not adding too much complexity and the option to come up with something more reliable if actually needed. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/cgroup_subsys.h | 6 ++++++ include/linux/pkt_cls.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9c22396..9c8d31b 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -54,3 +54,9 @@ SUBSYS(freezer) #endif /* */ + +#ifdef CONFIG_NET_CLS_CGROUP +SUBSYS(net_cls) +#endif + +/* */ diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7cf7824..e6aa848 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -394,6 +394,20 @@ enum #define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + +/* Cgroup classifier */ + +enum +{ + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr -- cgit v1.1 From 1239cd58d237fa6ad501acaec8776262a5784ec8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 Oct 2008 11:12:57 +0100 Subject: wireless: move mesh config length constant This is a constant from the 802.11 specification. Signed-off-by: Johannes Berg Cc: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aad9919..9dc288b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -97,7 +97,10 @@ #define IEEE80211_MAX_FRAME_LEN 2352 #define IEEE80211_MAX_SSID_LEN 32 + #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_MESH_CONFIG_LEN 19 + #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 -- cgit v1.1 From 41bb73eeac5ff5fb217257ba33b654747b3abf11 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Oct 2008 01:09:37 +0100 Subject: mac80211: remove SSID driver code Remove the SSID from the driver API since now there is no driver that requires knowing the SSID and I think it's unlikely that any hardware design that does require the SSID will play well with mac80211. This also removes support for setting the SSID in master mode which will require a patch to hostapd to not try. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b983be..af2ec6f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -616,14 +616,12 @@ struct ieee80211_if_init_conf { * enum ieee80211_if_conf_change - interface config change flags * * @IEEE80211_IFCC_BSSID: The BSSID changed. - * @IEEE80211_IFCC_SSID: The SSID changed. * @IEEE80211_IFCC_BEACON: The beacon for this interface changed * (currently AP and MESH only), use ieee80211_beacon_get(). */ enum ieee80211_if_conf_change { IEEE80211_IFCC_BSSID = BIT(0), - IEEE80211_IFCC_SSID = BIT(1), - IEEE80211_IFCC_BEACON = BIT(2), + IEEE80211_IFCC_BEACON = BIT(1), }; /** @@ -631,11 +629,6 @@ enum ieee80211_if_conf_change { * * @changed: parameters that have changed, see &enum ieee80211_if_conf_change. * @bssid: BSSID of the network we are associated to/creating. - * @ssid: used (together with @ssid_len) by drivers for hardware that - * generate beacons independently. The pointer is valid only during the - * config_interface() call, so copy the value somewhere if you need - * it. - * @ssid_len: length of the @ssid field. * * This structure is passed to the config_interface() callback of * &struct ieee80211_hw. @@ -643,8 +636,6 @@ enum ieee80211_if_conf_change { struct ieee80211_if_conf { u32 changed; u8 *bssid; - u8 *ssid; - size_t ssid_len; }; /** -- cgit v1.1 From 8469cdef1f123e2e3e56645f1ac26c7cfb333d9c Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 29 Oct 2008 10:19:28 +0530 Subject: mac80211: Add a new event in ieee80211_ampdu_mlme_action Send a notification to the driver on succesful reception of an ADDBA response, add IEEE80211_AMPDU_TX_RESUME for this purpose. Signed-off-by: Sujith Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af2ec6f..5385600 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1127,12 +1127,14 @@ enum ieee80211_filter_flags { * @IEEE80211_AMPDU_RX_STOP: stop Rx aggregation * @IEEE80211_AMPDU_TX_START: start Tx aggregation * @IEEE80211_AMPDU_TX_STOP: stop Tx aggregation + * @IEEE80211_AMPDU_TX_RESUME: resume TX aggregation */ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_RX_START, IEEE80211_AMPDU_RX_STOP, IEEE80211_AMPDU_TX_START, IEEE80211_AMPDU_TX_STOP, + IEEE80211_AMPDU_TX_RESUME, }; /** -- cgit v1.1 From bd815252720e4b667d9946d050d003ec89bda099 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Oct 2008 20:00:45 +0100 Subject: wireless: implement basic rate helper function This adds a helper function that, given a bitmap of basic rates and a bitrate returns the response rate for this rate. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index 41294c5..17d4b58 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -341,6 +341,22 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) } /** + * ieee80211_get_response_rate - get basic rate for a given rate + * + * @sband: the band to look for rates in + * @basic_rates: bitmap of basic rates + * @bitrate: the bitrate for which to find the basic rate + * + * This function returns the basic rate corresponding to a given + * bitrate, that is the next lower bitrate contained in the basic + * rate map, which is, for this function, given as a bitmap of + * indices of rates in the band's bitrate table. + */ +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u64 basic_rates, int bitrate); + +/** * regulatory_hint - driver hint to the wireless core a regulatory domain * @wiphy: the wireless device giving the hint (used only for reporting * conflicts) -- cgit v1.1 From 90c97a040d6b08cc4890328aa262fdc37336ab01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:22 +0200 Subject: nl80211: Add basic rate configuration for AP mode Add a new attribute, NL80211_ATTR_BSS_BASIC_RATES, that can be used with NL80211_CMD_SET_BSS for userspace (e.g., hostapd) to set which rates are in the basic rate set. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e4cc786..5009809 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -243,6 +243,9 @@ enum nl80211_commands { * (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) + * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic + * rates in format defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). * * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -307,6 +310,8 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PARAMS, + NL80211_ATTR_BSS_BASIC_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -318,6 +323,7 @@ enum nl80211_attrs { * here */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY +#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 03e1e88..7caf3c7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -280,11 +280,16 @@ struct mpath_info { * (0 = no, 1 = yes, -1 = do not change) * @use_short_slot_time: Whether the use of short slot time is allowed * (0 = no, 1 = yes, -1 = do not change) + * @basic_rates: basic rates in IEEE 802.11 format + * (or NULL for no change) + * @basic_rates_len: number of basic rates */ struct bss_parameters { int use_cts_prot; int use_short_preamble; int use_short_slot_time; + u8 *basic_rates; + u8 basic_rates_len; }; /** -- cgit v1.1 From 318884875bdddca663ecc373c813cf8e117d9e43 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:24 +0200 Subject: nl80211: Add TX queue parameter configuration Add a new attribute, NL80211_ATTR_WIPHY_TXQ_PARAMS, that can be used with NL80211_CMD_SET_WIPHY for userspace (e.g., hostapd) to set TX queue parameters (txop, cwmin, cwmax, aifs). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 43 +++++++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 23 +++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5009809..7982734 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -25,8 +25,9 @@ * * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. - * @NL80211_CMD_SET_WIPHY: set wiphy name, needs %NL80211_ATTR_WIPHY and - * %NL80211_ATTR_WIPHY_NAME. + * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME + * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -178,6 +179,7 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf. * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) + * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -312,6 +314,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, + NL80211_ATTR_WIPHY_TXQ_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -324,6 +328,7 @@ enum nl80211_attrs { */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES +#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -698,4 +703,38 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_txq_attr - TX queue parameter attributes + * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved + * @NL80211_TXQ_ATTR_QUEUE: TX queue identifier (NL80211_TXQ_Q_*) + * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning + * disabled + * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255] + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number + */ +enum nl80211_txq_attr { + __NL80211_TXQ_ATTR_INVALID, + NL80211_TXQ_ATTR_QUEUE, + NL80211_TXQ_ATTR_TXOP, + NL80211_TXQ_ATTR_CWMIN, + NL80211_TXQ_ATTR_CWMAX, + NL80211_TXQ_ATTR_AIFS, + + /* keep last */ + __NL80211_TXQ_ATTR_AFTER_LAST, + NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1 +}; + +enum nl80211_txq_q { + NL80211_TXQ_Q_VO, + NL80211_TXQ_Q_VI, + NL80211_TXQ_Q_BE, + NL80211_TXQ_Q_BK +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7caf3c7..4480231 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,6 +371,24 @@ struct mesh_config { u16 dot11MeshHWMPnetDiameterTraversalTime; }; +/** + * struct ieee80211_txq_params - TX queue parameters + * @queue: TX queue identifier (NL80211_TXQ_Q_*) + * @txop: Maximum burst time in units of 32 usecs, 0 meaning disabled + * @cwmin: Minimum contention window [a value of the form 2^n-1 in the range + * 1..32767] + * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range + * 1..32767] + * @aifs: Arbitration interframe space [0..255] + */ +struct ieee80211_txq_params { + enum nl80211_txq_q queue; + u16 txop; + u16 cwmin; + u16 cwmax; + u8 aifs; +}; + /* from net/wireless.h */ struct wiphy; @@ -430,6 +448,8 @@ struct wiphy; * @set_mesh_cfg: set mesh parameters (by now, just mesh id) * * @change_bss: Modify parameters for a given BSS. + * + * @set_txq_params: Set TX queue parameters */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -490,6 +510,9 @@ struct cfg80211_ops { const struct mesh_config *nconf, u32 mask); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); + + int (*set_txq_params)(struct wiphy *wiphy, + struct ieee80211_txq_params *params); }; #endif /* __NET_CFG80211_H */ -- cgit v1.1 From fc6971d491517ba15e800540ff88caa55dc65b01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 19:59:05 +0200 Subject: mac80211_hwsim: Add support for client PS mode This introduces a debugfs file (ieee80211/phy#/hwsim/ps) that can be used to force a simulated radio into power save mode. Following values can be written into this file to change PS mode: 0 = power save disabled (constantly awake) 1 = power save enabled (drop all frames; do not send PS-Poll) 2 = power save enabled (send PS-Poll frames automatically to receive buffered unicast frames); not yet fully implemented 3 = manual PS-Poll trigger (send a single PS-Poll frame) Two different behavior for power save mode processing can be tested: - move between modes 1 and 0 (i.e., receive all buffered frames at a time) - move to mode 1 and use manual PS-Poll frames (write 3 to the 'ps' debugfs file) to fetch power save buffered frames one at a time Mode 2 (automatic PS-Poll) does not yet parse Beacon frames, but eventually, it should take a look at TIM IE and send PS-Poll if a traffic bit is set for our AID. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9dc288b..56b0eb2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -669,6 +669,13 @@ struct ieee80211_cts { u8 ra[6]; } __attribute__ ((packed)); +struct ieee80211_pspoll { + __le16 frame_control; + __le16 aid; + u8 bssid[6]; + u8 ta[6]; +} __attribute__ ((packed)); + /** * struct ieee80211_bar - HT Block Ack Request * -- cgit v1.1 From b219cee191e7cfe88a695a57249a295d0d5b22e9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Oct 2008 13:33:55 -0700 Subject: cfg80211: make use of reg macros on REG_RULE Ensure regulatory converstion macros safely accept multiple arguments and make REG_RULE() use them. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/cfg80211.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4480231..1d57835 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -336,19 +336,19 @@ struct ieee80211_regdomain { struct ieee80211_reg_rule reg_rules[]; }; -#define MHZ_TO_KHZ(freq) (freq * 1000) -#define KHZ_TO_MHZ(freq) (freq / 1000) -#define DBI_TO_MBI(gain) (gain * 100) -#define MBI_TO_DBI(gain) (gain / 100) -#define DBM_TO_MBM(gain) (gain * 100) -#define MBM_TO_DBM(gain) (gain / 100) +#define MHZ_TO_KHZ(freq) ((freq) * 1000) +#define KHZ_TO_MHZ(freq) ((freq) / 1000) +#define DBI_TO_MBI(gain) ((gain) * 100) +#define MBI_TO_DBI(gain) ((gain) / 100) +#define DBM_TO_MBM(gain) ((gain) * 100) +#define MBM_TO_DBM(gain) ((gain) / 100) #define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \ - .freq_range.start_freq_khz = (start) * 1000, \ - .freq_range.end_freq_khz = (end) * 1000, \ - .freq_range.max_bandwidth_khz = (bw) * 1000, \ - .power_rule.max_antenna_gain = (gain) * 100, \ - .power_rule.max_eirp = (eirp) * 100, \ + .freq_range.start_freq_khz = MHZ_TO_KHZ(start), \ + .freq_range.end_freq_khz = MHZ_TO_KHZ(end), \ + .freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \ + .power_rule.max_antenna_gain = DBI_TO_MBI(gain), \ + .power_rule.max_eirp = DBM_TO_MBM(eirp), \ .flags = reg_flags, \ } -- cgit v1.1 From fb28ad35906af2f042c94e2f9c0f898ef9acfa37 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 10 Nov 2008 13:55:14 -0800 Subject: net: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Marcel Holtmann Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: David S. Miller --- include/net/wireless.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index 17d4b58..4123515 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -263,9 +263,9 @@ static inline struct device *wiphy_dev(struct wiphy *wiphy) /** * wiphy_name - get wiphy name */ -static inline char *wiphy_name(struct wiphy *wiphy) +static inline const char *wiphy_name(struct wiphy *wiphy) { - return wiphy->dev.bus_id; + return dev_name(&wiphy->dev); } /** -- cgit v1.1 From 9b739ba5e66c96938fbc07a4dbd9da5b81eac56f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 16:47:44 -0800 Subject: net: remove struct neigh_table::pde ->pde isn't actually needed, since name is stashed in ->id. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index aa4b708..365b5e2 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,9 +180,6 @@ struct neigh_table __u32 hash_rnd; unsigned int hash_chain_gc; struct pneigh_entry **phash_buckets; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; -#endif }; /* flags for neigh_update() */ -- cgit v1.1 From 6bb3ce25d05f2990c8a19adaf427531430267c1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 17:25:22 -0800 Subject: net: remove struct dst_entry::entry_size Unused after kmem_cache_zalloc() conversion. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index f96c4ba..65a60fa 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -104,7 +104,6 @@ struct dst_ops void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - int entry_size; atomic_t entries; struct kmem_cache *kmem_cachep; -- cgit v1.1 From d90ebcbfa7f5a8b4e20518c9f94c5c4e4cd3c2e5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 12 Nov 2008 00:47:26 -0800 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1..d3ac1bd 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.1 From 8f424b5f32d78b4f353b3cddca9804808ef063eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:53:30 -0800 Subject: net: Introduce read_pnet() and write_pnet() helpers This patch introduces two helpers that deal with reading and writing struct net pointers in various network structures. Their implementation depends on CONFIG_NET_NS For symmetry, both functions work with "struct net **pnet". Their usage should reduce the number of #ifdef CONFIG_NET_NS, without adding many helpers for each network structure that hold a "struct net *pointer" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 700c53a..3195577 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -192,6 +192,24 @@ static inline void release_net(struct net *net) } #endif +#ifdef CONFIG_NET_NS + +static inline void write_pnet(struct net **pnet, struct net *net) +{ + *pnet = net; +} + +static inline struct net *read_pnet(struct net * const *pnet) +{ + return *pnet; +} + +#else + +#define write_pnet(pnet, net) do { (void)(net);} while (0) +#define read_pnet(pnet) (&init_net) + +#endif #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) -- cgit v1.1 From 7a9546ee354ec6f23af403992b8c07baa50a23d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:20 -0800 Subject: net: ib_net pointer should depends on CONFIG_NET_NS We can shrink size of "struct inet_bind_bucket" by 50%, using read_pnet() and write_pnet() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5cc182f..cb31fbf 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -77,13 +77,20 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { +#ifdef CONFIG_NET_NS struct net *ib_net; +#endif unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; +static inline struct net *ib_net(struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, node, head) \ hlist_for_each_entry(tb, node, head, node) -- cgit v1.1 From e42ea986e4a4cab4209d982feffcaf50f21e80e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:54 -0800 Subject: net: Cleanup of neighbour code Using read_pnet() and write_pnet() in neighbour code ease the reading of code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 365b5e2..d8d790e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -220,11 +220,7 @@ extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *p static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { -#ifdef CONFIG_NET_NS - return parms->net; -#else - return &init_net; -#endif + return read_pnet(&parms->net); } extern unsigned long neigh_rand_reach_time(unsigned long base); @@ -241,11 +237,7 @@ extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { -#ifdef CONFIG_NET_NS - return pneigh->net; -#else - return &init_net; -#endif + return read_pnet(&pneigh->net); } extern void neigh_app_ns(struct neighbour *n); -- cgit v1.1 From 2378982487c492541d17adc0a870e7e83b07ba43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Nov 2008 23:25:32 -0800 Subject: net: ifdef struct sock::sk_async_wait_queue Every user is under CONFIG_NET_DMA already, so ifdef field as well. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 08291c1..8b2b821 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -239,7 +239,9 @@ struct sock { int sk_sndbuf; struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_write_queue; +#ifdef CONFIG_NET_DMA struct sk_buff_head sk_async_wait_queue; +#endif int sk_wmem_queued; int sk_forward_alloc; gfp_t sk_allocation; -- cgit v1.1 From 38a7ddffa4b79d7b1fbc9bf2fa82b21b72622858 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 13 Nov 2008 22:44:11 -0800 Subject: tcp: remove an unnecessary field in struct tcp_skb_cb The urg_ptr field is not used anywhere and is merely confusing. Signed-off-by: Petr Tesarik Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 438014d..8f26b28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -590,7 +590,6 @@ struct tcp_skb_cb { #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) - __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ }; -- cgit v1.1 From f30ab418a1d3c5a8b83493e7d70d6876a74aa0ce Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 13 Nov 2008 22:56:30 -0800 Subject: pkt_sched: Remove qdisc->ops->requeue() etc. After implementing qdisc->ops->peek() and changing sch_netem into classless qdisc there are no more qdisc->ops->requeue() users. This patch removes this method with its wrappers (qdisc_requeue()), and also unused qdisc->requeue structure. There are a few minor fixes of warnings (htb_enqueue()) and comments btw. The idea to kill ->requeue() and a similar patch were first developed by David S. Miller. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 64ae1ba..f8c4742 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -53,7 +53,6 @@ struct Qdisc atomic_t refcnt; unsigned long state; struct sk_buff *gso_skb; - struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; struct Qdisc *next_sched; @@ -112,7 +111,6 @@ struct Qdisc_ops int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - int (*requeue)(struct sk_buff *, struct Qdisc *); unsigned int (*drop)(struct Qdisc *); int (*init)(struct Qdisc *, struct nlattr *arg); @@ -467,21 +465,6 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) -{ - __skb_queue_head(list, skb); - sch->qstats.backlog += qdisc_pkt_len(skb); - sch->qstats.requeues++; - - return NET_XMIT_SUCCESS; -} - -static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - return __qdisc_requeue(skb, sch, &sch->q); -} - static inline void __qdisc_reset_queue(struct Qdisc *sch, struct sk_buff_head *list) { -- cgit v1.1 From f004f3ea34209d8b836426b26ade3dc502631b18 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Fri, 14 Nov 2008 00:24:34 +0000 Subject: phylib: make mdio-gpio work without OF (v4) make mdio-gpio work with non OpenFirmware gpio implementation. Aditional changes to mdio-gpio: - use gpio_request() and gpio_free() - place irq[] array in struct mdio_gpio_info - add module description, author and license - add note about compiling this driver as module - rename mdc and mdio function (were ugly names) - change MII to MDIO in bus name - add __init __exit to module (un)loading functions - probe fails if no phys added to the bus - kzalloc bitbang with sizeof(*bitbang) Changes since v3: - keep bus naming "%x" to be compatible with existing drivers. Changes since v2: - more #ifdefs reduction - platform driver will be registered on OF platforms also - unified platform and OF bus_id to phy%i Changes since v1: - removed NO_IRQ - reduced #idefs Laurent, please test this driver under OF. Signed-off-by: Paulius Zaleckas Signed-off-by: David S. Miller --- include/linux/mdio-gpio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/mdio-gpio.h (limited to 'include') diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h new file mode 100644 index 0000000..e9d3fdf --- /dev/null +++ b/include/linux/mdio-gpio.h @@ -0,0 +1,25 @@ +/* + * MDIO-GPIO bus platform data structures + * + * Copyright (C) 2008, Paulius Zaleckas + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#include + +struct mdio_gpio_platform_data { + /* GPIO numbers for bus pins */ + unsigned int mdc; + unsigned int mdio; + + unsigned int phy_mask; + int irqs[PHY_MAX_ADDR]; +}; + +#endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.1 From e8b2dfe9b4501ed0047459b2756ba26e5a940a69 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 16 Nov 2008 19:32:39 -0800 Subject: TPROXY: implemented IP_RECVORIGDSTADDR socket option In case UDP traffic is redirected to a local UDP socket, the originally addressed destination address/port cannot be recovered with the in-kernel tproxy. This patch adds an IP_RECVORIGDSTADDR sockopt that enables a IP_ORIGDSTADDR ancillary message in recvmsg(). This ancillary message contains the original destination address/port of the packet being received. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- include/linux/in.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index db458be..d60122a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -80,6 +80,10 @@ struct in_addr { /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ -- cgit v1.1 From bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:37:55 -0800 Subject: rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet Acked-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 94 ++++++++++++++++++++++++++++++++++++ include/linux/rculist_nulls.h | 110 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 include/linux/list_nulls.h create mode 100644 include/linux/rculist_nulls.h (limited to 'include') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h new file mode 100644 index 0000000..93150ec --- /dev/null +++ b/include/linux/list_nulls.h @@ -0,0 +1,94 @@ +#ifndef _LINUX_LIST_NULLS_H +#define _LINUX_LIST_NULLS_H + +/* + * Special version of lists, where end of list is not a NULL pointer, + * but a 'nulls' marker, which can have many different values. + * (up to 2^31 different values guaranteed on all platforms) + * + * In the standard hlist, termination of a list is the NULL pointer. + * In this special 'nulls' variant, we use the fact that objects stored in + * a list are aligned on a word (4 or 8 bytes alignment). + * We therefore use the last significant bit of 'ptr' : + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) + * Set to 0 : This is a pointer to some object (ptr) + */ + +struct hlist_nulls_head { + struct hlist_nulls_node *first; +}; + +struct hlist_nulls_node { + struct hlist_nulls_node *next, **pprev; +}; +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ + ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) + +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) +/** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested + * + */ +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr & 1); +} + +/** + * get_nulls_value - Get the 'nulls' value of the end of chain + * @ptr: end of chain + * + * Should be called only if is_a_nulls(ptr); + */ +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr) >> 1; +} + +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) +{ + return !h->pprev; +} + +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) +{ + return is_a_nulls(h->first); +} + +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +{ + struct hlist_nulls_node *next = n->next; + struct hlist_nulls_node **pprev = n->pprev; + *pprev = next; + if (!is_a_nulls(next)) + next->pprev = pprev; +} + +/** + * hlist_nulls_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ + for (; (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h new file mode 100644 index 0000000..f9ddd03 --- /dev/null +++ b/include/linux/rculist_nulls.h @@ -0,0 +1,110 @@ +#ifndef _LINUX_RCULIST_NULLS_H +#define _LINUX_RCULIST_NULLS_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include +#include + +/** + * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_nulls_add_head_rcu() or + * hlist_nulls_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_nulls_for_each_entry_rcu(). + */ +static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) +{ + if (!hlist_nulls_unhashed(n)) { + __hlist_nulls_del(n); + n->pprev = NULL; + } +} + +/** + * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry(). + */ +static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) +{ + __hlist_nulls_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_nulls_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(h->first, n); + if (!is_a_nulls(first)) + first->pprev = &n->next; +} +/** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference((head)->first); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(pos->next)) + +#endif +#endif -- cgit v1.1 From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:39:21 -0800 Subject: udp: Use hlist_nulls in UDP RCU code This is a straightforward patch, using hlist_nulls infrastructure. RCUification already done on UDP two weeks ago. Using hlist_nulls permits us to avoid some memory barriers, both at lookup time and delete time. Patch is large because it adds new macros to include/net/sock.h. These macros will be used by TCP & DCCP in next patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 --------------- include/net/sock.h | 57 +++++++++++++++++++++++++++++++++++++++---------- include/net/udp.h | 2 +- 3 files changed, 47 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3ba2998..e649bd3 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,22 +383,5 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) -/** - * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * @next: the &struct hlist_node to use as a next cursor - * - * Special version of hlist_for_each_entry_rcu that make sure - * each next pointer is fetched before each iteration. - */ -#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(next)) - #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 8b2b821..0a63894 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include #include +#include #include #include @@ -106,6 +108,7 @@ struct net; * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables @@ -120,7 +123,10 @@ struct sock_common { volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct hlist_node skc_node; + union { + struct hlist_node skc_node; + struct hlist_nulls_node skc_nulls_node; + }; struct hlist_node skc_bind_node; atomic_t skc_refcnt; unsigned int skc_hash; @@ -206,6 +212,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash @@ -300,12 +307,30 @@ static inline struct sock *sk_head(const struct hlist_head *head) return hlist_empty(head) ? NULL : __sk_head(head); } +static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); +} + +static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); +} + static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } +static inline struct sock *sk_nulls_next(const struct sock *sk) +{ + return (!is_a_nulls(sk->sk_nulls_node.next)) ? + hlist_nulls_entry(sk->sk_nulls_node.next, + struct sock, sk_nulls_node) : + NULL; +} + static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); @@ -321,6 +346,11 @@ static __inline__ void sk_node_init(struct hlist_node *node) node->pprev = NULL; } +static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) +{ + node->pprev = NULL; +} + static __inline__ void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); @@ -367,18 +397,18 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } -static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) { if (sk_hashed(sk)) { - hlist_del_init_rcu(&sk->sk_node); + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); return 1; } return 0; } -static __inline__ int sk_del_node_init_rcu(struct sock *sk) +static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) { - int rc = __sk_del_node_init_rcu(sk); + int rc = __sk_nulls_del_node_init_rcu(sk); if (rc) { /* paranoid for a while -acme */ @@ -399,15 +429,15 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } -static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_add_head_rcu(&sk->sk_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } -static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); - __sk_add_node_rcu(sk, list); + __sk_nulls_add_node_rcu(sk, list); } static __inline__ void __sk_del_bind_node(struct sock *sk) @@ -423,11 +453,16 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu_safenext(__sk, node, list, next) \ - hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) +#define sk_nulls_for_each(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define sk_nulls_for_each_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_nulls_for_each_from(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ + hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) #define sk_for_each_continue(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_continue(__sk, node, sk_node) diff --git a/include/net/udp.h b/include/net/udp.h index df2bfe5..90e6ce5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -51,7 +51,7 @@ struct udp_skb_cb { #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) struct udp_hslot { - struct hlist_head head; + struct hlist_nulls_head head; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); struct udp_table { -- cgit v1.1 From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++-- include/net/inet_timewait_sock.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index cb31fbf..4818960 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -41,8 +41,8 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - struct hlist_head chain; - struct hlist_head twchain; + struct hlist_nulls_head chain; + struct hlist_nulls_head twchain; }; /* There are a few simple rules, which allow for local port reuse by diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 80e4977..4b8ece2 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,7 @@ struct inet_timewait_sock { #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse #define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node +#define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash @@ -137,10 +137,10 @@ struct inet_timewait_sock { struct hlist_node tw_death_node; }; -static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, - struct hlist_head *list) +static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_add_head(&tw->tw_node, list); + hlist_nulls_add_head_rcu(&tw->tw_node, list); } static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -175,7 +175,7 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) } #define inet_twsk_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) + hlist_nulls_for_each_entry(tw, node, head, tw_node) #define inet_twsk_for_each_inmate(tw, node, jail) \ hlist_for_each_entry(tw, node, jail, tw_death_node) -- cgit v1.1 From 5635c10d976716ef47ae441998aeae144c7e7387 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:46:36 -0800 Subject: net: make sure struct dst_entry refcount is aligned on 64 bytes As found in the past (commit f1dd9c379cac7d5a76259e7dffcd5f8edc697d17 [NET]: Fix tbench regression in 2.6.25-rc1), it is really important that struct dst_entry refcount is aligned on a cache line. We cannot use __atribute((aligned)), so manually pad the structure for 32 and 64 bit arches. for 32bit : offsetof(truct dst_entry, __refcnt) is 0x80 for 64bit : offsetof(truct dst_entry, __refcnt) is 0xc0 As it is not possible to guess at compile time cache line size, we use a generic value of 64 bytes, that satisfies many current arches. (Using 128 bytes alignment on 64bit arches would waste 64 bytes) Add a BUILD_BUG_ON to catch future updates to "struct dst_entry" dont break this alignment. "tbench 8" is 4.4 % faster on a dual quad core (HP BL460c G1), Intel E5450 @3.00GHz (2350 MB/s instead of 2250 MB/s) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 65a60fa..6c77879 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,6 +61,8 @@ struct dst_entry struct hh_cache *hh; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; +#else + void *__pad1; #endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); @@ -71,8 +73,20 @@ struct dst_entry #ifdef CONFIG_NET_CLS_ROUTE __u32 tclassid; +#else + __u32 __pad2; #endif + + /* + * Align __refcnt to a 64 bytes alignment + * (L1_CACHE_SIZE would be too much) + */ +#ifdef CONFIG_64BIT + long __pad_to_align_refcnt[2]; +#else + long __pad_to_align_refcnt[1]; +#endif /* * __refcnt wants to be on a different cache line from * input/output/ops or performance tanks badly @@ -157,6 +171,11 @@ dst_metric_locked(struct dst_entry *dst, int metric) static inline void dst_hold(struct dst_entry * dst) { + /* + * If your kernel compilation stops here, please check + * __pad_to_align_refcnt declaration in struct dst_entry + */ + BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); atomic_inc(&dst->__refcnt); } -- cgit v1.1 From 3f2c31d90327f21d76d296af34aa4ca547932ff4 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:41:34 -0800 Subject: virtio_net: VIRTIO_NET_F_MSG_RXBUF (imprive rcv buffer allocation) If segmentation offload is enabled by the host, we currently allocate maximum sized packet buffers and pass them to the host. This uses up 20 ring entries, allowing us to supply only 20 packet buffers to the host with a 256 entry ring. This is a huge overhead when receiving small packets, and is most keenly felt when receiving MTU sized packets from off-host. The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support using receive buffers which are smaller than the maximum packet size. In order to transfer large packets to the guest, the host merges together multiple receive buffers to form a larger logical buffer. The number of merged buffers is returned to the guest via a field in the virtio_net_hdr. Make use of this support by supplying single page receive buffers to the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of the payload to the skb's linear data buffer and adjust the fragment offset to point to the remaining data. This ensures proper alignment and allows us to not use any paged data for small packets. If the payload occupies multiple pages, we simply append those pages as fragments and free the associated skbs. This scheme allows us to be efficient in our use of ring entries while still supporting large packets. Benchmarking using netperf from an external machine to a guest over a 10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark with GSO disabled on the host side, throughput was seen to increase from 700Mb/s to 1.7Gb/s. Based on a patch from Herbert Xu. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell (use netdev_priv) Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5e33761..5cdd0aa 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -20,6 +20,7 @@ #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ struct virtio_net_config { @@ -44,4 +45,12 @@ struct virtio_net_hdr __u16 csum_start; /* Position to start checksumming from */ __u16 csum_offset; /* Offset after that to place checksum */ }; + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + __u16 num_buffers; /* Number of merged rx buffers */ +}; + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.1 From 49aebc66d6b896f9c7c5739d85c4548c00015aa7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:51:23 -0800 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bd..6eaaca9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 -- cgit v1.1 From 29450559849da7066813601effb7666966869853 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:53:48 -0800 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9..5a5a899 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.1 From dd9c0e363cef32b7d6f23d4c87e8dfe4f91fd1c5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:55:08 -0800 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a899..eda389c 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.1 From 4d24b52ac5085ef8a264d044f1b302b7c029887a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 16 Nov 2008 23:01:49 -0800 Subject: ematch: simpler tcf_em_unregister() Simply delete ops from list and let list debugging do the job. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index aa9e282..d1ca314 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -246,7 +246,7 @@ struct tcf_ematch_ops }; extern int tcf_em_register(struct tcf_ematch_ops *); -extern int tcf_em_unregister(struct tcf_ematch_ops *); +extern void tcf_em_unregister(struct tcf_ematch_ops *); extern int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *, struct tcf_ematch_tree *); extern void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *); -- cgit v1.1 From 4dc06f9633444f426ef9960c53426f2d2ded64ac Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Nov 2008 16:01:42 +0100 Subject: netfilter: nf_conntrack: connection tracking helper name persistent aliases This patch adds the macro MODULE_ALIAS_NFCT_HELPER that defines a way to provide generic and persistent aliases for the connection tracking helpers. This next patch requires this patch. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b76a868..f11255e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -298,5 +298,8 @@ do { \ local_bh_enable(); \ } while (0) +#define MODULE_ALIAS_NFCT_HELPER(helper) \ + MODULE_ALIAS("nfct-helper-" helper) + #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ -- cgit v1.1 From 226c0c0ef2abdf91b8d9cce1aaf7d4635a5e5926 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Nov 2008 11:54:05 +0100 Subject: netfilter: ctnetlink: helper modules load-on-demand support This patch adds module loading for helpers via ctnetlink. * Creation path: We support explicit and implicit helper assignation. For the explicit case, we try to load the module. If the module is correctly loaded and the helper is present, we return EAGAIN to re-start the creation. Otherwise, we return EOPNOTSUPP. * Update path: release the spin lock, load the module and check. If it is present, then return EAGAIN to re-start the update. This patch provides a refactorized function to lookup-and-set the connection tracking helper. The function removes the exported symbol __nf_ct_helper_find as it has not clients anymore. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index f8060ab..66d65a7 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -39,9 +39,6 @@ struct nf_conntrack_helper }; extern struct nf_conntrack_helper * -__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple); - -extern struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); @@ -49,6 +46,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); + static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); -- cgit v1.1 From 19abb7b090a6bce88d4e9b2914a0367f4f684432 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Nov 2008 11:56:20 +0100 Subject: netfilter: ctnetlink: deliver events for conntracks changed from userspace As for now, the creation and update of conntracks via ctnetlink do not propagate an event to userspace. This can result in inconsistent situations if several userspace processes modify the connection tracking table by means of ctnetlink at the same time. Specifically, using the conntrack command line tool and conntrackd at the same time can trigger unconsistencies. This patch also modifies the event cache infrastructure to pass the process PID and the ECHO flag to nfnetlink_send() to report back to userspace if the process that triggered the change needs so. Based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 57 +++++++++++++++++++++++++++-- include/net/netfilter/nf_conntrack_expect.h | 2 + 3 files changed, 56 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f11255e..2e0c536 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -199,7 +199,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(struct net *net); +extern void nf_conntrack_flush(struct net *net, u32 pid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 1285ff2..0ff0dc6 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -17,6 +17,13 @@ struct nf_conntrack_ecache { unsigned int events; }; +/* This structure is passed to event handler */ +struct nf_ct_event { + struct nf_conn *ct; + u32 pid; + int report; +}; + extern struct atomic_notifier_head nf_conntrack_chain; extern int nf_conntrack_register_notifier(struct notifier_block *nb); extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); @@ -39,22 +46,56 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) local_bh_enable(); } -static inline void nf_conntrack_event(enum ip_conntrack_events event, - struct nf_conn *ct) +static inline void +nf_conntrack_event_report(enum ip_conntrack_events event, + struct nf_conn *ct, + u32 pid, + int report) { + struct nf_ct_event item = { + .ct = ct, + .pid = pid, + .report = report + }; if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); + atomic_notifier_call_chain(&nf_conntrack_chain, event, &item); } +static inline void +nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) +{ + nf_conntrack_event_report(event, ct, 0, 0); +} + +struct nf_exp_event { + struct nf_conntrack_expect *exp; + u32 pid; + int report; +}; + extern struct atomic_notifier_head nf_ct_expect_chain; extern int nf_ct_expect_register_notifier(struct notifier_block *nb); extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); static inline void +nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 pid, + int report) +{ + struct nf_exp_event item = { + .exp = exp, + .pid = pid, + .report = report + }; + atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item); +} + +static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) { - atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); + nf_ct_expect_event_report(event, exp, 0, 0); } extern int nf_conntrack_ecache_init(struct net *net); @@ -66,9 +107,17 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline void nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) {} +static inline void nf_conntrack_event_report(enum ip_conntrack_events event, + struct nf_conn *ct, + u32 pid, + int report) {} static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} +static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, + struct nf_conntrack_expect *exp, + u32 pid, + int report) {} static inline void nf_ct_event_cache_flush(struct net *net) {} static inline int nf_conntrack_ecache_init(struct net *net) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 37a7fc1..ab17a15 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -100,6 +100,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related(struct nf_conntrack_expect *expect); +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report); #endif /*_NF_CONNTRACK_EXPECT_H*/ -- cgit v1.1 From d9e150071d18b5c87ba7a097af4063a5ad0c6a0c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 18 Nov 2008 12:16:52 +0100 Subject: netfilter: nfnetlink_log: fix warning and prototype mismatch net/netfilter/nfnetlink_log.c:537:1: warning: symbol 'nfulnl_log_packet' was not declared. Should it be static? Including the proper header also revealed an incorrect prototype. Signed-off-by: Patrick McHardy --- include/net/netfilter/nfnetlink_log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h index 9b67f94..b0569ff 100644 --- a/include/net/netfilter/nfnetlink_log.h +++ b/include/net/netfilter/nfnetlink_log.h @@ -2,7 +2,7 @@ #define _KER_NFNETLINK_LOG_H void -nfulnl_log_packet(unsigned int pf, +nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, -- cgit v1.1 From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 19 Nov 2008 15:44:53 -0800 Subject: include/net net/ - csum_partial - remove unnecessary casts The first argument to csum_partial is const void * casts to char/u8 * are not necessary Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/checksum.h | 2 +- include/net/ip_vs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 07602b7..ba55d8b 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __be32 diff[] = { ~from, to }; - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8f6abf4..ab9b003 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) { __be32 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #ifdef CONFIG_IP_VS_IPV6 @@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], new[3], new[2], new[1], new[0] }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif @@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif /* __KERNEL__ */ -- cgit v1.1 From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:32:24 -0800 Subject: netdev: network device operations infrastructure This patch changes the network device internal API to move adminstrative operations out of the network device structure and into a separate structure. This patch involves some hackery to maintain compatablity between the new and old model, so all 300+ drivers don't have to be changed at once. For drivers that aren't converted yet, the netdevice_ops virt function list still resides in the net_device structure. For old protocols, the new net_device_ops are copied out to the old net_device pointers. After the transistion is completed the nag message can be changed to an WARN_ON, and the compatiablity code can be made configurable. Some function pointers aren't moved: * destructor can't be in net_device_ops because it may need to be referenced after the module is unloaded. * neighbor setup is manipulated in a couple of places that need special consideration * hard_start_xmit is in the fast path for transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++------------- 1 file changed, 168 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f44..9060f5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,131 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; } ____cacheline_aligned_in_smp; + +/* + * This structure defines the management hooks for network devices. + * The following hooks can bed defined and are optonal (can be null) + * unless otherwise noted. + * + * int (*ndo_init)(struct net_device *dev); + * This function is called once when network device is registered. + * The network device can use this to any late stage initializaton + * or semantic validattion. It can fail with an error code which will + * be propogated back to register_netdev + * + * void (*ndo_uninit)(struct net_device *dev); + * This function is called when device is unregistered or when registration + * fails. It is not called if init fails. + * + * int (*ndo_open)(struct net_device *dev); + * This function is called when network device transistions to the up + * state. + * + * int (*ndo_stop)(struct net_device *dev); + * This function is called when network device transistions to the down + * state. + * + * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); + * This function is called to allow device receiver to make + * changes to configuration when multicast or promiscious is enabled. + * + * void (*ndo_set_rx_mode)(struct net_device *dev); + * This function is called device changes address list filtering. + * + * void (*ndo_set_multicast_list)(struct net_device *dev); + * This function is called when the multicast address list changes. + * + * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); + * This function is called when the Media Access Control address + * needs to be changed. If not this interface is not defined, the + * mac address can not be changed. + * + * int (*ndo_validate_addr)(struct net_device *dev); + * Test if Media Access Control address is valid for the device. + * + * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called when a user request an ioctl which can't be handled by + * the generic interface code. If not defined ioctl's return + * not supported error code. + * + * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); + * Used to set network devices bus interface parameters. This interface + * is retained for legacy reason, new devices should use the bus + * interface (PCI) for low level management. + * + * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + * Called when a user wants to change the Maximum Transfer Unit + * of a device. If not defined, any request to change MTU will + * will return an error. + * + * void (*ndo_tx_timeout) (struct net_device *dev); + * Callback uses when the transmitter has not made any progress + * for dev->watchdog ticks. + * + * struct net_device_stats* (*get_stats)(struct net_device *dev); + * Called when a user wants to get the network device usage + * statistics. If not defined, the counters in dev->stats will + * be used. + * + * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp); + * If device support VLAN receive accleration + * (ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called + * when vlan groups for the device changes. Note: grp is NULL + * if no vlan's groups are being used. + * + * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is registered. + * + * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is unregistered. + * + * void (*ndo_poll_controller)(struct net_device *dev); + */ +struct net_device_ops { + int (*ndo_init)(struct net_device *dev); + void (*ndo_uninit)(struct net_device *dev); + int (*ndo_open)(struct net_device *dev); + int (*ndo_stop)(struct net_device *dev); +#define HAVE_CHANGE_RX_FLAGS + void (*ndo_change_rx_flags)(struct net_device *dev, + int flags); +#define HAVE_SET_RX_MODE + void (*ndo_set_rx_mode)(struct net_device *dev); +#define HAVE_MULTICAST + void (*ndo_set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*ndo_set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_VALIDATE_ADDR + int (*ndo_validate_addr)(struct net_device *dev); +#define HAVE_PRIVATE_IOCTL + int (*ndo_do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*ndo_set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_CHANGE_MTU + int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*ndo_tx_timeout) (struct net_device *dev); + + struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + + void (*ndo_vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER +#define HAVE_NETDEV_POLL + void (*ndo_poll_controller)(struct net_device *dev); +#endif +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -498,11 +623,6 @@ struct net_device #ifdef CONFIG_NETPOLL struct list_head napi_list; #endif - - /* The device initialization function. Called only once. */ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ /* Net device features */ unsigned long features; @@ -546,15 +666,13 @@ struct net_device * for all in netdev_increment_features. */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex; int iflink; - - struct net_device_stats* (*get_stats)(struct net_device *dev); struct net_device_stats stats; #ifdef CONFIG_WIRELESS_EXT @@ -564,18 +682,13 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif + /* Management operations */ + const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; /* Hardware header description */ const struct header_ops *header_ops; - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ @@ -634,7 +747,7 @@ struct net_device unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + because most packets are unicast) */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -648,6 +761,10 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* Map buffer to appropriate transmit queue */ + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* @@ -662,9 +779,6 @@ struct net_device int watchdog_timeo; /* used by dev_watchdog() */ struct timer_list watchdog_timer; -/* - * refcnt is a very hot point, so align it on SMP - */ /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; @@ -683,56 +797,14 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); + /* Called from unregister, can be used to call free_netdev */ + void (*destructor)(struct net_device *dev); - /* Pointers to interface service routines. */ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ @@ -763,6 +835,38 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_COMPAT_NET_DEV_OPS + struct { + int (*init)(struct net_device *dev); + void (*uninit)(struct net_device *dev); + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + void (*change_rx_flags)(struct net_device *dev, + int flags); + void (*set_rx_mode)(struct net_device *dev); + void (*set_multicast_list)(struct net_device *dev); + int (*set_mac_address)(struct net_device *dev, + void *addr); + int (*validate_addr)(struct net_device *dev); + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*set_config)(struct net_device *dev, + struct ifmap *map); + int (*change_mtu)(struct net_device *dev, int new_mtu); + void (*tx_timeout) (struct net_device *dev); + struct net_device_stats* (*get_stats)(struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER + void (*poll_controller)(struct net_device *dev); +#endif +#endif + }; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.1 From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:40:23 -0800 Subject: netdev: introduce dev_get_stats() In order for the network device ops get_stats call to be immutable, the handling of the default internal network device stats block has to be changed. Add a new helper function which replaces the old use of internal_get_stats. Note: change return code to make it clear that the caller should not go changing the returned statistics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9060f5f..981a089 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -865,8 +865,8 @@ struct net_device #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); #endif -#endif }; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1780,6 +1780,8 @@ extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); +extern const struct net_device_stats *dev_get_stats(struct net_device *dev); + extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -- cgit v1.1 From ccad637b0c57de1825ffd34c311bf71487545ac2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 22:42:31 -0800 Subject: netdev: expose ethernet address primitives When ethernet devices are converted, the function pointer setup by eth_setup() need to be done during intialization. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 25d62e6..0e5e970 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -41,6 +41,10 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh); extern void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); +extern int eth_mac_addr(struct net_device *dev, void *p); +extern int eth_change_mtu(struct net_device *dev, int new_mtu); +extern int eth_validate_addr(struct net_device *dev); + extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); -- cgit v1.1 From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 00:40:07 -0800 Subject: net: listening_hash get a spinlock per bucket This patch prepares RCU migration of listening_hash table for TCP/DCCP protocols. listening_hash table being small (32 slots per protocol), we add a spinlock for each slot, instead of a single rwlock for whole table. This should reduce hold time of readers, and writers concurrency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 45 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4818960..62d2dd0d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,6 +99,11 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +struct inet_listen_hashbucket { + spinlock_t lock; + struct hlist_head head; +}; + /* This is for listening sockets, thus all sockets which possess wildcards. */ #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ @@ -123,22 +128,21 @@ struct inet_hashinfo { unsigned int bhash_size; /* Note : 4 bytes padding on 64 bit arches */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + struct kmem_cache *bind_bucket_cachep; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members - * are often dirty. + * might be often dirty. + */ + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - struct kmem_cache *bind_bucket_cachep; + struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] + ____cacheline_aligned_in_smp; + }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -236,26 +240,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); - -/* - * - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). - */ -static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&hashinfo->lhash_lock); - atomic_inc(&hashinfo->lhash_users); - read_unlock(&hashinfo->lhash_lock); -} - -static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) -{ - if (atomic_dec_and_test(&hashinfo->lhash_users)) - wake_up(&hashinfo->lhash_wait); -} +void inet_hashinfo_init(struct inet_hashinfo *h); extern void __inet_hash_nolisten(struct sock *sk); extern void inet_hash(struct sock *sk); -- cgit v1.1 From d214c7537bbf2f247991fb65b3420b0b3d712c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Nov 2008 00:49:27 -0800 Subject: filter: add SKF_AD_NLATTR_NEST to look for nested attributes SKF_AD_NLATTR allows us to find the first matching attribute in a stream of netlink attributes from one offset to the end of the netlink message. This is not suitable to look for a specific matching inside a set of nested attributes. For example, in ctnetlink messages, if we look for the CTA_V6_SRC attribute in a message that talks about an IPv4 connection, SKF_AD_NLATTR returns the offset of CTA_STATUS which has the same value of CTA_V6_SRC but outside the nest. To differenciate CTA_STATUS and CTA_V6_SRC, we would have to make assumptions on the size of the attribute and the usual offset, resulting in horrible BSF code. This patch adds SKF_AD_NLATTR_NEST, which is a variant of SKF_AD_NLATTR, that looks for an attribute inside the limits of a nested attributes, but not further. This patch validates that we have enough room to look for the nested attributes - based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6ea9aa..1354aaf 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -122,7 +122,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_PKTTYPE 4 #define SKF_AD_IFINDEX 8 #define SKF_AD_NLATTR 12 -#define SKF_AD_MAX 16 +#define SKF_AD_NLATTR_NEST 16 +#define SKF_AD_MAX 20 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) -- cgit v1.1 From 0c19b0adb8dd33dbd10ff48e41971231c486855c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:08:29 -0800 Subject: netlink: avoid memset of 0 bytes sparse warning A netlink attribute padding of zero triggers this sparse warning: include/linux/netlink.h:245:8: warning: memset with byte count of 0 Avoid the memset when the size parameter is constant and requires no padding. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54..51b09a1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -242,7 +242,8 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_flags = flags; nlh->nlmsg_pid = pid; nlh->nlmsg_seq = seq; - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) + memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } -- cgit v1.1 From 13d2a1d2b032de08d7dcab6a1edcd47802681f96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:10:00 -0800 Subject: pkt_sched: add DRR scheduler Add classful DRR scheduler as a more flexible replacement for SFQ. The main difference to the algorithm described in "Efficient Fair Queueing using Deficit Round Robin" is that this implementation doesn't drop packets from the longest queue on overrun because its classful and limits are handled by each individual child qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 5d921fa..e3f133a 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -500,4 +500,20 @@ struct tc_netem_corrupt #define NETEM_DIST_SCALE 8192 +/* DRR */ + +enum +{ + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats +{ + u32 deficit; +}; + #endif -- cgit v1.1 From 018a7bf1e55000dd792194238c9043918d24d3dd Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 20 Nov 2008 15:59:56 +0100 Subject: netfilter: ip{,6}t_policy.h should include xp_policy.h It seems that all of the include/netfilter_{ipv4,ipv6}/{ipt,ip6t}_*.h which share constants include the corresponding include/netfilter/xp_*.h files. Neither ipt_policy.h not ip6t_policy.h do. Make these consistant with the norm. Signed-off-by: Andy Whitcroft Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv4/ipt_policy.h | 2 ++ include/linux/netfilter_ipv6/ip6t_policy.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h index b9478a2..1037fb2 100644 --- a/include/linux/netfilter_ipv4/ipt_policy.h +++ b/include/linux/netfilter_ipv4/ipt_policy.h @@ -1,6 +1,8 @@ #ifndef _IPT_POLICY_H #define _IPT_POLICY_H +#include + #define IPT_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ipt_policy_flags */ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h index 6bab316..b1c449d 100644 --- a/include/linux/netfilter_ipv6/ip6t_policy.h +++ b/include/linux/netfilter_ipv6/ip6t_policy.h @@ -1,6 +1,8 @@ #ifndef _IP6T_POLICY_H #define _IP6T_POLICY_H +#include + #define IP6T_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ip6t_policy_flags */ -- cgit v1.1 From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:14:53 -0800 Subject: netdev: add more functions to netdevice ops This patch moves neigh_setup and hard_start_xmit into the network device ops structure. For bisection, fix all the previously converted drivers as well. Bonding driver took the biggest hit on this. Added a prefetch of the hard_start_xmit in the fast path to try and reduce any impact this would have. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 981a089..d8fb236 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,8 +454,8 @@ struct netdev_queue { /* * This structure defines the management hooks for network devices. - * The following hooks can bed defined and are optonal (can be null) - * unless otherwise noted. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when network device is registered. @@ -475,6 +475,15 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * + * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * Called when a packet needs to be transmitted. + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Required can not be NULL. + * + * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); + * Called to decide which queue to when device supports multiple + * transmit queues. + * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscious is enabled. @@ -508,7 +517,7 @@ struct netdev_queue { * of a device. If not defined, any request to change MTU will * will return an error. * - * void (*ndo_tx_timeout) (struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev); * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * @@ -538,6 +547,10 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); + int (*ndo_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*ndo_select_queue)(struct net_device *dev, + struct sk_buff *skb); #define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -557,8 +570,10 @@ struct net_device_ops { int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); #define HAVE_CHANGE_MTU - int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); - + int (*ndo_change_mtu)(struct net_device *dev, + int new_mtu); + int (*ndo_neigh_setup)(struct net_device *dev, + struct neigh_parms *); #define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); @@ -761,18 +776,12 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - /* Map buffer to appropriate transmit queue */ - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ @@ -800,8 +809,6 @@ struct net_device /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); - #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif @@ -842,6 +849,10 @@ struct net_device void (*uninit)(struct net_device *dev); int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); void (*change_rx_flags)(struct net_device *dev, int flags); void (*set_rx_mode)(struct net_device *dev); @@ -854,6 +865,8 @@ struct net_device int (*set_config)(struct net_device *dev, struct ifmap *map); int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, + struct neigh_parms *); void (*tx_timeout) (struct net_device *dev); struct net_device_stats* (*get_stats)(struct net_device *dev); void (*vlan_rx_register)(struct net_device *dev, -- cgit v1.1 From 145186a39570244aead77dc2efc559e5cac90548 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:29:48 -0800 Subject: fddi: convert to new network device ops Similar to ethernet. Convert infrastructure and the one lone FDDI driver (for the one lone user of that hardware??). Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/fddidevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index e61e42d..155bafd 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -27,6 +27,7 @@ #ifdef __KERNEL__ extern __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); +extern int fddi_change_mtu(struct net_device *dev, int new_mtu); extern struct net_device *alloc_fddidev(int sizeof_priv); #endif -- cgit v1.1 From 748ff68fad9600593c6abe47856037602bd5d133 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:32:15 -0800 Subject: hippi: convert driver to net_device_ops Convert the HIPPI infrastructure for use with net_device_ops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hippidevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index bab303d..f148e49 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,9 @@ struct hippi_cb { }; extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); - +extern int hippi_change_mtu(struct net_device *dev, int new_mtu); +extern int hippi_mac_addr(struct net_device *dev, void *p); +extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif -- cgit v1.1 From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 20:39:09 -0800 Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks. /proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'. This should speedup writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 62d2dd0d..28b3ee3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -116,7 +116,7 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; - rwlock_t *ehash_locks; + spinlock_t *ehash_locks; unsigned int ehash_size; unsigned int ehash_locks_mask; @@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } -static inline rwlock_t *inet_ehash_lockp( +static inline spinlock_t *inet_ehash_lockp( struct inet_hashinfo *hashinfo, unsigned int hash) { @@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 4096; if (sizeof(rwlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(rwlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + if (size * sizeof(spinlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); else #endif - hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!hashinfo->ehash_locks) return ENOMEM; for (i = 0; i < size; i++) - rwlock_init(&hashinfo->ehash_locks[i]); + spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = size - 1; return 0; @@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) if (hashinfo->ehash_locks) { #ifdef CONFIG_NUMA unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(rwlock_t); + sizeof(spinlock_t); if (size > PAGE_SIZE) vfree(hashinfo->ehash_locks); else -- cgit v1.1 From 2f90b8657ec942d1880f720e0177ee71df7c8e3c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 20:52:10 -0800 Subject: ixgbe: this patch adds support for DCB to the kernel and ixgbe driver This adds support for Data Center Bridging (DCB) features in the ixgbe driver and adds an rtnetlink interface for configuring DCB to the kernel. The DCB feature support included are Priority Grouping (PG) - which allows bandwidth guarantees to be allocated to groups to traffic based on the 802.1q priority, and Priority Based Flow Control (PFC) - which introduces a new MAC control PAUSE frame which works at granularity of the 802.1p priority instead of the link (IEEE 802.3x). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 230 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 8 ++ include/linux/rtnetlink.h | 5 + include/net/dcbnl.h | 44 +++++++++ 4 files changed, 287 insertions(+) create mode 100644 include/linux/dcbnl.h create mode 100644 include/net/dcbnl.h (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h new file mode 100644 index 0000000..32d32c1 --- /dev/null +++ b/include/linux/dcbnl.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __LINUX_DCBNL_H__ +#define __LINUX_DCBNL_H__ + +#define DCB_PROTO_VERSION 1 + +struct dcbmsg { + unsigned char dcb_family; + __u8 cmd; + __u16 dcb_pad; +}; + +/** + * enum dcbnl_commands - supported DCB commands + * + * @DCB_CMD_UNDEFINED: unspecified command to catch errors + * @DCB_CMD_GSTATE: request the state of DCB in the device + * @DCB_CMD_SSTATE: set the state of DCB in the device + * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx + * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx + * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx + * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx + * @DCB_CMD_PFC_GCFG: request the priority flow control configuration + * @DCB_CMD_PFC_SCFG: set the priority flow control configuration + * @DCB_CMD_SET_ALL: apply all changes to the underlying device + * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying + * device. Only useful when using bonding. + */ +enum dcbnl_commands { + DCB_CMD_UNDEFINED, + + DCB_CMD_GSTATE, + DCB_CMD_SSTATE, + + DCB_CMD_PGTX_GCFG, + DCB_CMD_PGTX_SCFG, + DCB_CMD_PGRX_GCFG, + DCB_CMD_PGRX_SCFG, + + DCB_CMD_PFC_GCFG, + DCB_CMD_PFC_SCFG, + + DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + + __DCB_CMD_ENUM_MAX, + DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, +}; + + +/** + * enum dcbnl_attrs - DCB top-level netlink attributes + * + * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING) + * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8) + * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8) + * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED) + * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8) + * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) + * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) + * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + */ +enum dcbnl_attrs { + DCB_ATTR_UNDEFINED, + + DCB_ATTR_IFNAME, + DCB_ATTR_STATE, + DCB_ATTR_PFC_STATE, + DCB_ATTR_PFC_CFG, + DCB_ATTR_NUM_TC, + DCB_ATTR_PG_CFG, + DCB_ATTR_SET_ALL, + DCB_ATTR_PERM_HWADDR, + + __DCB_ATTR_ENUM_MAX, + DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs + * + * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8) + * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8) + * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8) + * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8) + * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8) + * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8) + * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8) + * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8) + * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined + * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG) + * + */ +enum dcbnl_pfc_up_attrs { + DCB_PFC_UP_ATTR_UNDEFINED, + + DCB_PFC_UP_ATTR_0, + DCB_PFC_UP_ATTR_1, + DCB_PFC_UP_ATTR_2, + DCB_PFC_UP_ATTR_3, + DCB_PFC_UP_ATTR_4, + DCB_PFC_UP_ATTR_5, + DCB_PFC_UP_ATTR_6, + DCB_PFC_UP_ATTR_7, + DCB_PFC_UP_ATTR_ALL, + + __DCB_PFC_UP_ATTR_ENUM_MAX, + DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pg_attrs - DCB Priority Group attributes + * + * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED) + * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG) + * + */ +enum dcbnl_pg_attrs { + DCB_PG_ATTR_UNDEFINED, + + DCB_PG_ATTR_TC_0, + DCB_PG_ATTR_TC_1, + DCB_PG_ATTR_TC_2, + DCB_PG_ATTR_TC_3, + DCB_PG_ATTR_TC_4, + DCB_PG_ATTR_TC_5, + DCB_PG_ATTR_TC_6, + DCB_PG_ATTR_TC_7, + DCB_PG_ATTR_TC_MAX, + DCB_PG_ATTR_TC_ALL, + + DCB_PG_ATTR_BW_ID_0, + DCB_PG_ATTR_BW_ID_1, + DCB_PG_ATTR_BW_ID_2, + DCB_PG_ATTR_BW_ID_3, + DCB_PG_ATTR_BW_ID_4, + DCB_PG_ATTR_BW_ID_5, + DCB_PG_ATTR_BW_ID_6, + DCB_PG_ATTR_BW_ID_7, + DCB_PG_ATTR_BW_ID_MAX, + DCB_PG_ATTR_BW_ID_ALL, + + __DCB_PG_ATTR_ENUM_MAX, + DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_tc_attrs - DCB Traffic Class attributes + * + * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors + * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to + * Valid values are: 0-7 + * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map + * Some devices may not support changing the + * user priority map of a TC. + * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting + * 0 - none + * 1 - group strict + * 2 - link strict + * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and + * not configured to use link strict priority, + * this is the percentage of bandwidth of the + * priority group this traffic class belongs to + * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters + * + */ +enum dcbnl_tc_attrs { + DCB_TC_ATTR_PARAM_UNDEFINED, + + DCB_TC_ATTR_PARAM_PGID, + DCB_TC_ATTR_PARAM_UP_MAPPING, + DCB_TC_ATTR_PARAM_STRICT_PRIO, + DCB_TC_ATTR_PARAM_BW_PCT, + DCB_TC_ATTR_PARAM_ALL, + + __DCB_TC_ATTR_PARAM_ENUM_MAX, + DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, +}; + +/** + * enum dcb_general_attr_values - general DCB attribute values + * + * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported + * + */ +enum dcb_general_attr_values { + DCB_ATTR_VALUE_UNDEFINED = 0xff +}; + + +#endif /* __LINUX_DCBNL_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8fb236..6095af5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,9 @@ #include #include +#ifdef CONFIG_DCBNL +#include +#endif struct vlan_group; struct ethtool_ops; @@ -843,6 +846,11 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; +#ifdef CONFIG_DCBNL + /* Data Center Bridging netlink ops */ + struct dcbnl_rtnl_ops *dcbnl_ops; +#endif + #ifdef CONFIG_COMPAT_NET_DEV_OPS struct { int (*init)(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2b3d51c..e88f705 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -107,6 +107,11 @@ enum { RTM_GETADDRLABEL, #define RTM_GETADDRLABEL RTM_GETADDRLABEL + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h new file mode 100644 index 0000000..0ef0c5a --- /dev/null +++ b/include/net/dcbnl.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __NET_DCBNL_H__ +#define __NET_DCBNL_H__ + +/* + * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through + * the netdevice struct. + */ +struct dcbnl_rtnl_ops { + u8 (*getstate)(struct net_device *); + void (*setstate)(struct net_device *, u8); + void (*getpermhwaddr)(struct net_device *, u8 *); + void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgtx)(struct net_device *, int, u8); + void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgrx)(struct net_device *, int, u8); + void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgtx)(struct net_device *, int, u8 *); + void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgrx)(struct net_device *, int, u8 *); + void (*setpfccfg)(struct net_device *, int, u8); + void (*getpfccfg)(struct net_device *, int, u8 *); + u8 (*setall)(struct net_device *); +}; + +#endif /* __NET_DCBNL_H__ */ -- cgit v1.1 From 46132188bf72e22ef097f16ed5c969ee8cea1e8b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:05:08 -0800 Subject: DCB: Add interface to query for the DCB capabilities of an device. Adds to the netlink interface for Data Center Bridging (DCB), allowing the DCB capabilities supported by a device to be queried. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 37 +++++++++++++++++++++++++++++++++++++ include/net/dcbnl.h | 1 + 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 32d32c1..13f0c63 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -43,6 +43,7 @@ struct dcbmsg { * @DCB_CMD_SET_ALL: apply all changes to the underlying device * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. + * @DCB_CMD_GCAP: request the DCB capabilities of the device */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -60,6 +61,7 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -78,6 +80,7 @@ enum dcbnl_commands { * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -90,6 +93,7 @@ enum dcbnl_attrs { DCB_ATTR_PG_CFG, DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, + DCB_ATTR_CAP, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -217,6 +221,39 @@ enum dcbnl_tc_attrs { }; /** + * enum dcbnl_cap_attrs - DCB Capability attributes + * + * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_CAP_ATTR_ALL: (NLA_FLAG) all capability parameters + * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups + * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control + * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to + * traffic class mapping + * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device + * can be configured to use for Priority Groups + * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device can be + * configured to use for Priority Flow Control + * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority + * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion + * Notification + */ +enum dcbnl_cap_attrs { + DCB_CAP_ATTR_UNDEFINED, + DCB_CAP_ATTR_ALL, + DCB_CAP_ATTR_PG, + DCB_CAP_ATTR_PFC, + DCB_CAP_ATTR_UP2TC, + DCB_CAP_ATTR_PG_TCS, + DCB_CAP_ATTR_PFC_TCS, + DCB_CAP_ATTR_GSP, + DCB_CAP_ATTR_BCN, + + __DCB_CAP_ATTR_ENUM_MAX, + DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, +}; +/** * enum dcb_general_attr_values - general DCB attribute values * * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 0ef0c5a..183ed04 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -39,6 +39,7 @@ struct dcbnl_rtnl_ops { void (*setpfccfg)(struct net_device *, int, u8); void (*getpfccfg)(struct net_device *, int, u8 *); u8 (*setall)(struct net_device *); + u8 (*getcap)(struct net_device *, int, u8 *); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.1 From 33dbabc4a7f7bd72313c73a3c199f31f3900336f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:08:19 -0800 Subject: DCB: Add interface to query # of TCs supported by device Adds interface for Data Center Bridging (DCB) to query (and set if supported) the number of traffic classes currently supported by the device for the two (DCB) features: priority groups (PG) and priority flow control (PFC). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 27 +++++++++++++++++++++++++++ include/net/dcbnl.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 13f0c63..1077fba 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -44,6 +44,8 @@ struct dcbmsg { * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. * @DCB_CMD_GCAP: request the DCB capabilities of the device + * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported + * @DCB_CMD_SNUMTCS: set the number of traffic classes */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,6 +64,8 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, + DCB_CMD_SNUMTCS, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -81,6 +85,7 @@ enum dcbnl_commands { * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) + * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -94,6 +99,7 @@ enum dcbnl_attrs { DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, + DCB_ATTR_NUMTCS, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -253,6 +259,27 @@ enum dcbnl_cap_attrs { __DCB_CAP_ATTR_ENUM_MAX, DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, }; + +/** + * enum dcbnl_numtcs_attrs - number of traffic classes + * + * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes + * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for + * priority groups + * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can + * support priority flow control + */ +enum dcbnl_numtcs_attrs { + DCB_NUMTCS_ATTR_UNDEFINED, + DCB_NUMTCS_ATTR_ALL, + DCB_NUMTCS_ATTR_PG, + DCB_NUMTCS_ATTR_PFC, + + __DCB_NUMTCS_ATTR_ENUM_MAX, + DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 183ed04..f0a6528 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -40,6 +40,8 @@ struct dcbnl_rtnl_ops { void (*getpfccfg)(struct net_device *, int, u8 *); u8 (*setall)(struct net_device *); u8 (*getcap)(struct net_device *, int, u8 *); + u8 (*getnumtcs)(struct net_device *, int, u8 *); + u8 (*setnumtcs)(struct net_device *, int, u8); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.1 From 0eb3aa9bab20217fb42244ccdcb5bf8a002f504c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:09:23 -0800 Subject: DCB: Add interface to query the state of PFC feature. Adds a netlink interface for Data Center Bridging (DCB) to get and set the enable state of the Priority Flow Control (PFC) feature. Primarily, this is a way to turn off PFC in the driver while DCB remains enabled. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 2 ++ include/net/dcbnl.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 1077fba..6cc4560 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -66,6 +66,8 @@ enum dcbnl_commands { DCB_CMD_GCAP, DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, + DCB_CMD_PFC_SSTATE, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index f0a6528..c7d87ca 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -42,6 +42,8 @@ struct dcbnl_rtnl_ops { u8 (*getcap)(struct net_device *, int, u8 *); u8 (*getnumtcs)(struct net_device *, int, u8 *); u8 (*setnumtcs)(struct net_device *, int, u8); + u8 (*getpfcstate)(struct net_device *); + void (*setpfcstate)(struct net_device *, u8); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.1 From 859ee3c43812051e21816c6d6d4cc04fb7ce9b2e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:10:23 -0800 Subject: DCB: Add support for DCB BCN Adds an interface to configure the Backward Congestion Notification (BCN) feature. In a BCN capabale network, congestion notifications from congested points out in the network can cause the end station limit the rate of a given traffic flow. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 44 +++++++++++++++++++++++++++++++++++++++++++- include/net/dcbnl.h | 4 ++++ 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 6cc4560..e73a614 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -46,6 +46,8 @@ struct dcbmsg { * @DCB_CMD_GCAP: request the DCB capabilities of the device * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported * @DCB_CMD_SNUMTCS: set the number of traffic classes + * @DCB_CMD_GBCN: set backward congestion notification configuration + * @DCB_CMD_SBCN: get backward congestion notification configration. */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,18 +64,24 @@ enum dcbnl_commands { DCB_CMD_PFC_SCFG, DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, DCB_CMD_PFC_SSTATE, + DCB_CMD_BCN_GCFG, + DCB_CMD_BCN_SCFG, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; - /** * enum dcbnl_attrs - DCB top-level netlink attributes * @@ -88,6 +96,7 @@ enum dcbnl_commands { * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) + * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -102,6 +111,7 @@ enum dcbnl_attrs { DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, DCB_ATTR_NUMTCS, + DCB_ATTR_BCN, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -282,6 +292,38 @@ enum dcbnl_numtcs_attrs { DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, }; +enum dcbnl_bcn_attrs{ + DCB_BCN_ATTR_UNDEFINED = 0, + + DCB_BCN_ATTR_RP_0, + DCB_BCN_ATTR_RP_1, + DCB_BCN_ATTR_RP_2, + DCB_BCN_ATTR_RP_3, + DCB_BCN_ATTR_RP_4, + DCB_BCN_ATTR_RP_5, + DCB_BCN_ATTR_RP_6, + DCB_BCN_ATTR_RP_7, + DCB_BCN_ATTR_RP_ALL, + + DCB_BCN_ATTR_ALPHA, + DCB_BCN_ATTR_BETA, + DCB_BCN_ATTR_GD, + DCB_BCN_ATTR_GI, + DCB_BCN_ATTR_TMAX, + DCB_BCN_ATTR_TD, + DCB_BCN_ATTR_RMIN, + DCB_BCN_ATTR_W, + DCB_BCN_ATTR_RD, + DCB_BCN_ATTR_RU, + DCB_BCN_ATTR_WRTT, + DCB_BCN_ATTR_RI, + DCB_BCN_ATTR_C, + DCB_BCN_ATTR_ALL, + + __DCB_BCN_ATTR_ENUM_MAX, + DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index c7d87ca..91e0a3d 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -44,6 +44,10 @@ struct dcbnl_rtnl_ops { u8 (*setnumtcs)(struct net_device *, int, u8); u8 (*getpfcstate)(struct net_device *); void (*setpfcstate)(struct net_device *, u8); + void (*getbcncfg)(struct net_device *, int, u32 *); + void (*setbcncfg)(struct net_device *, int, u32); + void (*getbcnrp)(struct net_device *, int, u8 *); + void (*setbcnrp)(struct net_device *, int, u8); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.1 From 4821277f36e008b531728e359fbbedb229117f4b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 3 Nov 2008 21:05:01 +0100 Subject: mac80211: fix BUILD_BUG_ON() caused by misalignment on arm On ARM alignment is done slightly different from other architectures. struct ieee80211_tx_rate is aligned to word size, even though it only has 3 single-byte members, which triggers the BUILD_BUG_ON in ieee80211_tx_info_clear_status This patch marks the struct ieee80211_tx_rate as packed, so that ARM behaves like the other architectures. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1b8ed42..c813fbf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -307,7 +307,7 @@ struct ieee80211_tx_rate { s8 idx; u8 count; u8 flags; -}; +} __attribute__((packed)); /** * struct ieee80211_tx_info - skb transmit information -- cgit v1.1 From 0ed94eaaed618634f68197161203aac9f849471e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 7 Nov 2008 19:50:42 -0800 Subject: mac80211: remove more excess kernel-doc Delete kernel-doc struct descriptions for fields that don't exist: Warning(include/net/mac80211.h:1263): Excess struct/union/enum/typedef member 'conf_ht' description in 'ieee80211_ops' Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'addr' description in 'sta_info' Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'aid' description in 'sta_info' Signed-off-by: Randy Dunlap cc: Johannes Berg cc: John W. Linville Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c813fbf..0dbbfc7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1269,8 +1269,6 @@ enum ieee80211_ampdu_mlme_action { * This is needed only for IBSS mode and the result of this function is * used to determine whether to reply to Probe Requests. * - * @conf_ht: Configures low level driver with 802.11n HT data. Must be atomic. - * * @ampdu_action: Perform a certain A-MPDU action * The RA/TID combination determines the destination and TID we want * the ampdu action to be performed for. The action is defined through -- cgit v1.1 From 274bfb8dc5ffa16cb073801bebe76ab7f4e2e73d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 29 Oct 2008 11:35:05 -0400 Subject: lib80211: absorb crypto bits from net/ieee80211 These bits are shared already between ipw2x00 and hostap, and could probably be shared both more cleanly and with other drivers. This commit simply relocates the code to lib80211 and adjusts the drivers appropriately. Signed-off-by: John W. Linville --- include/net/ieee80211.h | 10 ++-- include/net/ieee80211_crypt.h | 108 ------------------------------------------ include/net/lib80211.h | 108 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 115 deletions(-) delete mode 100644 include/net/ieee80211_crypt.h (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 738734a..7ab3ed2 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -30,6 +30,8 @@ #include #include +#include + #define IEEE80211_VERSION "git-1.1.13" #define IEEE80211_DATA_LEN 2304 @@ -355,8 +357,6 @@ struct ieee80211_stats { struct ieee80211_device; -#include "ieee80211_crypt.h" - #define SEC_KEY_1 (1<<0) #define SEC_KEY_2 (1<<1) #define SEC_KEY_3 (1<<2) @@ -937,11 +937,7 @@ struct ieee80211_device { size_t wpa_ie_len; u8 *wpa_ie; - struct list_head crypt_deinit_list; - struct ieee80211_crypt_data *crypt[WEP_KEYS]; - int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ - struct timer_list crypt_deinit_timer; - int crypt_quiesced; + struct lib80211_crypt_info crypt_info; int bcrx_sta_key; /* use individual keys to override default keys even * with RX of broad/multicast frames */ diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h deleted file mode 100644 index b3d65e0..0000000 --- a/include/net/ieee80211_crypt.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Original code based on Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3. - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * - * Copyright (c) 2002-2003, Jouni Malinen - * - * Adaption to a generic IEEE 802.11 stack by James Ketrenos - * - * - * Copyright (c) 2004, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -/* - * This file defines the interface to the ieee80211 crypto module. - */ -#ifndef IEEE80211_CRYPT_H -#define IEEE80211_CRYPT_H - -#include -#include -#include -#include - -enum { - IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), -}; - -struct sk_buff; -struct module; - -struct ieee80211_crypto_ops { - const char *name; - struct list_head list; - - /* init new crypto context (e.g., allocate private data space, - * select IV, etc.); returns NULL on failure or pointer to allocated - * private data on success */ - void *(*init) (int keyidx); - - /* deinitialize crypto context and free allocated private data */ - void (*deinit) (void *priv); - - int (*build_iv) (struct sk_buff * skb, int hdr_len, - u8 *key, int keylen, void *priv); - - /* encrypt/decrypt return < 0 on error or >= 0 on success. The return - * value from decrypt_mpdu is passed as the keyidx value for - * decrypt_msdu. skb must have enough head and tail room for the - * encryption; if not, error will be returned; these functions are - * called for all MPDUs (i.e., fragments). - */ - int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - - /* These functions are called for full MSDUs, i.e. full frames. - * These can be NULL if full MSDU operations are not needed. */ - int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, - void *priv); - - int (*set_key) (void *key, int len, u8 * seq, void *priv); - int (*get_key) (void *key, int len, u8 * seq, void *priv); - - /* procfs handler for printing out key information and possible - * statistics */ - char *(*print_stats) (char *p, void *priv); - - /* Crypto specific flag get/set for configuration settings */ - unsigned long (*get_flags) (void *priv); - unsigned long (*set_flags) (unsigned long flags, void *priv); - - /* maximum number of bytes added by encryption; encrypt buf is - * allocated with extra_prefix_len bytes, copy of in_buf, and - * extra_postfix_len; encrypt need not use all this space, but - * the result must start at the beginning of the buffer and correct - * length must be returned */ - int extra_mpdu_prefix_len, extra_mpdu_postfix_len; - int extra_msdu_prefix_len, extra_msdu_postfix_len; - - struct module *owner; -}; - -struct ieee80211_crypt_data { - struct list_head list; /* delayed deletion list */ - struct ieee80211_crypto_ops *ops; - void *priv; - atomic_t refcnt; -}; - -struct ieee80211_device; - -int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); -int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); -struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name); -void ieee80211_crypt_deinit_entries(struct ieee80211_device *, int); -void ieee80211_crypt_deinit_handler(unsigned long); -void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, - struct ieee80211_crypt_data **crypt); -void ieee80211_crypt_quiescing(struct ieee80211_device *ieee); - -#endif diff --git a/include/net/lib80211.h b/include/net/lib80211.h index e1558a1..dd1079f 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -3,11 +3,32 @@ * * Copyright (c) 2008, John W. Linville * + * Some bits copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Original code based on Host AP (software wireless LAN access point) driver + * for Intersil Prism2/2.5/3. + * + * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen + * + * Copyright (c) 2002-2003, Jouni Malinen + * + * Adaption to a generic IEEE 802.11 stack by James Ketrenos + * + * + * Copyright (c) 2004, Intel Corporation + * */ #ifndef LIB80211_H #define LIB80211_H +#include +#include +#include +#include +#include +#include #include /* print_ssid() is intended to be used in debug (and possibly error) @@ -15,4 +36,91 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); #define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused +#define NUM_WEP_KEYS 4 + +enum { + IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), +}; + +struct lib80211_crypto_ops { + const char *name; + struct list_head list; + + /* init new crypto context (e.g., allocate private data space, + * select IV, etc.); returns NULL on failure or pointer to allocated + * private data on success */ + void *(*init) (int keyidx); + + /* deinitialize crypto context and free allocated private data */ + void (*deinit) (void *priv); + + int (*build_iv) (struct sk_buff * skb, int hdr_len, + u8 *key, int keylen, void *priv); + + /* encrypt/decrypt return < 0 on error or >= 0 on success. The return + * value from decrypt_mpdu is passed as the keyidx value for + * decrypt_msdu. skb must have enough head and tail room for the + * encryption; if not, error will be returned; these functions are + * called for all MPDUs (i.e., fragments). + */ + int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + + /* These functions are called for full MSDUs, i.e. full frames. + * These can be NULL if full MSDU operations are not needed. */ + int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, + void *priv); + + int (*set_key) (void *key, int len, u8 * seq, void *priv); + int (*get_key) (void *key, int len, u8 * seq, void *priv); + + /* procfs handler for printing out key information and possible + * statistics */ + char *(*print_stats) (char *p, void *priv); + + /* Crypto specific flag get/set for configuration settings */ + unsigned long (*get_flags) (void *priv); + unsigned long (*set_flags) (unsigned long flags, void *priv); + + /* maximum number of bytes added by encryption; encrypt buf is + * allocated with extra_prefix_len bytes, copy of in_buf, and + * extra_postfix_len; encrypt need not use all this space, but + * the result must start at the beginning of the buffer and correct + * length must be returned */ + int extra_mpdu_prefix_len, extra_mpdu_postfix_len; + int extra_msdu_prefix_len, extra_msdu_postfix_len; + + struct module *owner; +}; + +struct lib80211_crypt_data { + struct list_head list; /* delayed deletion list */ + struct lib80211_crypto_ops *ops; + void *priv; + atomic_t refcnt; +}; + +struct lib80211_crypt_info { + char *name; + /* Most clients will already have a lock, + so just point to that. */ + spinlock_t *lock; + + struct lib80211_crypt_data *crypt[NUM_WEP_KEYS]; + int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ + struct list_head crypt_deinit_list; + struct timer_list crypt_deinit_timer; + int crypt_quiesced; +}; + +int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); +int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); +struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); +void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *, int); +void lib80211_crypt_deinit_handler(unsigned long); +void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, + struct lib80211_crypt_data **crypt); +void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); + #endif /* LIB80211_H */ -- cgit v1.1 From 2ba4b32ecf748d5f45f298fc9677fa46d1dd9aff Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 11 Nov 2008 16:00:06 -0500 Subject: lib80211: consolidate crypt init routines Signed-off-by: John W. Linville --- include/net/lib80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h index dd1079f..a269b23 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -114,6 +114,9 @@ struct lib80211_crypt_info { int crypt_quiesced; }; +int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, + spinlock_t *lock); +void lib80211_crypt_info_free(struct lib80211_crypt_info *info); int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); -- cgit v1.1 From 627271018df75c8861b9e75b39d5995842e6ec95 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 12 Nov 2008 10:01:41 -0500 Subject: mac80211: add explicit padding in struct ieee80211_tx_info Otherwise, the BUILD_BUG_ON calls in ieee80211_tx_info_clear_status can fail on some architectures. Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0dbbfc7..6a1d4ea 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -341,6 +341,7 @@ struct ieee80211_tx_info { u8 antenna_sel_tx; /* 2 byte hole */ + u8 pad[2]; union { struct { -- cgit v1.1 From a1eb5fe319beb9e181aa52c8adf75ad9aab56a89 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 19 Nov 2008 09:37:43 +0200 Subject: wireless: missing include in lib80211.h This patch adds #include in lib80211.h to avoid these compilation erros. > In file included from /work/src/wireless-testing/net/wireless/lib80211.c:24: > /work/src/wireless-testing/include/net/lib80211.h:113: error: field > 'crypt_deinit_timer' has incomplete type > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_info_init': > /work/src/wireless-testing/net/wireless/lib80211.c:83: error: implicit > declaration of function 'setup_timer' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_info_free': > /work/src/wireless-testing/net/wireless/lib80211.c:95: error: implicit > declaration of function 'del_timer_sync' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_deinit_handler': > /work/src/wireless-testing/net/wireless/lib80211.c:157: error: > implicit declaration of function 'add_timer' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_delayed_deinit': > /work/src/wireless-testing/net/wireless/lib80211.c:182: error: > implicit declaration of function 'timer_pending' > make[3]: *** [net/wireless/lib80211.o] Error 1 > make[2]: *** [net/wireless] Error 2 > make[1]: *** [net] Error 2 > make: *** [sub-make] Error 2 Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- include/net/lib80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h index a269b23..fb4e278 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -30,7 +30,7 @@ #include #include #include - +#include /* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); -- cgit v1.1 From f757fec4b0d45dfcb52f9a914a12225a6a0a3e05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Nov 2008 15:49:19 -0800 Subject: net: use net_eq() in INET_MATCH and INET_TW_MATCH We can avoid some useless instructions if !CONFIG_NET_NS Because of RCU, we use INET_MATCH or INET_TW_MATCH twice for the found socket, so thats six instructions less per incoming TCP packet. Yet another tbench speedup :) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 28b3ee3..ec7ee2e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -291,25 +291,25 @@ typedef __u64 __bitwise __addrpair; ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ -- cgit v1.1 From 2baf8a2daab65cdd3f20bfeb4676a2f6aff7c3bf Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Fri, 21 Nov 2008 16:34:18 -0800 Subject: netdevice hdlc: Convert directly reference of netdev->priv For killing directly reference of netdev->priv, use netdev->ml_priv to replace it. Because the private pvc data comes from add_pvc() and can't be allocated in alloc_netdev(). Signed-off-by: Wang Chen Acked-by: Krzysztof Halasa Signed-off-by: David S. Miller --- include/linux/hdlc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index c597696..e960faa 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -80,7 +80,7 @@ struct net_device *alloc_hdlcdev(void *priv); static inline struct hdlc_device* dev_to_hdlc(struct net_device *dev) { - return dev->priv; + return netdev_priv(dev); } static __inline__ void debug_frame(const struct sk_buff *skb) -- cgit v1.1 From 72364706c3b7c09a658e356218a918c5f92dcad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= Date: Thu, 14 Aug 2008 19:18:17 +0200 Subject: WAN: syncppp.c is no longer used by any kernel code. Remove it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa --- include/net/syncppp.h | 102 -------------------------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 include/net/syncppp.h (limited to 'include') diff --git a/include/net/syncppp.h b/include/net/syncppp.h deleted file mode 100644 index 9e306f7..0000000 --- a/include/net/syncppp.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Defines for synchronous PPP/Cisco link level subroutines. - * - * Copyright (C) 1994 Cronyx Ltd. - * Author: Serge Vakulenko, - * - * This software is distributed with NO WARRANTIES, not even the implied - * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Authors grant any other persons or organizations permission to use - * or modify this software as long as this message is kept with the software, - * all derivative works or modified versions. - * - * Version 1.7, Wed Jun 7 22:12:02 MSD 1995 - * - * - * - */ - -#ifndef _SYNCPPP_H_ -#define _SYNCPPP_H_ 1 - -#ifdef __KERNEL__ -struct slcp { - u16 state; /* state machine */ - u32 magic; /* local magic number */ - u_char echoid; /* id of last keepalive echo request */ - u_char confid; /* id of last configuration request */ -}; - -struct sipcp { - u16 state; /* state machine */ - u_char confid; /* id of last configuration request */ -}; - -struct sppp -{ - struct sppp * pp_next; /* next interface in keepalive list */ - u32 pp_flags; /* use Cisco protocol instead of PPP */ - u16 pp_alivecnt; /* keepalive packets counter */ - u16 pp_loopcnt; /* loopback detection counter */ - u32 pp_seq; /* local sequence number */ - u32 pp_rseq; /* remote sequence number */ - struct slcp lcp; /* LCP params */ - struct sipcp ipcp; /* IPCP params */ - struct timer_list pp_timer; - struct net_device *pp_if; - char pp_link_state; /* Link status */ - spinlock_t lock; -}; - -struct ppp_device -{ - struct net_device *dev; /* Network device pointer */ - struct sppp sppp; /* Synchronous PPP */ -}; - -static inline struct sppp *sppp_of(struct net_device *dev) -{ - struct ppp_device **ppp = dev->ml_priv; - BUG_ON((*ppp)->dev != dev); - return &(*ppp)->sppp; -} - -#define PP_KEEPALIVE 0x01 /* use keepalive protocol */ -#define PP_CISCO 0x02 /* use Cisco protocol instead of PPP */ -#define PP_TIMO 0x04 /* cp_timeout routine active */ -#define PP_DEBUG 0x08 - -#define PPP_MTU 1500 /* max. transmit unit */ - -#define LCP_STATE_CLOSED 0 /* LCP state: closed (conf-req sent) */ -#define LCP_STATE_ACK_RCVD 1 /* LCP state: conf-ack received */ -#define LCP_STATE_ACK_SENT 2 /* LCP state: conf-ack sent */ -#define LCP_STATE_OPENED 3 /* LCP state: opened */ - -#define IPCP_STATE_CLOSED 0 /* IPCP state: closed (conf-req sent) */ -#define IPCP_STATE_ACK_RCVD 1 /* IPCP state: conf-ack received */ -#define IPCP_STATE_ACK_SENT 2 /* IPCP state: conf-ack sent */ -#define IPCP_STATE_OPENED 3 /* IPCP state: opened */ - -#define SPPP_LINK_DOWN 0 /* link down - no keepalive */ -#define SPPP_LINK_UP 1 /* link is up - keepalive ok */ - -void sppp_attach (struct ppp_device *pd); -void sppp_detach (struct net_device *dev); -int sppp_do_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); -struct sk_buff *sppp_dequeue (struct net_device *dev); -int sppp_isempty (struct net_device *dev); -void sppp_flush (struct net_device *dev); -int sppp_open (struct net_device *dev); -int sppp_reopen (struct net_device *dev); -int sppp_close (struct net_device *dev); -#endif - -#define SPPPIOCCISCO (SIOCDEVPRIVATE) -#define SPPPIOCPPP (SIOCDEVPRIVATE+1) -#define SPPPIOCDEBUG (SIOCDEVPRIVATE+2) -#define SPPPIOCSFLAGS (SIOCDEVPRIVATE+3) -#define SPPPIOCGFLAGS (SIOCDEVPRIVATE+4) - -#endif /* _SYNCPPP_H_ */ -- cgit v1.1 From b20a9c24d5c5d466d7e4a25c6f1bedbd2d16ad4f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 23 Nov 2008 16:02:31 -0800 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389c..6a72ff5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 -- cgit v1.1 From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 17:22:55 -0800 Subject: net: Convert TCP/DCCP listening hash tables to use RCU This is the last step to be able to perform full RCU lookups in __inet_lookup() : After established/timewait tables, we add RCU lookups to listening hash table. The only trick here is that a socket of a given type (TCP ipv4, TCP ipv6, ...) can now flight between two different tables (established and listening) during a RCU grace period, so we must use different 'nulls' end-of-chain values for two tables. We define a large value : #define LISTENING_NULLS_BASE (1U << 29) So that slots in listening table are guaranteed to have different end-of-chain values than slots in established table. A reader can still detect it finished its lookup in the right chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ec7ee2e..f44bb5c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,9 +99,16 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* + * Sockets can be hashed in established or listening table + * We must use different 'nulls' end-of-chain value for listening + * hash table, or we might find a socket that was closed and + * reallocated/inserted into established hash table + */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_head head; + struct hlist_nulls_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ -- cgit v1.1 From 1f87e235e6fb92c2968b52b9191de04f1aff8e77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 23:24:32 -0800 Subject: eth: Declare an optimized compare_ether_addr_64bits() function Linus mentioned we could try to perform long word operations, even on potentially unaligned addresses, on x86 at least. David mentioned the HAVE_EFFICIENT_UNALIGNED_ACCESS test to handle this on all arches that have efficient unailgned accesses. I tried this idea and got nice assembly on 32 bits: 158: 33 82 38 01 00 00 xor 0x138(%edx),%eax 15e: 33 8a 34 01 00 00 xor 0x134(%edx),%ecx 164: c1 e0 10 shl $0x10,%eax 167: 09 c1 or %eax,%ecx 169: 74 0b je 176 And very nice assembly on 64 bits of course (one xor, one shl) Nice oprofile improvement in eth_type_trans(), 0.17 % instead of 0.41 %, expected since we remove 8 instructions on a fast path. This patch implements a compare_ether_addr_64bits() function, that uses the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ifdef to efficiently perform the 6 bytes comparison on all capable arches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0e5e970..1cb0f0b 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef __KERNEL__ extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); @@ -140,6 +141,47 @@ static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) BUILD_BUG_ON(ETH_ALEN != 6); return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; } + +static inline unsigned long zap_last_2bytes(unsigned long value) +{ +#ifdef __BIG_ENDIAN + return value >> 16; +#else + return value << 16; +#endif +} + +/** + * compare_ether_addr_64bits - Compare two Ethernet addresses + * @addr1: Pointer to an array of 8 bytes + * @addr2: Pointer to an other array of 8 bytes + * + * Compare two ethernet addresses, returns 0 if equal. + * Same result than "memcmp(addr1, addr2, ETH_ALEN)" but without conditional + * branches, and possibly long word memory accesses on CPU allowing cheap + * unaligned memory reads. + * arrays = { byte1, byte2, byte3, byte4, byte6, byte7, pad1, pad2} + * + * Please note that alignment of addr1 & addr2 is only guaranted to be 16 bits. + */ + +static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2], + const u8 addr2[6+2]) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long fold = ((*(unsigned long *)addr1) ^ + (*(unsigned long *)addr2)); + + if (sizeof(fold) == 8) + return zap_last_2bytes(fold) != 0; + + fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ + (*(unsigned long *)(addr2 + 4))); + return fold != 0; +#else + return compare_ether_addr(addr1, addr2); +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_ETHERDEVICE_H */ -- cgit v1.1 From 2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 15:52:46 -0800 Subject: net: avoid a pair of dst_hold()/dst_release() in ip_append_data() We can reduce pressure on dst entry refcount that slowdown UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to mediagateways, especially big ones, handling thousand of streams. Several cpus send UDP frames to the same destination, hence use the same dst entry. This patch makes ip_append_data() eventually steal the refcount its callers had to take on the dst entry. This doesnt avoid all refcounting, but still gives speedups on SMP, on UDP/RAW transmit path Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index bc026ec..ddef10c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -110,7 +110,7 @@ extern int ip_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, - struct rtable *rt, + struct rtable **rt, unsigned int flags); extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); extern ssize_t ip_append_page(struct sock *sk, struct page *page, -- cgit v1.1 From e1aa680fa40e7492260a09cb57d94002245cc8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 24 Nov 2008 21:11:55 -0800 Subject: tcp: move tcp_simple_retransmit to tcp_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f26b28..90b4c3b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,8 +472,6 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); -extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, - struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -- cgit v1.1 From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 24 Nov 2008 21:20:15 -0800 Subject: tcp: Try to restore large SKBs while SACK processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During SACK processing, most of the benefits of TSO are eaten by the SACK blocks that one-by-one fragment SKBs to MSS sized chunks. Then we're in problems when cleanup work for them has to be done when a large cumulative ACK comes. Try to return back to pre-split state already while more and more SACK info gets discovered by combining newly discovered SACK areas with the previous skb if that's SACKed as well. This approach has a number of benefits: 1) The processing overhead is spread more equally over the RTT 2) Write queue has less skbs to process (affect everything which has to walk in the queue past the sacked areas) 3) Write queue is consistent whole the time, so no other parts of TCP has to be aware of this (this was not the case with some other approach that was, well, quite intrusive all around). 4) Clean_rtx_queue can release most of the pages using single put_page instead of previous PAGE_SIZE/mss+1 calls In case a hole is fully filled by the new SACK block, we attempt to combine the next skb too which allows construction of skbs that are even larger than what tso split them to and it handles hole per on every nth patterns that often occur during slow start overshoot pretty nicely. Though this to be really useful also a retransmission would have to get lost since cumulative ACKs advance one hole at a time in the most typical case. TODO: handle upwards only merging. That should be rather easy when segment is fully sacked but I'm leaving that as future work item (it won't make very large difference anyway since this current approach already covers quite a lot of normal cases). I was earlier thinking of some sophisticated way of tracking timestamps of the first and the last segment but later on realized that it won't be that necessary at all to store the timestamp of the last segment. The cases that can occur are basically either: 1) ambiguous => no sensible measurement can be taken anyway 2) non-ambiguous is due to reordering => having the timestamp of the last segment there is just skewing things more off than does some good since the ack got triggered by one of the holes (besides some substle issues that would make determining right hole/skb even harder problem). Anyway, it has nothing to do with this change then. I choose to route some abnormal looking cases with goto noop, some could be handled differently (eg., by stopping the walking at that skb but again). In general, they either shouldn't happen at all or are rare enough to make no difference in practice. In theory this change (as whole) could cause some macroscale regression (global) because of cache misses that are taken over the round-trip time but it gets very likely better because of much less (local) cache misses per other write queue walkers and the big recovery clearing cumulative ack. Worth to note that these benefits would be very easy to get also without TSO/GSO being on as long as the data is in pages so that we can merge them. Currently I won't let that happen because DSACK splitting at fragment that would mess up pcounts due to sk_can_gso in tcp_set_skb_tso_segs. Once DSACKs fragments gets avoided, we have some conditions that can be made less strict. TODO: I will probably have to convert the excessive pointer passing to struct sacktag_state... :-) My testing revealed that considerable amount of skbs couldn't be shifted because they were cloned (most likely still awaiting tx reclaim)... [The rest is considering future work instead since I got repeatably EFAULT to tcpdump's recvfrom when I added pskb_expand_head to deal with clones, so I separated that into another, later patch] ...To counter that, I gave up on the fifth advantage: 5) When growing previous SACK block, less allocs for new skbs are done, basically a new alloc is needed only when new hole is detected and when the previous skb runs out of frags space ...which now only happens of if reclaim is fast enough to dispose the clone before the SACK block comes in (the window is RTT long), otherwise we'll have to alloc some. With clones being handled I got these numbers (will be somewhat worse without that), taken with fine-grained mibs: TCPSackShifted 398 TCPSackMerged 877 TCPSackShiftFallback 320 TCPSACKCOLLAPSEFALLBACKGSO 0 TCPSACKCOLLAPSEFALLBACKSKBBITS 0 TCPSACKCOLLAPSEFALLBACKSKBDATA 0 TCPSACKCOLLAPSEFALLBACKBELOW 0 TCPSACKCOLLAPSEFALLBACKFIRST 1 TCPSACKCOLLAPSEFALLBACKPREVBITS 318 TCPSACKCOLLAPSEFALLBACKMSS 1 TCPSACKCOLLAPSEFALLBACKNOHEAD 0 TCPSACKCOLLAPSEFALLBACKSHIFT 0 TCPSACKCOLLAPSENOOPSEQ 0 TCPSACKCOLLAPSENOOPSMALLPCOUNT 0 TCPSACKCOLLAPSENOOPSMALLLEN 0 TCPSACKCOLLAPSEHOLE 12 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 +++++++++++++++++++++++++++++++++ include/net/tcp.h | 5 +++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f8..acf17af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -493,6 +493,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, } /** + * skb_queue_is_first - check if skb is the first entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the first buffer on the list. + */ +static inline bool skb_queue_is_first(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->prev == (struct sk_buff *) list); +} + +/** * skb_queue_next - return the next packet in the queue * @list: queue head * @skb: current buffer @@ -511,6 +524,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, } /** + * skb_queue_prev - return the prev packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the prev packet in @list before @skb. It is only valid to + * call this if skb_queue_is_first() evaluates to false. + */ +static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_first(list, skb)); + return skb->prev; +} + +/** * skb_get - reference buffer * @skb: buffer to reference * @@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb, extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); diff --git a/include/net/tcp.h b/include/net/tcp.h index 90b4c3b..2653924 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu return skb_queue_next(&sk->sk_write_queue, skb); } +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb_queue_prev(&sk->sk_write_queue, skb); +} + #define tcp_for_write_queue(skb, sk) \ skb_queue_walk(&(sk)->sk_write_queue, skb) -- cgit v1.1 From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 24 Nov 2008 21:27:22 -0800 Subject: tcp: add some mibs to track collapsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/snmp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 7a6e6bb..aee3f1e 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -216,6 +216,9 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_SACKSHIFTED, + LINUX_MIB_SACKMERGED, + LINUX_MIB_SACKSHIFTFALLBACK, __LINUX_MIB_MAX }; -- cgit v1.1 From 47fd5b8373ecc6bf5473e4139b62b06425448252 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 00:20:43 -0800 Subject: netdev: add HAVE_NET_DEVICE_OPS As a concession to vendors who have to deal with one source for different kernel versions, add a HAVE_NET_DEVICE_OPS so they don't end up hard coding ifdef against kernel version. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6095af5..76a89f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -545,6 +545,7 @@ struct netdev_queue { * * void (*ndo_poll_controller)(struct net_device *dev); */ +#define HAVE_NET_DEVICE_OPS struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); -- cgit v1.1 From 7a6b6f515f77d1c62a2f383b6dce18cb0af0cf4f Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Tue, 25 Nov 2008 01:02:08 -0800 Subject: DCB: fix kconfig option Since the netlink option for DCB is necessary to actually be useful, simplified the Kconfig option. In addition, added useful help text for the Kconfig option. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76a89f8..0df0db0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,7 +43,7 @@ #include #include -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB #include #endif @@ -847,7 +847,7 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ struct dcbnl_rtnl_ops *dcbnl_ops; #endif -- cgit v1.1 From 65f233fb1669e6c990cd1d7fd308ac7dc66dc207 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 25 Nov 2008 18:20:13 +0100 Subject: netfilter: fix warning in net/netfilter/nf_conntrack_proto_tcp.c fix this warning: net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_in_window\u2019: net/netfilter/nf_conntrack_proto_tcp.c:491: warning: unused variable \u2018net\u2019 net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_packet\u2019: net/netfilter/nf_conntrack_proto_tcp.c:812: warning: unused variable \u2018net\u2019 Signed-off-by: Ingo Molnar Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7f2f43c..debdaf7 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -129,7 +129,7 @@ extern const struct nla_policy nf_ct_port_nla_policy[]; && net_ratelimit()) #endif #else -#define LOG_INVALID(net, proto) 0 +static inline int LOG_INVALID(struct net *net, int proto) { return 0; } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ -- cgit v1.1 From 3f2355cb9111ac04e7ae06a4d7044da2ae813863 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Nov 2008 14:22:02 -0800 Subject: cfg80211/mac80211: Add 802.11d support This adds country IE parsing to mac80211 and enables its usage within the new regulatory infrastructure in cfg80211. We parse the country IEs only on management beacons for the BSSID you are associated to and disregard the IEs when the country and environment (indoor, outdoor, any) matches the already processed country IE. To avoid following misinformed or outdated APs we build and use a regulatory domain out of the intersection between what the AP provides us on the country IE and what CRDA is aware is allowed on the same country. A secondary device is allowed to follow only the same country IE as it make no sense for two devices on a system to be in two different countries. In the case the AP is using country IEs for an incorrect country the user may help compliance further by setting the regulatory domain before or after the IE is parsed and in that case another intersection will be performed. CONFIG_WIRELESS_OLD_REGULATORY is supported but requires CRDA present. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/wireless.h | 15 ++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 56b0eb2..a6ec928 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1042,6 +1042,68 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* + * IEEE 802.11-2007 7.3.2.9 Country information element + * + * Minimum length is 8 octets, ie len must be evenly + * divisible by 2 + */ + +/* Although the spec says 8 I'm seeing 6 in practice */ +#define IEEE80211_COUNTRY_IE_MIN_LEN 6 + +/* + * For regulatory extension stuff see IEEE 802.11-2007 + * Annex I (page 1141) and Annex J (page 1147). Also + * review 7.3.2.9. + * + * When dot11RegulatoryClassesRequired is true and the + * first_channel/reg_extension_id is >= 201 then the IE + * compromises of the 'ext' struct represented below: + * + * - Regulatory extension ID - when generating IE this just needs + * to be monotonically increasing for each triplet passed in + * the IE + * - Regulatory class - index into set of rules + * - Coverage class - index into air propagation time (Table 7-27), + * in microseconds, you can compute the air propagation time from + * the index by multiplying by 3, so index 10 yields a propagation + * of 10 us. Valid values are 0-31, values 32-255 are not defined + * yet. A value of 0 inicates air propagation of <= 1 us. + * + * See also Table I.2 for Emission limit sets and table + * I.3 for Behavior limit sets. Table J.1 indicates how to map + * a reg_class to an emission limit set and behavior limit set. + */ +#define IEEE80211_COUNTRY_EXTENSION_ID 201 + +/* + * Channels numbers in the IE must be monotonically increasing + * if dot11RegulatoryClassesRequired is not true. + * + * If dot11RegulatoryClassesRequired is true consecutive + * subband triplets following a regulatory triplet shall + * have monotonically increasing first_channel number fields. + * + * Channel numbers shall not overlap. + * + * Note that max_power is signed. + */ +struct ieee80211_country_ie_triplet { + union { + struct { + u8 first_channel; + u8 num_channels; + s8 max_power; + } __attribute__ ((packed)) chans; + struct { + u8 reg_extension_id; + u8 reg_class; + u8 coverage_class; + } __attribute__ ((packed)) ext; + }; +} __attribute__ ((packed)); + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, diff --git a/include/net/wireless.h b/include/net/wireless.h index 4123515..c85fa0e 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -373,4 +373,19 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, * for a regulatory domain structure for the respective country. */ extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2); + +/** + * regulatory_hint_11d - hints a country IE as a regulatory domain + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) + * @country_ie: pointer to the country IE + * @country_ie_len: length of the country IE + * + * We will intersect the rd with the what CRDA tells us should apply + * for the alpha2 this country IE belongs to, this prevents APs from + * sending us incorrect or outdated information against a country. + */ +extern void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len); #endif /* __NET_WIRELESS_H */ -- cgit v1.1 From 14b9815af3f4fe0e171ee0c4325c31d2a2c1570b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Nov 2008 14:22:03 -0800 Subject: cfg80211: add support for custom firmware regulatory solutions This adds API to cfg80211 to allow wireless drivers to inform us if their firmware can handle regulatory considerations *and* they cannot map these regulatory domains to an ISO / IEC 3166 alpha2. In these cases we skip the first regulatory hint instead of expecting the driver to build their own regulatory structure, providing us with an alpha2, or using the reg_notifier(). Signed-off-by: Luis R. Rodriguez Acked-by: Zhu Yi Signed-off-by: John W. Linville --- include/net/wireless.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index c85fa0e..21c5d96 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -181,6 +181,11 @@ struct ieee80211_supported_band { * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item * @class_dev: the class device representing /sys/class/ieee80211/ + * @fw_handles_regulatory: tells us the firmware for this device + * has its own regulatory solution and cannot identify the + * ISO / IEC 3166 alpha2 it belongs to. When this is enabled + * we will disregard the first regulatory hint (when the + * initiator is %REGDOM_SET_BY_CORE). * @reg_notifier: the driver's regulatory notification callback */ struct wiphy { @@ -192,6 +197,8 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; + bool fw_handles_regulatory; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered -- cgit v1.1 From 8eecaba900e89643029fd2c253ad8ebb60761165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 25 Nov 2008 13:45:29 -0800 Subject: tcp: tcp_limit_reno_sacked can become static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 2653924..e8ae90a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -761,8 +761,6 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } -extern int tcp_limit_reno_sacked(struct tcp_sock *tp); - /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. -- cgit v1.1 From d62ddc21b674b5ac1466091ff3fbf7baa53bc92c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:14:31 -0800 Subject: netns xfrm: add netns boilerplate Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++++ include/net/netns/xfrm.h | 7 +++++++ include/net/xfrm.h | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 include/net/netns/xfrm.h (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3195577..6fc13d9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -19,6 +19,7 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#include struct proc_dir_entry; struct net_device; @@ -74,6 +75,9 @@ struct net { struct netns_ct ct; #endif #endif +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; +#endif struct net_generic *gen; }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h new file mode 100644 index 0000000..1cb0024 --- /dev/null +++ b/include/net/netns/xfrm.h @@ -0,0 +1,7 @@ +#ifndef __NETNS_XFRM_H +#define __NETNS_XFRM_H + +struct netns_xfrm { +}; + +#endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 45e11b3..9107d6f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1269,7 +1269,8 @@ struct xfrm6_tunnel { extern void xfrm_init(void); extern void xfrm4_init(void); -extern void xfrm_state_init(void); +extern int xfrm_state_init(struct net *net); +extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); #ifdef CONFIG_XFRM extern int xfrm6_init(void); -- cgit v1.1 From 673c09be457bb23aa0eaaa79804cbb342210d195 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:15:16 -0800 Subject: netns xfrm: add struct xfrm_state::xs_net To avoid unnecessary complications with passing netns around. * set once, very early after allocating * once set, never changes For a while create every xfrm_state in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9107d6f..9da8903 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -130,6 +130,9 @@ struct xfrm_state_walk { /* Full description of state of transformer. */ struct xfrm_state { +#ifdef CONFIG_NET_NS + struct net *xs_net; +#endif union { struct hlist_node gclist; struct hlist_node bydst; @@ -223,6 +226,11 @@ struct xfrm_state void *data; }; +static inline struct net *xs_net(struct xfrm_state *x) +{ + return read_pnet(&x->xs_net); +} + /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 @@ -1296,7 +1304,7 @@ extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); -extern struct xfrm_state *xfrm_state_alloc(void); +extern struct xfrm_state *xfrm_state_alloc(struct net *net); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, -- cgit v1.1 From 9d4139c76905833afcb77fe8ccc17f302a0eb9ab Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:16:11 -0800 Subject: netns xfrm: per-netns xfrm_state_all list This is done to get a) simple "something leaked" check b) cover possible DoSes when other netns puts many, many xfrm_states onto a list. c) not miss "alien xfrm_state" check in some of list iterators in future. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1cb0024..6ae234a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -1,7 +1,10 @@ #ifndef __NETNS_XFRM_H #define __NETNS_XFRM_H +#include + struct netns_xfrm { + struct list_head state_all; }; #endif -- cgit v1.1 From 73d189dce486cd6693fa29169b1aac0872efbcea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:16:58 -0800 Subject: netns xfrm: per-netns xfrm_state_bydst hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 6ae234a..02487b3 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,15 @@ struct netns_xfrm { struct list_head state_all; + /* + * Hash table to find appropriate SA towards given target (endpoint of + * tunnel or destination of transport mode) allowed by selector. + * + * Main use is finding SA after policy selected tunnel or transport + * mode. Also, it can be used by ah/esp icmp error handler to find + * offending SA. + */ + struct hlist_head *state_bydst; }; #endif -- cgit v1.1 From d320bbb306f2085892bc958781e8fcaf5d491589 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:17:24 -0800 Subject: netns xfrm: per-netns xfrm_state_bysrc hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 02487b3..bfcd5a3 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -14,6 +14,7 @@ struct netns_xfrm { * offending SA. */ struct hlist_head *state_bydst; + struct hlist_head *state_bysrc; }; #endif -- cgit v1.1 From b754a4fd8f58d245c9b5e92914cce09c4309cb67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:17:47 -0800 Subject: netns xfrm: per-netns xfrm_state_byspi hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bfcd5a3..b05ca3f 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -15,6 +15,7 @@ struct netns_xfrm { */ struct hlist_head *state_bydst; struct hlist_head *state_bysrc; + struct hlist_head *state_byspi; }; #endif -- cgit v1.1 From 529983ecabeae3d8e61c9e27079154b1b8544dcd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:18:12 -0800 Subject: netns xfrm: per-netns xfrm_state_hmask Since hashtables are per-netns, they can be independently resized. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index b05ca3f..dbbc0e9 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -16,6 +16,7 @@ struct netns_xfrm { struct hlist_head *state_bydst; struct hlist_head *state_bysrc; struct hlist_head *state_byspi; + unsigned int state_hmask; }; #endif -- cgit v1.1 From 0bf7c5b019518d3fe9cb96b9c97bf44d251472c3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:18:39 -0800 Subject: netns xfrm: per-netns xfrm_state counts Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index dbbc0e9..492b471 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -17,6 +17,7 @@ struct netns_xfrm { struct hlist_head *state_bysrc; struct hlist_head *state_byspi; unsigned int state_hmask; + unsigned int state_num; }; #endif -- cgit v1.1 From 630827338585022b851ec0a6335df8e436c900e4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:19:07 -0800 Subject: netns xfrm: per-netns xfrm_hash_work All of this is implicit passing which netns's hashes should be resized. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 492b471..bd68802 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -2,6 +2,7 @@ #define __NETNS_XFRM_H #include +#include struct netns_xfrm { struct list_head state_all; @@ -18,6 +19,7 @@ struct netns_xfrm { struct hlist_head *state_byspi; unsigned int state_hmask; unsigned int state_num; + struct work_struct state_hash_work; }; #endif -- cgit v1.1 From b8a0ae20b0eecd4b86a113d2abe2fa5a582b30a6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:20:11 -0800 Subject: netns xfrm: per-netns state GC list km_waitq is going to be made per-netns to disallow spurious wakeups in __xfrm_lookup(). To not wakeup after every garbage-collected xfrm_state (which potentially can be from different netns) make state GC list per-netns. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd68802..8ceb765 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -20,6 +20,7 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; + struct hlist_head state_gc_list; }; #endif -- cgit v1.1 From c78371441c0d957f54c9f8a35b3ee5a378d14808 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:20:36 -0800 Subject: netns xfrm: per-netns state GC work State GC is per-netns, and this is part of it. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 8ceb765..8055535 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -21,6 +21,7 @@ struct netns_xfrm { unsigned int state_num; struct work_struct state_hash_work; struct hlist_head state_gc_list; + struct work_struct state_gc_work; }; #endif -- cgit v1.1 From 50a30657fd7ee77a94a6bf0ad86eba7c37c3032e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:21:01 -0800 Subject: netns xfrm: per-netns km_waitq Disallow spurious wakeups in __xfrm_lookup(). Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 3 +++ include/net/xfrm.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 8055535..2a383c8 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -2,6 +2,7 @@ #define __NETNS_XFRM_H #include +#include #include struct netns_xfrm { @@ -22,6 +23,8 @@ struct netns_xfrm { struct work_struct state_hash_work; struct hlist_head state_gc_list; struct work_struct state_gc_work; + + wait_queue_head_t km_waitq; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9da8903..0d4353c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1459,7 +1459,6 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_kmaddress *k); #endif -extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); -- cgit v1.1 From 0331b1f383e1fa4049f8e75cafeea8f006171c64 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:21:45 -0800 Subject: netns xfrm: add struct xfrm_policy::xp_net Again, to avoid complications with passing netns when not necessary. Again, ->xp_net is set-once field, once set it never changes. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0d4353c..1ab1756 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -475,6 +475,9 @@ struct xfrm_policy_walk { struct xfrm_policy { +#ifdef CONFIG_NET_NS + struct net *xp_net; +#endif struct hlist_node bydst; struct hlist_node byidx; @@ -499,6 +502,11 @@ struct xfrm_policy struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; +static inline struct net *xp_net(struct xfrm_policy *xp) +{ + return read_pnet(&xp->xp_net); +} + struct xfrm_kmaddress { xfrm_address_t local; xfrm_address_t remote; @@ -1425,7 +1433,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, -- cgit v1.1 From adfcf0b27e87d16a6a8c364daa724653d4d8930b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:11 -0800 Subject: netns xfrm: per-netns policy list Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 2a383c8..6648924 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -25,6 +25,8 @@ struct netns_xfrm { struct work_struct state_gc_work; wait_queue_head_t km_waitq; + + struct list_head policy_all; }; #endif -- cgit v1.1 From 93b851c1c93c7d5cd8d94cd3f3a268b2d5460e9e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:35 -0800 Subject: netns xfrm: per-netns xfrm_policy_byidx hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 6648924..5cd7d06 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -27,6 +27,7 @@ struct netns_xfrm { wait_queue_head_t km_waitq; struct list_head policy_all; + struct hlist_head *policy_byidx; }; #endif -- cgit v1.1 From 8100bea7d619e8496ad8e545d1b41f536e076cd5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:58 -0800 Subject: netns xfrm: per-netns xfrm_policy_byidx hashmask Per-netns hashes are independently resizeable. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 5cd7d06..42dc318 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -28,6 +28,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; + unsigned int policy_idx_hmask; }; #endif -- cgit v1.1 From 8b18f8eaf9207d53ba3e69f2b98d7290f4dec227 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:23:26 -0800 Subject: netns xfrm: per-netns inexact policies Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 42dc318..c756831 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -4,6 +4,7 @@ #include #include #include +#include struct netns_xfrm { struct list_head state_all; @@ -29,6 +30,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; }; #endif -- cgit v1.1 From a35f6c5de32664d82c072a7e2c7d5c5234de4158 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:23:48 -0800 Subject: netns xfrm: per-netns xfrm_policy_bydst hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index c756831..39cfa79 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,11 @@ #include #include +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + struct netns_xfrm { struct list_head state_all; /* @@ -31,6 +36,7 @@ struct netns_xfrm { struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; + struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; }; #endif -- cgit v1.1 From dc2caba7b321289e7d02e63d7216961ccecfa103 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:24:15 -0800 Subject: netns xfrm: per-netns policy counts Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 39cfa79..d5aadf0 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -37,6 +37,7 @@ struct netns_xfrm { unsigned int policy_idx_hmask; struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; + unsigned int policy_count[XFRM_POLICY_MAX * 2]; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1ab1756..8699620 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -559,8 +559,6 @@ struct xfrm_mgr extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); -extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; - /* * This structure is used for the duration where packets are being * transformed by IPsec. As soon as the packet leaves IPsec the @@ -999,7 +997,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!xfrm_policy_count[dir] && !skb->sp) || + return (!init_net.xfrm.policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1051,7 +1049,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_count[XFRM_POLICY_OUT] || + return !init_net.xfrm.policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } -- cgit v1.1 From 66caf628c3b634c57b14a1a104dcd57e4fab2e3b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:28:57 -0800 Subject: netns xfrm: per-netns policy hash resizing work Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index d5aadf0..c53d173 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,6 +38,7 @@ struct netns_xfrm { struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; + struct work_struct policy_hash_work; }; #endif -- cgit v1.1 From 0e6024519b4da2d9413b97be1de8122d5709ccc1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:18 -0800 Subject: netns xfrm: state flush in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8699620..e4bb672 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1363,7 +1363,7 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); -extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); +extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, -- cgit v1.1 From 221df1ed33c9284fc7a6f6e47ca7f8d5f3665d43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:50 -0800 Subject: netns xfrm: state lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e4bb672..15136c5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1323,8 +1323,8 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); -- cgit v1.1 From 5447c5e401c49aba0c36bb1066f2d25b152553b7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:31:51 -0800 Subject: netns xfrm: finding states in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 15136c5..4cbd055 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1315,7 +1315,8 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); -extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr, +extern struct xfrm_state * xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid); @@ -1361,7 +1362,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); +extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); @@ -1446,7 +1447,7 @@ struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); -struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -- cgit v1.1 From 284fa7da300adcb700b44df2f64a536b434d4650 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:32:14 -0800 Subject: netns xfrm: state walking in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4cbd055..40ed487 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1307,7 +1307,7 @@ extern int xfrm_proc_init(void); #endif extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); -extern int xfrm_state_walk(struct xfrm_state_walk *walk, +extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); extern struct xfrm_state *xfrm_state_alloc(struct net *net); -- cgit v1.1 From 33ffbbd52c327225a3e28485c39dc5746d81be03 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:33:32 -0800 Subject: netns xfrm: policy flushing in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 40ed487..766cc71 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1444,7 +1444,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_sec_ctx *ctx, int delete, int *err); struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, -- cgit v1.1 From 8d1211a6aaea43ea36151c17b0193eb763ff2d7e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:34:20 -0800 Subject: netns xfrm: finding policy in netns Add netns parameter to xfrm_policy_bysel_ctx(), xfrm_policy_byidx(). Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 766cc71..ec2b7a9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1439,11 +1439,11 @@ extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); -- cgit v1.1 From cdcbca7c1f1946758cfacb69bc1c7eeaccb11e2d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:34:49 -0800 Subject: netns xfrm: policy walking in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ec2b7a9..1dc4ff0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1435,7 +1435,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); -extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, +extern int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -- cgit v1.1 From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:18 -0800 Subject: netns xfrm: lookup in netns Pass netns to xfrm_lookup()/__xfrm_lookup(). For that pass netns to flow_cache_lookup() and resolver callback. Take it from socket or netdevice. Stub DECnet to init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 16 ++++++++-------- include/net/flow.h | 9 +++++---- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 6c77879..6be3b08 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -291,21 +291,21 @@ enum { struct flowi; #ifndef CONFIG_XFRM -static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } -static inline int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } #else -extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); -extern int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); +extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); +extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); #endif #endif diff --git a/include/net/flow.h b/include/net/flow.h index b45a5e4..809970b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,12 +84,13 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 +struct net; struct sock; -typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, - void **objp, atomic_t **obj_refp); +typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family, + u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, + u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; -- cgit v1.1 From f6e1e25d703c0a9ba1863384a16851dec52f8e3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:44 -0800 Subject: netns xfrm: xfrm_policy_check in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1dc4ff0..158848f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -992,12 +992,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) { + struct net *net = dev_net(skb->dev); int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0); if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!init_net.xfrm.policy_count[dir] && !skb->sp) || + return (!net->xfrm.policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } -- cgit v1.1 From 99a66657b2f62ae8b2b1e6ffc6abed051e4561ca Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:36:13 -0800 Subject: netns xfrm: xfrm_route_forward() in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 158848f..36c8cff 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1050,7 +1050,9 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !init_net.xfrm.policy_count[XFRM_POLICY_OUT] || + struct net *net = dev_net(skb->dev); + + return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } -- cgit v1.1 From ddcfd79680c1dc74eb5f24aa70785c11bf7eec8f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:37:23 -0800 Subject: netns xfrm: dst garbage-collecting in netns Pass netns pointer to struct xfrm_policy_afinfo::garbage_collect() [This needs more thoughts on what to do with dst_ops] [Currently stub to init_net] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36c8cff..bd25150 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -265,7 +265,7 @@ struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(void); + void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); -- cgit v1.1 From a6483b790f8efcd8db190c1c0ff93f9d9efe919a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:38:20 -0800 Subject: netns xfrm: per-netns NETLINK_XFRM socket Stub senders to init_net's one temporarily. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ include/net/xfrm.h | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index c53d173..09f3060 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -39,6 +39,8 @@ struct netns_xfrm { struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; + + struct sock *nlsk; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index bd25150..e027179 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -48,7 +48,6 @@ DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); #define XFRM_INC_STATS_USER(field) #endif -extern struct sock *xfrm_nl; extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; extern int sysctl_xfrm_larval_drop; @@ -1516,18 +1515,20 @@ static inline int xfrm_policy_id2dir(u32 index) return index & 7; } -static inline int xfrm_aevent_is_on(void) +#ifdef CONFIG_XFRM +static inline int xfrm_aevent_is_on(struct net *net) { struct sock *nlsk; int ret = 0; rcu_read_lock(); - nlsk = rcu_dereference(xfrm_nl); + nlsk = rcu_dereference(net->xfrm.nlsk); if (nlsk) ret = netlink_has_listeners(nlsk, XFRMNLGRP_AEVENTS); rcu_read_unlock(); return ret; } +#endif static inline int xfrm_alg_len(struct xfrm_algo *alg) { -- cgit v1.1 From 7067802e262457a9737521e5669b622028b2283a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:50:36 -0800 Subject: netns xfrm: pass netns with KM notifications SA and SPD flush are executed with NULL SA and SPD respectively, for these cases pass netns explicitly from userspace socket. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e027179..52e784f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -256,6 +256,7 @@ struct km_event u32 seq; u32 pid; u32 event; + struct net *net; }; struct net_device; -- cgit v1.1 From db983c1144884cab10d6397532f4bf05eb0c01d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:01 -0800 Subject: netns xfrm: KM reporting in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 52e784f..f3ea160 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -552,7 +552,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); - int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); + int (*report)(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k); }; @@ -1471,7 +1471,7 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); -extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); +extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); -- cgit v1.1 From c5b3cf46eabe6e7459125fc6e2033b4222665017 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:25 -0800 Subject: netns xfrm: ->dst_lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f3ea160..b16d4c0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -266,7 +266,8 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, + struct dst_entry *(*dst_lookup)(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); -- cgit v1.1 From fbda33b2b85941c1ae3a0d89522dec5c1b1bd98c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:56:49 -0800 Subject: netns xfrm: ->get_saddr in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b16d4c0..d076f3d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -269,7 +269,7 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); - int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, -- cgit v1.1 From 59c9940ed0ef026673cac52f2eaed77af7d486da Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:59:52 -0800 Subject: netns xfrm: per-netns MIBs Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/mib.h | 3 +++ include/net/xfrm.h | 13 ++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 10cb7c3..0b44112 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -20,6 +20,9 @@ struct netns_mib { DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #endif +#ifdef CONFIG_XFRM_STATISTICS + DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); +#endif }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d076f3d..78ec3e8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -38,14 +38,13 @@ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS -DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); -#define XFRM_INC_STATS(field) SNMP_INC_STATS(xfrm_statistics, field) -#define XFRM_INC_STATS_BH(field) SNMP_INC_STATS_BH(xfrm_statistics, field) -#define XFRM_INC_STATS_USER(field) SNMP_INC_STATS_USER(xfrm_statistics, field) +#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) +#define XFRM_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field) +#define XFRM_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)-mib.xfrm_statistics, field) #else -#define XFRM_INC_STATS(field) -#define XFRM_INC_STATS_BH(field) -#define XFRM_INC_STATS_USER(field) +#define XFRM_INC_STATS(net, field) ((void)(net)) +#define XFRM_INC_STATS_BH(net, field) ((void)(net)) +#define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif extern u32 sysctl_xfrm_aevent_etime; -- cgit v1.1 From c68cd1a01ba56995d85a4a62b195b2b3f6415c64 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 18:00:14 -0800 Subject: netns xfrm: /proc/net/xfrm_stat in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 78ec3e8..1554ccd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1306,7 +1306,8 @@ static inline void xfrm6_fini(void) #endif #ifdef CONFIG_XFRM_STATISTICS -extern int xfrm_proc_init(void); +extern int xfrm_proc_init(struct net *net); +extern void xfrm_proc_fini(struct net *net); #endif extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); -- cgit v1.1 From b27aeadb5948d400df83db4d29590fb9862ba49d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 18:00:48 -0800 Subject: netns xfrm: per-netns sysctls Make net.core.xfrm_aevent_etime net.core.xfrm_acq_expires net.core.xfrm_aevent_rseqth net.core.xfrm_larval_drop sysctls per-netns. For that make net_core_path[] global, register it to prevent two /proc/net/core antries and change initcall position -- xfrm_init() is called from fs_initcall, so this one should be fs_initcall at least. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/ip.h | 1 + include/net/netns/xfrm.h | 10 ++++++++++ include/net/xfrm.h | 14 +++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index ddef10c..1086813 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,6 +187,7 @@ extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +extern struct ctl_path net_core_path[]; extern struct ctl_path net_ipv4_ctl_path[]; /* From inetpeer.c */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 09f3060..1ba9127 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,8 @@ #include #include +struct ctl_table_header; + struct xfrm_policy_hash { struct hlist_head *table; unsigned int hmask; @@ -41,6 +43,14 @@ struct netns_xfrm { struct work_struct policy_hash_work; struct sock *nlsk; + + u32 sysctl_aevent_etime; + u32 sysctl_aevent_rseqth; + int sysctl_larval_drop; + u32 sysctl_acq_expires; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_hdr; +#endif }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1554ccd..2e9f5c0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -47,11 +47,6 @@ #define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; - extern struct mutex xfrm_cfg_mutex; /* Organization of SPD aka "XFRM rules" @@ -1310,6 +1305,15 @@ extern int xfrm_proc_init(struct net *net); extern void xfrm_proc_fini(struct net *net); #endif +extern int xfrm_sysctl_init(struct net *net); +#ifdef CONFIG_SYSCTL +extern void xfrm_sysctl_fini(struct net *net); +#else +static inline void xfrm_sysctl_fini(struct net *net) +{ +} +#endif + extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); -- cgit v1.1 From c1b56878fb68e9c14070939ea4537ad4db79ffae Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 21:14:06 -0800 Subject: tc: policing requires a rate estimator Found that while trying average rate policing, it was possible to request average rate policing without a rate estimator. This results in no policing which is harmless but incorrect. Since policing could be setup in two steps, need to check in the kernel. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/gen_stats.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 8cd8185..dcf5bfa 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,5 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); +extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); #endif -- cgit v1.1 From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:16:35 -0800 Subject: net: Use a percpu_counter for sockets_allocated Instead of using one atomic_t per protocol, use a percpu_counter for "sockets_allocated", to reduce cache line contention on heavy duty network servers. Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9 net: af_unix can make unix_nr_socks visbile in /proc), since it is not anymore used after sock_prot_inuse_add() addition Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 2379750..bbb7742 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); +extern struct percpu_counter sctp_sockets_allocated; /* * sctp/primitive.c diff --git a/include/net/sock.h b/include/net/sock.h index 00cd486..a2a3890 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -649,7 +649,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); atomic_t *memory_allocated; /* Current allocated memory. */ - atomic_t *sockets_allocated; /* Current number of sockets. */ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. diff --git a/include/net/tcp.h b/include/net/tcp.h index e8ae90a..cbca3b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; -extern atomic_t tcp_sockets_allocated; +extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; /* -- cgit v1.1 From dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:17:14 -0800 Subject: net: Use a percpu_counter for orphan_count Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count", to reduce cache line contention on heavy duty network servers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index a2a3890..5a3a151 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -666,7 +666,7 @@ struct proto { unsigned int obj_size; int slab_flags; - atomic_t *orphan_count; + struct percpu_counter *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index cbca3b8..de1e91d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,7 +46,7 @@ extern struct inet_hashinfo tcp_hashinfo; -extern atomic_t tcp_orphan_count; +extern struct percpu_counter tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) -- cgit v1.1 From e2f367f269fe19375f10e63efe0f2a6d3ddef8e6 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 21 Nov 2008 19:01:30 +0200 Subject: nl80211: Report max TX power in NL80211_BAND_ATTR_FREQS This is useful information to provide for userspace (e.g., hostapd needs this to generate Country IE). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7982734..54d6ebe 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,6 +508,7 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -516,12 +517,15 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_PASSIVE_SCAN, NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, + NL80211_FREQUENCY_ATTR_MAX_TX_POWER, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 }; +#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER + /** * enum nl80211_bitrate_attr - bitrate attributes * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps -- cgit v1.1 From f80b5e99c7dac5a9a0d72496cec5075a12cd1476 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Fri, 21 Nov 2008 20:40:09 -0200 Subject: rfkill: preserve state across suspend The rfkill class API requires that the driver connected to a class call rfkill_force_state() on resume to update the real state of the rfkill controller, OR that it provides a get_state() hook. This means there is potentially a hidden call in the resume code flow that changes rfkill->state (i.e. rfkill_force_state()), so the previous state of the transmitter was being lost. The simplest and most future-proof way to fix this is to explicitly store the pre-sleep state on the rfkill structure, and restore from that on resume. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Cc: Alan Jenkins Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4cd64b0..f376a93 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -108,6 +108,7 @@ struct rfkill { struct device dev; struct list_head node; + enum rfkill_state state_for_resume; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) -- cgit v1.1 From bf8c1ac6d81ba8c0e4dc2215f84f5e2a3c8227e8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Nov 2008 22:00:31 +0200 Subject: nl80211: Change max TX power to be in mBm instead of dBm In order to be consistent with NL80211_ATTR_POWER_RULE_MAX_EIRP, change NL80211_FREQUENCY_ATTR_MAX_TX_POWER to use mBm and U32 instead of dBm and U8. This is a userspace interface change, but the previous version had not yet been pushed upstream and there are no userspace programs using this yet, so there is justification to get this change in as long as it goes in before the previous version gets out. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 54d6ebe..e08c8bc 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,7 +508,8 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. - * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm + * (100 * dBm). */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, -- cgit v1.1 From 244e6c2d0724bc4908a1995804704bdee3b31528 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 26 Nov 2008 15:24:32 -0800 Subject: pkt_sched: gen_estimator: Optimize gen_estimator_active() Since all other gen_estimator functions use bstats and rate_est params together, and searching for them is optimized now, let's use this also in gen_estimator_active(). The return type of gen_estimator_active() is changed to bool, and gen_find_node() parameters to const, btw. In tcf_act_police_locate() a check for ACT_P_CREATED is added before calling gen_estimator_active(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/gen_stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index dcf5bfa..d136b52 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,6 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); - +extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est); #endif -- cgit v1.1 From c4106aa88a440430d387e022f2ad6dc1e0d52e98 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 27 Nov 2008 00:12:47 -0800 Subject: decnet: remove private wrappers of endian helpers Signed-off-by: Harvey Harrison Reviewed-by: Steven Whitehouse Signed-off-by: David S. Miller --- include/net/dn.h | 8 +++----- include/net/dn_fib.h | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index 6277783..7cd6bb8 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -4,9 +4,7 @@ #include #include #include - -#define dn_ntohs(x) le16_to_cpu(x) -#define dn_htons(x) cpu_to_le16(x) +#include struct dn_scp /* Session Control Port */ { @@ -175,7 +173,7 @@ struct dn_skb_cb { static inline __le16 dn_eth2dn(unsigned char *ethaddr) { - return dn_htons(ethaddr[4] | (ethaddr[5] << 8)); + return get_unaligned((__le16 *)(ethaddr + 4)); } static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) @@ -185,7 +183,7 @@ static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr) { - __u16 a = dn_ntohs(addr); + __u16 a = le16_to_cpu(addr); ethaddr[0] = 0xAA; ethaddr[1] = 0x00; ethaddr[2] = 0x04; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 3012511..c378be7 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -181,9 +181,9 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) static inline __le16 dnet_make_mask(int n) { - if (n) - return dn_htons(~((1<<(16-n))-1)); - return 0; + if (n) + return cpu_to_le16(~((1 << (16 - n)) - 1)); + return cpu_to_le16(0); } #endif /* _NET_DN_FIB_H */ -- cgit v1.1 From 475ad8e2172d7f8b73af5532a8dad265b51339c2 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 27 Nov 2008 23:04:13 -0800 Subject: decnet: compile fix for removal of byteorder wrapper Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/net/dn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index 7cd6bb8..e5469f7 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include struct dn_scp /* Session Control Port */ { -- cgit v1.1 From 0f0ca340e57bd7446855fefd07a64249acf81223 Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Fri, 28 Nov 2008 16:24:56 -0800 Subject: phy: power management support This patch adds the power management support into the physical abstraction layer. Suspend and resume functions respectively turns on/off the bit 11 into the PHY Basic mode control register. Generic PHY device starts supporting PM. In order to support the wake-on LAN and avoid to put in power down the PHY device, the MDIO is aware of what the Ethernet device wants to do. Voluntary, no CONFIG_PM defines were added into the sources. Also generic suspend/resume functions are exported to allow other drivers use them (such as genphy_config_aneg etc.). Within the phy_driver_register function, we need to remove the memset. It overrides the device driver owner and it is not good. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 77c4ed6..d7e54d9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -467,6 +467,8 @@ int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); +int genphy_suspend(struct phy_device *phydev); +int genphy_resume(struct phy_device *phydev); void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, -- cgit v1.1 From 7a9d4020533b5c0c615b6de3be154c9ff30b8cc9 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 30 Nov 2008 12:17:26 +0100 Subject: Bluetooth: Send HCI Reset command by default on device initialization The Bluetooth subsystem was not using the HCI Reset command when doing device initialization. The Bluetooth 1.0b specification was ambiguous on how the device firmware was suppose to handle it. Almost every device was triggering a transport reset at the same time. In case of USB this ended up in disconnects from the bus. All modern Bluetooth dongles handle this perfectly fine and a lot of them actually require that HCI Reset is sent. If not then they are either stuck in their HID Proxy mode or their internal structures for inquiry and paging are not correctly setup. To handle old and new devices smoothly the Bluetooth subsystem contains a quirk to force the HCI Reset on initialization. However maintaining such a quirk becomes more and more complicated. This patch turns the logic around and lets the old devices disable the HCI Reset command. The only device where the HCI_QUIRK_NO_RESET is still needed are the original Digianswer devices and dongles with an early CSR firmware. CSR reported that they fixed this for version 12 firmware. The last official release of version 11 firmware is build ID 115. The first version 12 candidate was build ID 117. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3cc2949..3645139 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI device quirks */ enum { - HCI_QUIRK_RESET_ON_INIT, + HCI_QUIRK_NO_RESET, HCI_QUIRK_RAW_DEVICE, HCI_QUIRK_FIXUP_BUFFER_SIZE }; -- cgit v1.1 From a418b893a6af11ae73c762ed5b76c1bad6dc19d8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 30 Nov 2008 12:17:28 +0100 Subject: Bluetooth: Enable per-module dynamic debug messages With the introduction of CONFIG_DYNAMIC_PRINTK_DEBUG it is possible to allow debugging without having to recompile the kernel. This patch turns all BT_DBG() calls into pr_debug() to support dynamic debug messages. As a side effect all CONFIG_BT_*_DEBUG statements are now removed and some broken debug entries have been fixed. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 996d12d..a04f846 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -54,8 +54,8 @@ #define SOL_RFCOMM 18 #define BT_INFO(fmt, arg...) printk(KERN_INFO "Bluetooth: " fmt "\n" , ## arg) -#define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __func__ , ## arg) -#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg) +#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg) +#define BT_DBG(fmt, arg...) pr_debug("%s: " fmt "\n" , __func__ , ## arg) /* Connection and socket states */ enum { -- cgit v1.1 From 52404881984e2d447f920a23e3bb63262dfc77f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 3 Dec 2008 15:42:56 -0800 Subject: Phonet: basic net namespace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 2 +- include/net/phonet/pn_dev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index c6a2451..057b0a8 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -46,7 +46,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; -struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void phonet_get_local_port_range(int *min, int *max); void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index bbd2a83..aa1c59a 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -43,7 +43,7 @@ struct net_device *phonet_device_get(struct net *net); int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); -int phonet_address_lookup(u8 addr); +int phonet_address_lookup(struct net *net, u8 addr); #define PN_NO_ADDR 0xff -- cgit v1.1 From 8865c418caf4e9dd2c24bdfae3a5a4106e143e60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 3 Dec 2008 22:12:38 -0800 Subject: atm: 32-bit ioctl compatibility We lack compat ioctl support through most of the ATM code. This patch deals with most of it, and I can now at least use BR2684 and PPPoATM with 32-bit userspace. I haven't added a .compat_ioctl method to struct atm_ioctl, because AFAICT none of the current users need any conversion -- so we can just call the ->ioctl() method in every case. I looked at br2684, clip, lec, mpc, pppoatm and atmtcp. In svc_compat_ioctl() the only mangling which is needed is to change COMPAT_ATM_ADDPARTY to ATM_ADDPARTY. Although it's defined as _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) it doesn't actually _take_ a struct atm_iobuf as an argument -- it takes a struct sockaddr_atmsvc, which _is_ the same between 32-bit and 64-bit code, so doesn't need conversion. Almost all of vcc_ioctl() would have been identical, so I converted that into a core do_vcc_ioctl() function with an 'int compat' argument. I've done the same with atm_dev_ioctl(), where there _are_ a few differences, but still it's relatively contained and there would otherwise have been a lot of duplication. I haven't done any of the actual device-specific ioctls, although I've added a compat_ioctl method to struct atmdev_ops. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller --- include/linux/atm.h | 17 ++++++++++++++--- include/linux/atmdev.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/atm.h b/include/linux/atm.h index c791ddd..d3b2921 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -231,10 +231,21 @@ static __inline__ int atmpvc_addr_in_use(struct sockaddr_atmpvc addr) */ struct atmif_sioc { - int number; - int length; - void __user *arg; + int number; + int length; + void __user *arg; }; +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT +#include +struct compat_atmif_sioc { + int number; + int length; + compat_uptr_t arg; +}; +#endif +#endif + typedef unsigned short atm_backend_t; #endif diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index a3d07c2..086e5c3 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -100,6 +100,10 @@ struct atm_dev_stats { /* use backend to make new if */ #define ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) /* add party to p2mp call */ +#ifdef CONFIG_COMPAT +/* It actually takes struct sockaddr_atmsvc, not struct atm_iobuf */ +#define COMPAT_ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct compat_atm_iobuf) +#endif #define ATM_DROPPARTY _IOW('a', ATMIOC_SPECIAL+5,int) /* drop party from p2mp call */ @@ -224,6 +228,13 @@ struct atm_cirange { extern struct proc_dir_entry *atm_proc_root; #endif +#ifdef CONFIG_COMPAT +#include +struct compat_atm_iobuf { + int length; + compat_uptr_t buffer; +}; +#endif struct k_atm_aal_stats { #define __HANDLE_ITEM(i) atomic_t i @@ -379,6 +390,10 @@ struct atmdev_ops { /* only send is required */ int (*open)(struct atm_vcc *vcc); void (*close)(struct atm_vcc *vcc); int (*ioctl)(struct atm_dev *dev,unsigned int cmd,void __user *arg); +#ifdef CONFIG_COMPAT + int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, + void __user *arg); +#endif int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, void __user *optval,int optlen); int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, -- cgit v1.1 From 72bdcf34380917260da41e3c49e10edee04bc5cd Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 26 Nov 2008 16:15:24 +0200 Subject: nl80211: Add frequency configuration (including HT40) This patch adds new NL80211_CMD_SET_WIPHY attributes NL80211_ATTR_WIPHY_FREQ and NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET to allow userspace to set the operating channel (e.g., hostapd for AP mode). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 23 +++++++++++++++++++++-- include/net/cfg80211.h | 9 +++++++++ include/net/mac80211.h | 3 +++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e08c8bc..92f79d2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -26,8 +26,9 @@ * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or - * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME - * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or + * %NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -180,6 +181,14 @@ enum nl80211_commands { * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz + * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): + * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * this attribute) + * NL80211_SEC_CHAN_DISABLED = HT20 only + * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel + * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -315,6 +324,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, NL80211_ATTR_WIPHY_TXQ_PARAMS, + NL80211_ATTR_WIPHY_FREQ, + NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, /* add attributes here, update the policy in nl80211.c */ @@ -329,6 +340,8 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS +#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ +#define NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -742,4 +755,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; +enum nl80211_sec_chan_offset { + NL80211_SEC_CHAN_NO_HT /* No HT */, + NL80211_SEC_CHAN_DISABLED /* HT20 only */, + NL80211_SEC_CHAN_BELOW /* HT40- */, + NL80211_SEC_CHAN_ABOVE /* HT40+ */ +}; #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d57835..53b06f6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -392,6 +392,9 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; +/* from net/ieee80211.h */ +struct ieee80211_channel; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -450,6 +453,8 @@ struct wiphy; * @change_bss: Modify parameters for a given BSS. * * @set_txq_params: Set TX queue parameters + * + * @set_channel: Set channel */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -513,6 +518,10 @@ struct cfg80211_ops { int (*set_txq_params)(struct wiphy *wiphy, struct ieee80211_txq_params *params); + + int (*set_channel)(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_sec_chan_offset); }; #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6a1d4ea..6e823cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -507,6 +507,9 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) struct ieee80211_ht_conf { bool enabled; + int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary + * channel below primary; 1 = HT40 enabled, + * secondary channel above primary */ }; /** -- cgit v1.1 From 10ec4f1d0851eb97cd53db66150835dd7f64829d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 26 Nov 2008 13:03:08 -0800 Subject: nl80211: relicense nl80211.h under the ISC We have a few BSD/ISC licensed userspace applications which include nl80211.h from the kernel. To avoid legal ambiguity for usage of the header file in these projects we rather simply relicense the header file under the ISC. We've received consent from all contributors to it. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Acked-by: Michael Wu Acked-by: Luis Carlos Cobo Acked-by: Michael Buesch Acked-by: Jouni Malinen Acked-by: Colin McCabe Acked-by: Javier Cardona Cc: johannes@sipsolutions.net Cc: altape@eden.rutgers.edu Cc: luisca@cozybit.com Cc: mb@bu3sch.de Cc: jouni.malinen@atheros.com Cc: colin@cozybit.com Cc: javier@cozybit.com Signed-off-by: John W. Linville --- include/linux/nl80211.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 92f79d2..04d4516 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -3,7 +3,26 @@ /* * 802.11 netlink interface public header * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006, 2007, 2008 Johannes Berg + * Copyright 2008 Michael Wu + * Copyright 2008 Luis Carlos Cobo + * Copyright 2008 Michael Buesch + * Copyright 2008 Luis R. Rodriguez + * Copyright 2008 Jouni Malinen + * Copyright 2008 Colin McCabe + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * */ /** -- cgit v1.1 From fee52678dbda2099a25243e79da98dc390e1939a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Nov 2008 22:36:31 +0100 Subject: cfg80211: handle SIOCGIWNAME This patch moves the SIOCGIWNAME handling from mac80211 to cfg80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 53b06f6..c97eac3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,6 +5,8 @@ #include #include #include +/* remove once we remove the wext stuff */ +#include /* * 802.11 configuration in-kernel interface @@ -524,4 +526,9 @@ struct cfg80211_ops { enum nl80211_sec_chan_offset); }; +/* temporary wext handlers */ +int cfg80211_wext_giwname(struct net_device *dev, + struct iw_request_info *info, + char *name, char *extra); + #endif /* __NET_CFG80211_H */ -- cgit v1.1 From e60c7744f8aa77bcbcb0b294596d6c87445d1200 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Nov 2008 23:31:40 +0100 Subject: cfg80211: handle SIOCGIWMODE/SIOCSIWMODE further reducing wext code in mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c97eac3..a0c0bf1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -530,5 +530,9 @@ struct cfg80211_ops { int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra); +int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra); +int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra); #endif /* __NET_CFG80211_H */ -- cgit v1.1 From 007e5ddddfed4ba039899754936e89b27d5cb551 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Nov 2008 23:13:38 +0100 Subject: wireless: clean up radiotap a bit No need to pad the header so no constant needed for that, no need to carry any version number from netbsd nor CVS IDs from them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211_radiotap.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index d364fd5..384698c 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,7 +1,4 @@ -/* $FreeBSD: src/sys/net80211/ieee80211_radiotap.h,v 1.5 2005/01/22 20:12:05 sam Exp $ */ -/* $NetBSD: ieee80211_radiotap.h,v 1.11 2005/06/22 06:16:02 dyoung Exp $ */ - -/*- +/* * Copyright (c) 2003, 2004 David Young. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,8 +39,6 @@ #include #include -/* Radiotap header version (from official NetBSD feed) */ -#define IEEE80211RADIOTAP_VERSION "1.5" /* Base version of the radiotap packet header data */ #define PKTHDR_RADIOTAP_VERSION 0 @@ -62,12 +57,8 @@ * readers. */ -/* XXX tcpdump/libpcap do not tolerate variable-length headers, - * yet, so we pad every radiotap header to 64 bytes. Ugh. - */ -#define IEEE80211_RADIOTAP_HDRLEN 64 - -/* The radio capture header precedes the 802.11 header. +/* + * The radio capture header precedes the 802.11 header. * All data in the header is little endian on all platforms. */ struct ieee80211_radiotap_header { -- cgit v1.1 From 4571d3bf87b76eae875283ff9f7243984b5ddcae Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 30 Nov 2008 00:48:41 +0100 Subject: mac80211: add sta_notify_ps callback This patch is necessary in order to provide a proper Access point support for p54. Unfortunately for us, there is no documented way to disable the interfering power save buffering mechanism in firmware completely. Therefore we give in and notify the driver through our new sta_notify_ps callback, so that we can update the filter state. Signed-off-by: Christian Lamparter Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6e823cc..0a0c593 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -782,6 +782,19 @@ enum sta_notify_cmd { }; /** + * enum sta_notify_ps_cmd - sta power save notify command + * + * Used with the sta_notify_ps() callback in &struct ieee80211_ops to + * notify the driver if a station made a power state transition. + * + * @STA_NOTIFY_SLEEP: a station is now sleeping + * @STA_NOTIFY_AWAKE: a sleeping station woke up + */ +enum sta_notify_ps_cmd { + STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE, +}; + +/** * enum ieee80211_tkip_key_type - get tkip key * * Used by drivers which need to get a tkip key for skb. Some drivers need a @@ -1251,6 +1264,9 @@ enum ieee80211_ampdu_mlme_action { * @sta_notify: Notifies low level driver about addition or removal * of associated station or AP. * + * @sta_ps_notify: Notifies low level driver about the power state transition + * of a associated station. Must be atomic. + * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * @@ -1317,6 +1333,8 @@ struct ieee80211_ops { int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); + void (*sta_notify_ps)(struct ieee80211_hw *hw, + enum sta_notify_ps_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, -- cgit v1.1 From 8bef7a10014c4579c66579ab47fc1bb9563ac42a Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 30 Nov 2008 20:56:28 +0200 Subject: mac80211: document ieee80211_tx_info.pad Fixes htmldocs warning: Warning(mac80211.h:379): No description found for parameter 'pad[2]' Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0a0c593..e84c922 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -323,6 +323,7 @@ struct ieee80211_tx_rate { * @flags: transmit info flags, defined above * @band: the band to transmit on (use for checking for races) * @antenna_sel_tx: antenna to use, 0 for automatic diversity + * @pad: padding, ignore * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers -- cgit v1.1 From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 8 Dec 2008 01:14:16 -0800 Subject: netdevice: Kill netdev->priv This is the last shoot of this series. After I removing all directly reference of netdev->priv, I am killing "priv" of "struct net_device" and fixing relative comments/docs. Anyone will not be allowed to reference netdev->priv directly. If you want to reference the memory of private data, use netdev_priv() instead. If the private data is not allocted when alloc_netdev(), use netdev->ml_priv to point that memory after you creating that private data. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- include/linux/hdlc.h | 2 +- include/linux/netdevice.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e960faa..fd47a15 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -43,7 +43,7 @@ struct hdlc_proto { }; -/* Pointed to by dev->priv */ +/* Pointed to by netdev_priv(dev) */ typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0df0db0..47e7315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,7 +785,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - void *priv; /* pointer to private data */ /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ -- cgit v1.1 From 0049bab5e765aa74cf767a834fa336e19453fc5e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:05 -0800 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs. Also removed are the constructors for the TX CCID and the RX CCID, since the switch "rx <-> non-rx" is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff5..46daea3 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; -- cgit v1.1 From 4098dce5be537a157eed4a326efd464109825b8b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:37 -0800 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature uses the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea3..60e9443 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; -- cgit v1.1 From 6fdd34d43bff8be9bb925b49d87a0ee144d2ab07 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:19:06 -0800 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, as it now is handled fully dynamically via feature negotiation (i.e. when CCID-2 is enabled, Ack Vectors are automatically enabled as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. There was a FIXME to change the error code when dccp_ackvec_add() fails. I removed this after finding out that: * the check whether ackno < ISN is already made earlier, * this Response is likely the 1st packet with an Ackno that the client gets, * so when dccp_ackvec_add() fails, the reason is likely not a packet error. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e9443..61734e2 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; -- cgit v1.1 From 2107fb8b5bf018be691afdd4c6ffaecf0c3307be Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:38 +0000 Subject: smsc911x: add dynamic bus configuration Convert the driver to select 16-bit or 32-bit bus access at runtime, at a small performance cost. Signed-off-by: Steve Glendinning Acked-by: Catalin Marinas Signed-off-by: David S. Miller --- include/linux/smsc911x.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 47c4ffd..1cbf031 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -28,6 +28,7 @@ struct smsc911x_platform_config { unsigned int irq_polarity; unsigned int irq_type; + unsigned int flags; phy_interface_t phy_interface; }; @@ -39,4 +40,8 @@ struct smsc911x_platform_config { #define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 #define SMSC911X_IRQ_TYPE_PUSH_PULL 1 +/* Constants for flags */ +#define SMSC911X_USE_16BIT (BIT(0)) +#define SMSC911X_USE_32BIT (BIT(1)) + #endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.1 From bd91b8bf372911c1e4d66d6bb44fe409349a6791 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:07:08 -0800 Subject: netns: ip6mr: allocate mroute6_socket per-namespace. Preliminary work to make IPv6 multicast forwarding netns-aware. Make IPv6 multicast forwarding mroute6_socket per-namespace, moves it into struct netns_ipv6. At the moment, mroute6_socket is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 8 ++++++-- include/net/netns/ipv6.h | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 6f4c180..2cd9901 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -117,6 +117,7 @@ struct sioc_mif_req6 #include #include /* for struct sk_buff_head */ +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -232,10 +233,13 @@ struct rtmsg; extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE -extern struct sock *mroute6_socket; +static inline struct sock *mroute6_socket(struct net *net) +{ + return net->ipv6.mroute6_sk; +} extern int ip6mr_sk_done(struct sock *sk); #else -#define mroute6_socket NULL +static inline struct sock *mroute6_socket(struct net *net) { return NULL; } static inline int ip6mr_sk_done(struct sock *sk) { return 0; } #endif #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2932721..8a0a67d 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -55,5 +55,8 @@ struct netns_ipv6 { struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; +#ifdef CONFIG_IPV6_MROUTE + struct sock *mroute6_sk; +#endif }; #endif -- cgit v1.1 From 4e16880cb4225bfa68878ad5b2a9ded53657d054 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:15:08 -0800 Subject: netns: ip6mr: dynamically allocates vif6_table Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates interface table vif6_table and moves it to struct netns_ipv6, and updates MIF_EXISTS() macro. At the moment, vif6_table is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 8a0a67d..4ab0cb0 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -57,6 +57,8 @@ struct netns_ipv6 { struct sock *igmp_sk; #ifdef CONFIG_IPV6_MROUTE struct sock *mroute6_sk; + struct mif_device *vif6_table; + int maxvif; #endif }; #endif -- cgit v1.1 From 58701ad41105638baa0b38ffe9ac5b10469c1fd3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:22:34 -0800 Subject: netns: ip6mr: store netns in struct mfc6_cache This patch stores into struct mfc6_cache the network namespace each mfc6_cache belongs to. The new member is mfc6_net. mfc6_net is assigned at cache allocation and doesn't change during the rest of the cache entry life. This will help to retrieve the current netns around the IPv6 multicast forwarding code. At the moment, all mfc6_cache are allocated in init_net. Changelog: ========== * Use write_pnet()/read_pnet() to set and get mfc6_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 2cd9901..15d85fe 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -188,6 +188,9 @@ struct mif_device struct mfc6_cache { struct mfc6_cache *next; /* Next entry on cache line */ +#ifdef CONFIG_NET_NS + struct net *mfc6_net; +#endif struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ struct in6_addr mf6c_origin; /* Source of packet */ mifi_t mf6c_parent; /* Source interface */ @@ -210,6 +213,18 @@ struct mfc6_cache } mfc_un; }; +static inline +struct net *mfc6_net(const struct mfc6_cache *mfc) +{ + return read_pnet(&mfc->mfc6_net); +} + +static inline +void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) +{ + write_pnet(&mfc->mfc6_net, hold_net(net)); +} + #define MFC_STATIC 1 #define MFC_NOTIFY 2 -- cgit v1.1 From 4a6258a0e33d042e4c84d9dec25d45ddb40a70b3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:24:07 -0800 Subject: netns: ip6mr: dynamically allocate mfc6_cache_array Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates IPv6 multicast forwarding cache, mfc6_cache_array, and moves it to struct netns_ipv6. At the moment, mfc6_cache_array is only referenced in init_net. Replace 'ARRAY_SIZE(mfc6_cache_array)' with mfc6_cache_array size: MFC6_LINES. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 4ab0cb0..14c1bbe 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -57,6 +57,7 @@ struct netns_ipv6 { struct sock *igmp_sk; #ifdef CONFIG_IPV6_MROUTE struct sock *mroute6_sk; + struct mfc6_cache **mfc6_cache_array; struct mif_device *vif6_table; int maxvif; #endif -- cgit v1.1 From 4045e57c19bee150370390545ee8a933b3f7a18d Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:27:21 -0800 Subject: netns: ip6mr: declare counter cache_resolve_queue_len per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable cache_resolve_queue_len per-namespace: moves it into struct netns_ipv6. This variable counts the number of unresolved cache entries queued in the list mfc_unres_queue. This list is kept global to all netns as the number of entries per namespace is limited to 10 (hardcoded in routine ip6mr_cache_unresolved). Entries belonging to different namespaces in mfc_unres_queue will be identified by matching the mfc_net member introduced previously in struct mfc6_cache. Keeping this list global to all netns, also allows us to keep a single timer (ipmr_expire_timer) to handle their expiration. In some places cache_resolve_queue_len value was tested for arming or deleting the timer. These tests were equivalent to testing mfc_unres_queue value instead and are replaced in this patch. At the moment, cache_resolve_queue_len is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 14c1bbe..30572f3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -60,6 +60,7 @@ struct netns_ipv6 { struct mfc6_cache **mfc6_cache_array; struct mif_device *vif6_table; int maxvif; + atomic_t cache_resolve_queue_len; #endif }; #endif -- cgit v1.1 From a21f3f997c73ced682129aedd372bb6b53041510 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:28:44 -0800 Subject: netns: ip6mr: declare mroute_do_assert and mroute_do_pim per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare IPv6 multicast forwarding variables 'mroute_do_assert' and 'mroute_do_pim' per-namespace in struct netns_ipv6. At the moment, these variables are only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 30572f3..919cb0f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -61,6 +61,8 @@ struct netns_ipv6 { struct mif_device *vif6_table; int maxvif; atomic_t cache_resolve_queue_len; + int mroute_do_assert; + int mroute_do_pim; #endif }; #endif -- cgit v1.1 From 950d5704e5daa1f90bcd75b99163491e7b249169 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:29:24 -0800 Subject: netns: ip6mr: declare reg_vif_num per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable 'reg_vif_num' per-namespace, moves into struct netns_ipv6. At the moment, this variable is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 919cb0f..afab4e4 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -63,6 +63,9 @@ struct netns_ipv6 { atomic_t cache_resolve_queue_len; int mroute_do_assert; int mroute_do_pim; +#ifdef CONFIG_IPV6_PIMSM_V2 + int mroute_reg_vif_num; +#endif #endif }; #endif -- cgit v1.1 From 8229efdaef1e7913ae1712c0ba752f267e5fcd5e Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:30:15 -0800 Subject: netns: ip6mr: enable namespace support in ipv6 multicast forwarding code This last patch makes the appropriate changes to use and propagate the network namespace where needed in IPv6 multicast forwarding code. This consists mainly in replacing all the remaining init_net occurences with current netns pointer retrieved from sockets, net devices or mfc6_caches depending on the routines' contexts. Some routines receive a new 'struct net' parameter to propagate the current netns: * ip6mr_get_route * ip6mr_cache_report * ip6mr_cache_find * ip6mr_cache_unresolved * mif6_add/mif6_delete * ip6mr_mfc_add/ip6mr_mfc_delete * ip6mr_reg_vif All the IPv6 multicast forwarding variables moved to struct netns_ipv6 by the previous patches are now referenced in the correct namespace. Changelog: ========== * Take into account the net associated to mfc6_cache when matching entries in mfc_unres_queue list. * Call mroute_clean_tables() in ip6mr_net_exit() to free memory allocated per-namespace. * Call dev_net_set() in ip6mr_reg_vif() to initialize dev->nd_net correctly. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 15d85fe..5375fac 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -245,7 +245,8 @@ void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) #ifdef __KERNEL__ struct rtmsg; -extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); +extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, + struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE static inline struct sock *mroute6_socket(struct net *net) -- cgit v1.1 From bb608e9db7d29616fb6e0d856c23434610d4a1bd Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Thu, 4 Dec 2008 20:38:13 +0530 Subject: wireless: Incorrect LEAP authentication algorithm identifier. This patch fixes a regression introduced by "wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts" LEAP authentication algorithm identifier should be 128. Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a6ec928..c4e6ca1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -836,7 +836,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_LEAP 2 +#define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 -- cgit v1.1 From 7ba1c04ed727a70df2dc63464232c0ec906ad67d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Dec 2008 11:18:32 +0100 Subject: mac80211: improve sta_notify documentation Mention more possible STA entries and document the atomic requirement. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e84c922..346f373 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1262,8 +1262,8 @@ enum ieee80211_ampdu_mlme_action { * the device does fragmentation by itself; if this method is assigned then * the stack will not do fragmentation. * - * @sta_notify: Notifies low level driver about addition or removal - * of associated station or AP. + * @sta_notify: Notifies low level driver about addition or removal of an + * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. * * @sta_ps_notify: Notifies low level driver about the power state transition * of a associated station. Must be atomic. -- cgit v1.1 From 0f202aa2e1e1db1d20da9bcc3f5ad43c5a22d2d5 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 8 Dec 2008 14:51:41 -0500 Subject: ieee80211_security: correct warning about width of auth_mode Also remove auth_algo which is unused. Signed-off-by: John W. Linville --- include/net/ieee80211.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 7ab3ed2..adb7cf3 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -385,9 +385,8 @@ struct ieee80211_device; #define SCM_TEMPORAL_KEY_LENGTH 16 struct ieee80211_security { - u16 active_key:2, - enabled:1, - auth_mode:2, auth_algo:4, unicast_uses_group:1, encrypt:1; + u16 active_key:2, enabled:1, unicast_uses_group:1, encrypt:1; + u8 auth_mode; u8 encode_alg[WEP_KEYS]; u8 key_sizes[WEP_KEYS]; u8 keys[WEP_KEYS][SCM_KEY_LEN]; -- cgit v1.1 From f546638c3f809fdacddc03fe765669c3042e0d9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Dec 2008 12:30:03 +0100 Subject: mac80211: remove fragmentation offload functionality There's no driver that actually does fragmentation on the device, and the callback is buggy (when it returns an error, mac80211's fragmentation status is changed so reading the frag threshold from userspace reads the new value despite the error). Let's just remove it, if we really find some hardware supporting it we can add it back later. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 346f373..5ecc686 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1258,10 +1258,6 @@ enum ieee80211_ampdu_mlme_action { * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * - * @set_frag_threshold: Configuration of fragmentation threshold. Assign this if - * the device does fragmentation by itself; if this method is assigned then - * the stack will not do fragmentation. - * * @sta_notify: Notifies low level driver about addition or removal of an * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. * @@ -1331,7 +1327,6 @@ struct ieee80211_ops { void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, u16 *iv16); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); - int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); void (*sta_notify_ps)(struct ieee80211_hw *hw, -- cgit v1.1 From 89fad578a61810b7fdf8edd294890f3c0cde4390 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 9 Dec 2008 16:28:06 +0100 Subject: mac80211: integrate sta_notify_ps cmds into sta_notify This patch replaces the newly introduced sta_notify_ps function, which can be used to notify the driver about every power state transition for all associated stations, by integrating its functionality back into the original sta_notify callback. Signed-off-by: Christian Lamparter Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5ecc686..046ce69 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -773,25 +773,16 @@ struct ieee80211_sta { * enum sta_notify_cmd - sta notify command * * Used with the sta_notify() callback in &struct ieee80211_ops, this - * indicates addition and removal of a station to station table. + * indicates addition and removal of a station to station table, + * or if a associated station made a power state transition. * * @STA_NOTIFY_ADD: a station was added to the station table * @STA_NOTIFY_REMOVE: a station being removed from the station table - */ -enum sta_notify_cmd { - STA_NOTIFY_ADD, STA_NOTIFY_REMOVE -}; - -/** - * enum sta_notify_ps_cmd - sta power save notify command - * - * Used with the sta_notify_ps() callback in &struct ieee80211_ops to - * notify the driver if a station made a power state transition. - * * @STA_NOTIFY_SLEEP: a station is now sleeping * @STA_NOTIFY_AWAKE: a sleeping station woke up */ -enum sta_notify_ps_cmd { +enum sta_notify_cmd { + STA_NOTIFY_ADD, STA_NOTIFY_REMOVE, STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE, }; @@ -1258,11 +1249,9 @@ enum ieee80211_ampdu_mlme_action { * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * - * @sta_notify: Notifies low level driver about addition or removal of an - * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. - * - * @sta_ps_notify: Notifies low level driver about the power state transition - * of a associated station. Must be atomic. + * @sta_notify: Notifies low level driver about addition, removal or power + * state transition of an associated station, AP, IBSS/WDS/mesh peer etc. + * Must be atomic. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. @@ -1329,8 +1318,6 @@ struct ieee80211_ops { int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); - void (*sta_notify_ps)(struct ieee80211_hw *hw, - enum sta_notify_ps_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, -- cgit v1.1 From 4dec9b807be757780ca3611a959ac22c28d292a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Dec 2008 17:48:48 +0100 Subject: rfkill: strip pointless notifier chain No users, so no reason to have it. Signed-off-by: Johannes Berg Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index f376a93..164332c 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -149,11 +149,4 @@ static inline char *rfkill_get_led_name(struct rfkill *rfkill) #endif } -/* rfkill notification chain */ -#define RFKILL_STATE_CHANGED 0x0001 /* state of a normal rfkill - switch has changed */ - -int register_rfkill_notifier(struct notifier_block *nb); -int unregister_rfkill_notifier(struct notifier_block *nb); - #endif /* RFKILL_H */ -- cgit v1.1 From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:27:47 -0800 Subject: net: Add frag_list support to GSO This patch allows GSO to handle frag_list in a limited way for the purposes of allowing packets merged by GRO to be refragmented on output. Most hardware won't (and aren't expected to) support handling GRO frag_list packets directly. Therefore we will perform GSO in software for those cases. However, for drivers that can support it (such as virtual NICs) we may not have to segment the packets at all. Whether the added overhead of GRO/GSO is worthwhile for bridges and routers when weighed against the benefit of potentially increasing the MTU within the host is still an open question. However, for the case of host nodes this is undoubtedly a win. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b60c26b..bdf5465 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || + (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } -- cgit v1.1 From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:38:52 -0800 Subject: net: Add Generic Receive Offload infrastructure This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb or just call netif_rx. The latter will call napi_receive_skb automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. For each packet that may match, they also have a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet) where the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. Currently held packets are stored in a singly liked list just like LRO. The list is limited to a maximum of 8 entries. In future, this may be expanded to use a hash table to allow more flows to be held for merging. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 80 +++++++++++++++++------------------------------ include/linux/netpoll.h | 5 --- 2 files changed, 28 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465..58856b6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,8 +314,9 @@ struct napi_struct { spinlock_t poll_lock; int poll_owner; struct net_device *dev; - struct list_head dev_list; #endif + struct list_head dev_list; + struct sk_buff *gro_list; }; enum @@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} +extern void __napi_complete(struct napi_struct *n); +extern void napi_complete(struct napi_struct *n); /** * napi_disable - prevent NAPI from scheduling @@ -640,9 +627,7 @@ struct net_device unsigned long state; struct list_head dev_list; -#ifdef CONFIG_NETPOLL struct list_head napi_list; -#endif /* Net device features */ unsigned long features; @@ -661,6 +646,7 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) * netif_napi_add() must be used to initialize a napi context prior to calling * *any* of the other napi related functions. */ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_del - remove a napi context @@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, * * netif_napi_del() removes a napi context from the network device napi list */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} +void netif_napi_del(struct napi_struct *napi); + +struct napi_gro_cb { + /* This is non-zero if the packet may be of the same flow. */ + int same_flow; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + int flush; + + /* Number of segments aggregated. */ + int count; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) struct packet_type { __be16 type; /* This is really htons(ether_type). */ @@ -1024,6 +1004,9 @@ struct packet_type { struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); +extern int napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, static inline void netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); + napi_complete(napi); } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d7959..e38d3c9 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) rcu_read_unlock(); } -static inline void netpoll_netdev_init(struct net_device *dev) -{ - INIT_LIST_HEAD(&dev->napi_list); -} - #else static inline int netpoll_rx(struct sk_buff *skb) { -- cgit v1.1 From 73cc19f1556b95976934de236fd9043f7208844f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:41:09 -0800 Subject: ipv4: Add GRO infrastructure This patch adds GRO support for IPv4. The criteria for merging is more stringent than LRO, in particular, we require all fields in the IP header to be identical except for the length, ID and checksum. In addition, the ID must form an arithmetic sequence with a difference of one. The ID requirement might seem overly strict, however, most hardware TSO solutions already obey this rule. Linux itself also obeys this whether GSO is in use or not. In future we could relax this rule by storing the IDs (or rather making sure that we don't drop them when pulling the aggregate skb's tail). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/protocol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index 8d024d7..cb2965a 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,6 +39,9 @@ struct net_protocol { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, netns_ok:1; }; -- cgit v1.1 From 71d93b39e52e92aea35f1058d957cf12250d0b75 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:42:33 -0800 Subject: net: Add skb_gro_receive This patch adds the helper skb_gro_receive to merge packets for GRO. The current method is to allocate a new header skb and then chain the original packets to its frag_list. This is done to make it easier to integrate into the existing GSO framework. In future as GSO is moved into the drivers, we can undo this and simply chain the original packets together. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acf17af..cf2cb50 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1687,6 +1687,8 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern int skb_gro_receive(struct sk_buff **head, + struct sk_buff *skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) -- cgit v1.1 From bf296b125b21b8d558ceb6ec30bb4eba2730cd6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:43:36 -0800 Subject: tcp: Add GRO support This patch adds the TCP-specific portion of GRO. The criterion for merging is extremely strict (the TCP header must match exactly apart from the checksum) so as to allow refragmentation. Otherwise this is pretty much identical to LRO, except that we support the merging of ECN packets. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index de1e91d..218235d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1358,6 +1358,12 @@ extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern int tcp_gro_complete(struct sk_buff *skb); +extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); -- cgit v1.1 From b240a0e5644eb817c4a397098a40e1ad42a615bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:44:31 -0800 Subject: ethtool: Add GGRO and SGRO ops This patch adds the ethtool ops to enable and disable GRO. It also makes GRO depend on RX checksum offload much the same as how TSO depends on SG support. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b4b038b..27c67a5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -467,6 +467,8 @@ struct ethtool_ops { #define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ #define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET -- cgit v1.1 From e18ce3465477502108187c6c08b6423fb784a313 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:00 -0800 Subject: net: Move flow control definitions to mii.h flags used within drivers for indicating tx and rx flow control are defined in 4 drivers (and probably more), move these constants to mii.h. The 3 SMSC drivers use the same constants (FLOW_CTRL_TX), but TG3 uses TG3_FLOW_CTRL_TX, so this patch also renames the constants within TG3. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- include/linux/mii.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 151b7e0..4a376e0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -135,6 +135,10 @@ #define LPA_1000FULL 0x0800 /* Link partner 1000BASE-T full duplex */ #define LPA_1000HALF 0x0400 /* Link partner 1000BASE-T half duplex */ +/* Flow control flags */ +#define FLOW_CTRL_TX 0x01 +#define FLOW_CTRL_RX 0x02 + /* This structure is used in all SIOCxMIIxxx ioctl calls */ struct mii_ioctl_data { __u16 phy_id; -- cgit v1.1 From bc02ff95fe4ebd3e5ee7455c0aa6f76ebe39ebca Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:48 -0800 Subject: net: Refactor full duplex flow control resolution These 4 drivers have identical full duplex flow control resolution functions. This patch changes them all to use one common function. The function in question decides whether a device should enable TX and RX flow control in a standard way (IEEE 802.3-2005 table 28B-3), so this should also be useful for other drivers. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- include/linux/mii.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 4a376e0..ad74858 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -239,5 +239,34 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, return 0; } +/** + * mii_resolve_flowctrl_fdx + * @lcladv: value of MII ADVERTISE register + * @rmtadv: value of MII LPA register + * + * Resolve full duplex flow control as per IEEE 802.3-2005 table 28B-3 + */ +static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_PAUSE_CAP) { + if (lcladv & ADVERTISE_PAUSE_ASYM) { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + else if (rmtadv & LPA_PAUSE_ASYM) + cap = FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_PAUSE_ASYM) { + if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + cap = FLOW_CTRL_TX; + } + + return cap; +} + #endif /* __KERNEL__ */ #endif /* __LINUX_MII_H__ */ -- cgit v1.1 From b24a2516d10751d7ed5afb58420df25370c9dffb Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:06:23 -0800 Subject: ipv6: Add IPV6_PKTINFO sticky option support to setsockopt() There are three reasons for me to add this support: 1.When no interface is specified in an IPV6_PKTINFO ancillary data item, the interface specified in an IPV6_PKTINFO sticky optionis is used. RFC3542: 6.7. Summary of Outgoing Interface Selection This document and [RFC-3493] specify various methods that affect the selection of the packet's outgoing interface. This subsection summarizes the ordering among those in order to ensure deterministic behavior. For a given outgoing packet on a given socket, the outgoing interface is determined in the following order: 1. if an interface is specified in an IPV6_PKTINFO ancillary data item, the interface is used. 2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky option, the interface is used. 2.When no IPV6_PKTINFO ancillary data is received,getsockopt() should return the sticky option value which set with setsockopt(). RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: 3.Make the setsockopt implementation POSIX compliant. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 641e026..0b816ca 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -278,6 +278,7 @@ struct ipv6_pinfo { struct in6_addr saddr; struct in6_addr rcv_saddr; struct in6_addr daddr; + struct in6_pktinfo sticky_pktinfo; struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES struct in6_addr *saddr_cache; -- cgit v1.1 From b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Tue, 16 Dec 2008 15:29:15 -0800 Subject: gianfar: Convert gianfar to an of_platform_driver Does the same for the accompanying MDIO driver, and then modifies the TBI configuration method. The old way used fields in einfo, which no longer exists. The new way is to create an MDIO device-tree node for each instance of gianfar, and create a tbi-handle property to associate ethernet controllers with the TBI PHYs they are connected to. Signed-off-by: Andy Fleming Signed-off-by: David S. Miller --- include/linux/fsl_devices.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 708bab5..d9051d7 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -47,12 +47,7 @@ struct gianfar_platform_data { /* device specific information */ u32 device_flags; - /* board specific information */ - u32 board_flags; - int mdio_bus; /* Bus controlled by us */ - char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ - u32 phy_id; - u8 mac_addr[6]; + char bus_id[BUS_ID_SIZE]; phy_interface_t interface; }; @@ -61,17 +56,6 @@ struct gianfar_mdio_data { int irq[32]; }; -/* Flags related to gianfar device features */ -#define FSL_GIANFAR_DEV_HAS_GIGABIT 0x00000001 -#define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 -#define FSL_GIANFAR_DEV_HAS_RMON 0x00000004 -#define FSL_GIANFAR_DEV_HAS_MULTI_INTR 0x00000008 -#define FSL_GIANFAR_DEV_HAS_CSUM 0x00000010 -#define FSL_GIANFAR_DEV_HAS_VLAN 0x00000020 -#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH 0x00000040 -#define FSL_GIANFAR_DEV_HAS_PADDING 0x00000080 -#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET 0x00000100 - /* Flags in gianfar_platform_data */ #define FSL_GIANFAR_BRD_HAS_PHY_INTR 0x00000001 /* set or use a timer */ #define FSL_GIANFAR_BRD_IS_REDUCED 0x00000002 /* Set if RGMII, RMII */ -- cgit v1.1 From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 17 Dec 2008 15:47:29 -0800 Subject: Phonet: allocate a non-Ethernet ARP type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also leave some room for more 802.11 types. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 4d34018..11df77a 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,6 +87,8 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_PHONET 820 /* PhoNet media type */ + #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.1 From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 17 Dec 2008 15:47:48 -0800 Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate xmit lock class supports GPRS over a Phonet pipe over a TUN device (type ARPHRD_NONE). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 11df77a..5ff8980 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -88,6 +88,7 @@ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_PHONET 820 /* PhoNet media type */ +#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.1 From be677730a0ccb6bedced6f65f2ba8f57a3c607ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 17 Dec 2008 15:48:31 -0800 Subject: Phonet: use atomic for packet TX window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPRS TX flow control won't need to lock the underlying socket anymore. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index fcd7930..4c61cdc 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -35,12 +35,12 @@ struct pep_sock { struct sock *listener; struct sk_buff_head ctrlreq_queue; #define PNPIPE_CTRLREQ_MAX 10 + atomic_t tx_credits; int ifindex; u16 peer_type; /* peer type/subtype */ u8 pipe_handle; u8 rx_credits; - u8 tx_credits; u8 rx_fc; /* RX flow control */ u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ -- cgit v1.1 From 420e7fabd9c6d907280ed6b3e40eef425c5d8d8d Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Thu, 11 Dec 2008 22:04:19 +0100 Subject: nl80211: Add signal strength and bandwith to nl80211station info This patch adds signal strength and transmission bitrate to the station_info of nl80211. Signed-off-by: Henning Rogge Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 31 +++++++++++++++++++++++++++++++ include/net/cfg80211.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 04d4516..7501acf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -425,6 +425,32 @@ enum nl80211_sta_flags { }; /** + * enum nl80211_rate_info - bitrate information + * + * These attribute types are used with %NL80211_STA_INFO_TXRATE + * when getting information about the bitrate of a station. + * + * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved + * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) + * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate + * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @__NL80211_RATE_INFO_AFTER_LAST: internal use + */ +enum nl80211_rate_info { + __NL80211_RATE_INFO_INVALID, + NL80211_RATE_INFO_BITRATE, + NL80211_RATE_INFO_MCS, + NL80211_RATE_INFO_40_MHZ_WIDTH, + NL80211_RATE_INFO_SHORT_GI, + + /* keep last */ + __NL80211_RATE_INFO_AFTER_LAST, + NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 +}; + +/** * enum nl80211_sta_info - station information * * These attribute types are used with %NL80211_ATTR_STA_INFO @@ -436,6 +462,9 @@ enum nl80211_sta_flags { * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute + * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute + * containing info as possible, see &enum nl80211_sta_info_txrate. */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -445,6 +474,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LLID, NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, + NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_TX_BITRATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a0c0bf1..65e03ac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -169,6 +169,9 @@ struct station_parameters { * @STATION_INFO_LLID: @llid filled * @STATION_INFO_PLID: @plid filled * @STATION_INFO_PLINK_STATE: @plink_state filled + * @STATION_INFO_SIGNAL: @signal filled + * @STATION_INFO_TX_BITRATE: @tx_bitrate fields are filled + * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -177,6 +180,39 @@ enum station_info_flags { STATION_INFO_LLID = 1<<3, STATION_INFO_PLID = 1<<4, STATION_INFO_PLINK_STATE = 1<<5, + STATION_INFO_SIGNAL = 1<<6, + STATION_INFO_TX_BITRATE = 1<<7, +}; + +/** + * enum station_info_rate_flags - bitrate info flags + * + * Used by the driver to indicate the specific rate transmission + * type for 802.11n transmissions. + * + * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled + * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission + * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval + */ +enum rate_info_flags { + RATE_INFO_FLAGS_MCS = 1<<0, + RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, + RATE_INFO_FLAGS_SHORT_GI = 1<<2, +}; + +/** + * struct rate_info - bitrate information + * + * Information about a receiving or transmitting bitrate + * + * @flags: bitflag of flags from &enum rate_info_flags + * @mcs: mcs index if struct describes a 802.11n bitrate + * @legacy: bitrate in 100kbit/s for 802.11abg + */ +struct rate_info { + u8 flags; + u8 mcs; + u16 legacy; }; /** @@ -191,6 +227,8 @@ enum station_info_flags { * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state + * @signal: signal strength of last received packet in dBm + * @txrate: current unicast bitrate to this station */ struct station_info { u32 filled; @@ -200,6 +238,8 @@ struct station_info { u16 llid; u16 plid; u8 plink_state; + s8 signal; + struct rate_info txrate; }; /** -- cgit v1.1 From 094d05dc32fc2930e381189a942016e5561775d9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 12 Dec 2008 11:57:43 +0530 Subject: mac80211: Fix HT channel selection HT management is done differently for AP and STA modes, unify to just the ->config() callback since HT is fundamentally a PHY property and cannot be per-BSS. Rename enum nl80211_sec_chan_offset as nl80211_channel_type to denote the channel type ( NO_HT, HT20, HT40+, HT40- ). Signed-off-by: Johannes Berg Signed-off-by: Sujith Signed-off-by: John W. Linville --- include/linux/nl80211.h | 22 +++++++++++----------- include/net/cfg80211.h | 2 +- include/net/mac80211.h | 9 +-------- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7501acf..e86ed59 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -201,13 +201,13 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz - * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): - * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including * this attribute) - * NL80211_SEC_CHAN_DISABLED = HT20 only - * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel - * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel + * NL80211_CHAN_HT20 = HT20 only + * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel + * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -344,7 +344,7 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TXQ_PARAMS, NL80211_ATTR_WIPHY_FREQ, - NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, + NL80211_ATTR_WIPHY_CHANNEL_TYPE, /* add attributes here, update the policy in nl80211.c */ @@ -805,10 +805,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; -enum nl80211_sec_chan_offset { - NL80211_SEC_CHAN_NO_HT /* No HT */, - NL80211_SEC_CHAN_DISABLED /* HT20 only */, - NL80211_SEC_CHAN_BELOW /* HT40- */, - NL80211_SEC_CHAN_ABOVE /* HT40+ */ +enum nl80211_channel_type { + NL80211_CHAN_NO_HT, + NL80211_CHAN_HT20, + NL80211_CHAN_HT40MINUS, + NL80211_CHAN_HT40PLUS }; #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 65e03ac..23c0ab7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -563,7 +563,7 @@ struct cfg80211_ops { int (*set_channel)(struct wiphy *wiphy, struct ieee80211_channel *chan, - enum nl80211_sec_chan_offset); + enum nl80211_channel_type channel_type); }; /* temporary wext handlers */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 046ce69..22ae72c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -165,14 +165,9 @@ enum ieee80211_bss_change { /** * struct ieee80211_bss_ht_conf - BSS's changing HT configuration - * @secondary_channel_offset: secondary channel offset, uses - * %IEEE80211_HT_PARAM_CHA_SEC_ values - * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info) */ struct ieee80211_bss_ht_conf { - u8 secondary_channel_offset; - bool width_40_ok; u16 operation_mode; }; @@ -508,9 +503,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) struct ieee80211_ht_conf { bool enabled; - int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary - * channel below primary; 1 = HT40 enabled, - * secondary channel above primary */ + enum nl80211_channel_type channel_type; }; /** -- cgit v1.1 From 0fb8ca45eb164c405eef8978f26829f9348b4d4d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 12 Dec 2008 14:38:33 +0200 Subject: mac80211: Add HT rates into RX status reporting This patch adds option for HT-enabled drivers to report HT rates (HT20/HT40, short GI, MCS index) to mac80211. These rates are currently not in the rate table, so the rate_idx is used to indicate MCS index. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 22ae72c..9428d3e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -436,6 +436,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * is valid. This is useful in monitor mode and necessary for beacon frames * to enable IBSS merging. * @RX_FLAG_SHORTPRE: Short preamble was used for this frame + * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index + * @RX_FLAG_40MHZ: HT40 (40 MHz) was used + * @RX_FLAG_SHORT_GI: Short guard interval was used */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -446,7 +449,10 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = 1<<5, RX_FLAG_FAILED_PLCP_CRC = 1<<6, RX_FLAG_TSFT = 1<<7, - RX_FLAG_SHORTPRE = 1<<8 + RX_FLAG_SHORTPRE = 1<<8, + RX_FLAG_HT = 1<<9, + RX_FLAG_40MHZ = 1<<10, + RX_FLAG_SHORT_GI = 1<<11, }; /** @@ -466,7 +472,8 @@ enum mac80211_rx_flags { * @noise: noise when receiving this frame, in dBm. * @qual: overall signal quality indication, in percent (0-100). * @antenna: antenna used - * @rate_idx: index of data rate into band's supported rates + * @rate_idx: index of data rate into band's supported rates or MCS index if + * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* */ struct ieee80211_rx_status { -- cgit v1.1 From 520eb82076993b7f55ef9b80771d264272e5127b Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 18 Dec 2008 23:35:27 +0200 Subject: mac80211: implement dynamic power save This patch implements dynamic power save for mac80211. Basically it means enabling power save mode after an idle period. Implementing it dynamically gives a good compromise of low power consumption and low latency. Some hardware have support for this in firmware, but some require the host to do it. The dynamic power save is implemented by adding an timeout to ieee80211_subif_start_xmit(). The timeout can be enabled from userspace with Wireless Extensions. For example, the command below enables the dynamic power save and sets the time timeout to 500 ms: iwconfig wlan0 power timeout 500m Power save now only works with devices which handle power save in firmware. It's also disabled by default and the heuristics when and how to enable is considered as a policy decision and will be left for the userspace to handle. In case the firmware has support for this, drivers can disable this feature with IEEE80211_HW_NO_STACK_DYNAMIC_PS. Big thanks to Johannes Berg for the help with the design and code. Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9428d3e..b3bd00a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -854,6 +854,11 @@ enum ieee80211_tkip_key_type { * * @IEEE80211_HW_AMPDU_AGGREGATION: * Hardware supports 11n A-MPDU aggregation. + * + * @IEEE80211_HW_NO_STACK_DYNAMIC_PS: + * Hardware which has dynamic power save support, meaning + * that power save is enabled in idle periods, and don't need support + * from stack. */ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -866,6 +871,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_NOISE_DBM = 1<<8, IEEE80211_HW_SPECTRUM_MGMT = 1<<9, IEEE80211_HW_AMPDU_AGGREGATION = 1<<10, + IEEE80211_HW_NO_STACK_DYNAMIC_PS = 1<<11, }; /** -- cgit v1.1 From 1486a61ebcd2711532f8163d30babc40e11e7b40 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Sun, 21 Dec 2008 20:09:50 -0800 Subject: net: fix DCB setstate to return success/failure Data Center Bridging (DCB) had no way to know if setstate had failed in the driver. This patch enables dcb netlink code to handle the status for the DCB setstate interface. Likewise it allows the driver to return a failed status if MSI-X isn't enabled. Signed-off-by: Don Skidmore Signed-off-by: Eric W Multanen Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/net/dcbnl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 91e0a3d..775cfc8 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -26,7 +26,7 @@ */ struct dcbnl_rtnl_ops { u8 (*getstate)(struct net_device *); - void (*setstate)(struct net_device *, u8); + u8 (*setstate)(struct net_device *, u8); void (*getpermhwaddr)(struct net_device *, u8 *); void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8); void (*setpgbwgcfgtx)(struct net_device *, int, u8); -- cgit v1.1 From f4314e815e87b4ab1c9b1115dd5853cd20ca999c Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Sun, 21 Dec 2008 20:10:29 -0800 Subject: net: add DCNA attribute to the BCN interface for DCB Adds the Backward Congestion Notification Address (BCNA) attribute to the Backward Congestion Notification (BCN) interface for Data Center Bridging (DCB), which was missing. Receive the BCNA attribute in the ixgbe driver. The BCNA attribute is for a switch to inform the endstation about the physical port identification in order to support BCN on aggregated links. Signed-off-by: Don Skidmore Signed-off-by: Eric W Multanen Signed-off-by: Jeff Kirsher --- include/linux/dcbnl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index e73a614..b0ef274 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -305,6 +305,8 @@ enum dcbnl_bcn_attrs{ DCB_BCN_ATTR_RP_7, DCB_BCN_ATTR_RP_ALL, + DCB_BCN_ATTR_BCNA_0, + DCB_BCN_ATTR_BCNA_1, DCB_BCN_ATTR_ALPHA, DCB_BCN_ATTR_BETA, DCB_BCN_ATTR_GD, -- cgit v1.1 From 908a7a16b852ffd618a9127be8d62432182d81b4 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Dec 2008 20:43:12 -0800 Subject: net: Remove unused netdev arg from some NAPI interfaces. When the napi api was changed to separate its 1:1 binding to the net_device struct, the netif_rx_[prep|schedule|complete] api failed to remove the now vestigual net_device structure parameter. This patch cleans up that api by properly removing it.. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 58856b6..41e1224 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1555,8 +1555,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) } /* Test if receive needs to be scheduled but only if up */ -static inline int netif_rx_schedule_prep(struct net_device *dev, - struct napi_struct *napi) +static inline int netif_rx_schedule_prep(struct napi_struct *napi) { return napi_schedule_prep(napi); } @@ -1564,27 +1563,24 @@ static inline int netif_rx_schedule_prep(struct net_device *dev, /* Add interface to tail of rx poll list. This assumes that _prep has * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) +static inline void __netif_rx_schedule(struct napi_struct *napi) { __napi_schedule(napi); } /* Try to reschedule poll. Called by irq handler. */ -static inline void netif_rx_schedule(struct net_device *dev, - struct napi_struct *napi) +static inline void netif_rx_schedule(struct napi_struct *napi) { - if (netif_rx_schedule_prep(dev, napi)) - __netif_rx_schedule(dev, napi); + if (netif_rx_schedule_prep(napi)) + __netif_rx_schedule(napi); } /* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). */ -static inline int netif_rx_reschedule(struct net_device *dev, - struct napi_struct *napi) +static inline int netif_rx_reschedule(struct napi_struct *napi) { if (napi_schedule_prep(napi)) { - __netif_rx_schedule(dev, napi); + __netif_rx_schedule(napi); return 1; } return 0; @@ -1593,8 +1589,7 @@ static inline int netif_rx_reschedule(struct net_device *dev, /* same as netif_rx_complete, except that local_irq_save(flags) * has already been issued */ -static inline void __netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) +static inline void __netif_rx_complete(struct napi_struct *napi) { __napi_complete(napi); } @@ -1604,8 +1599,7 @@ static inline void __netif_rx_complete(struct net_device *dev, * it completes the work. The device cannot be out of poll list at this * moment, it is BUG(). */ -static inline void netif_rx_complete(struct net_device *dev, - struct napi_struct *napi) +static inline void netif_rx_complete(struct napi_struct *napi) { napi_complete(napi); } -- cgit v1.1 From aea3c5c05d2c409e93bfa80dcedc06af7da6c13b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 25 Dec 2008 16:57:24 -0800 Subject: sctp: Implement socket option SCTP_GET_ASSOC_NUMBER Implement socket option SCTP_GET_ASSOC_NUMBER of the latest ietf socket extensions API draft. 8.2.5. Get the Current Number of Associations (SCTP_GET_ASSOC_NUMBER) This option gets the current number of associations that are attached to a one-to-many style socket. The option value is an uint32_t. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/user.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index f205b10..b259fc5 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -118,6 +118,8 @@ enum sctp_optname { #define SCTP_PEER_AUTH_CHUNKS SCTP_PEER_AUTH_CHUNKS SCTP_LOCAL_AUTH_CHUNKS, /* Read only */ #define SCTP_LOCAL_AUTH_CHUNKS SCTP_LOCAL_AUTH_CHUNKS + SCTP_GET_ASSOC_NUMBER, /* Read only */ +#define SCTP_GET_ASSOC_NUMBER SCTP_GET_ASSOC_NUMBER /* Internal Socket Options. Some of the sctp library functions are -- cgit v1.1 From 619e803d3c1b7bcc17c45e81f309d0b9b3df2d5d Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 25 Dec 2008 17:21:17 -0800 Subject: netlink: fix (theoretical) overrun in message iteration See commit 1045b03e07d85f3545118510a587035536030c1c ("netlink: fix overrun in attribute iteration") for a detailed explanation of why this patch is necessary. In short, nlmsg_next() can make "remaining" go negative, and the remaining >= sizeof(...) comparison will promote "remaining" to an unsigned type, which means that the expression will evaluate to true for negative numbers, even though it was not intended. I put "theoretical" in the title because I have no evidence that this can actually happen, but I suspect that a crafted netlink packet can trigger some badness. Note that the last test, which seemingly has the exact same problem (also true for nla_ok()), is perfectly OK, since we already know that remaining is positive. Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 46b7764..8a6150a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -332,7 +332,7 @@ static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) */ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) { - return (remaining >= sizeof(struct nlmsghdr) && + return (remaining >= (int) sizeof(struct nlmsghdr) && nlh->nlmsg_len >= sizeof(struct nlmsghdr) && nlh->nlmsg_len <= remaining); } -- cgit v1.1