diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 6 | ||||
-rw-r--r-- | net/core/dev.c | 62 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 97 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 56 | ||||
-rw-r--r-- | net/core/ethtool.c | 130 | ||||
-rw-r--r-- | net/core/fib_rules.c | 32 | ||||
-rw-r--r-- | net/core/filter.c | 72 | ||||
-rw-r--r-- | net/core/gen_stats.c | 3 | ||||
-rw-r--r-- | net/core/kmap_skb.h | 19 | ||||
-rw-r--r-- | net/core/neighbour.c | 128 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 13 | ||||
-rw-r--r-- | net/core/net_namespace.c | 6 | ||||
-rw-r--r-- | net/core/netprio_cgroup.c | 6 | ||||
-rw-r--r-- | net/core/pktgen.c | 68 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 359 | ||||
-rw-r--r-- | net/core/skbuff.c | 446 | ||||
-rw-r--r-- | net/core/sock.c | 95 | ||||
-rw-r--r-- | net/core/sock_diag.c | 12 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 15 | ||||
-rw-r--r-- | net/core/utils.c | 9 |
20 files changed, 1139 insertions, 495 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index e4fbfd6..ae6acf6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -65,7 +65,7 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } -static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, +static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync, void *key) { unsigned long bits = (unsigned long)key; @@ -158,7 +158,7 @@ out_noerr: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { struct sk_buff *skb; @@ -216,7 +216,7 @@ no_packet: } EXPORT_SYMBOL(__skb_recv_datagram); -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int noblock, int *err) { int peeked, off = 0; diff --git a/net/core/dev.c b/net/core/dev.c index 99e1d75..cd09819 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -208,7 +208,8 @@ static inline void dev_base_seq_inc(struct net *net) static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { - unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); + unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); + return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; } @@ -299,10 +300,9 @@ static const unsigned short netdev_lock_type[] = ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, - ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, + ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, + ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; static const char *const netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -317,10 +317,9 @@ static const char *const netdev_lock_name[] = "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_IEEE802154", - "_xmit_VOID", "_xmit_NONE"}; + "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", + "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", + "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; @@ -1676,10 +1675,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (skb_network_header(skb2) < skb2->data || skb2->network_header > skb2->tail) { - if (net_ratelimit()) - pr_crit("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); skb_reset_network_header(skb2); } @@ -2316,11 +2314,9 @@ EXPORT_SYMBOL(__skb_tx_hash); static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { - if (net_ratelimit()) { - pr_warn("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - } + net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); return 0; } return queue_index; @@ -2562,17 +2558,15 @@ int dev_queue_xmit(struct sk_buff *skb) } } HARD_TX_UNLOCK(dev, txq); - if (net_ratelimit()) - pr_crit("Virtual device %s asks to queue packet!\n", - dev->name); + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", + dev->name); } else { /* Recursion is detected! It is possible, * unfortunately */ recursion_alert: - if (net_ratelimit()) - pr_crit("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); } } @@ -3053,9 +3047,8 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) struct Qdisc *q; if (unlikely(MAX_RED_LOOP < ttl++)) { - if (net_ratelimit()) - pr_warn("Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); + net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -3515,10 +3508,16 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_DROP: - case GRO_MERGED_FREE: kfree_skb(skb); break; + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + kmem_cache_free(skbuff_head_cache, skb); + else + __kfree_skb(skb); + break; + case GRO_HELD: case GRO_MERGED: break; @@ -3603,7 +3602,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, } EXPORT_SYMBOL(napi_frags_finish); -struct sk_buff *napi_frags_skb(struct napi_struct *napi) +static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; struct ethhdr *eth; @@ -3638,7 +3637,6 @@ struct sk_buff *napi_frags_skb(struct napi_struct *napi) out: return skb; } -EXPORT_SYMBOL(napi_frags_skb); gro_result_t napi_gro_frags(struct napi_struct *napi) { @@ -4592,9 +4590,9 @@ void dev_set_rx_mode(struct net_device *dev) * * Get the combination of flag bits exported through APIs to userspace. */ -unsigned dev_get_flags(const struct net_device *dev) +unsigned int dev_get_flags(const struct net_device *dev) { - unsigned flags; + unsigned int flags; flags = (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI | diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 626698f..c4cc2bc 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -21,12 +21,35 @@ * General list handling functions */ +static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, + unsigned char *addr, int addr_len, + unsigned char addr_type, bool global) +{ + struct netdev_hw_addr *ha; + int alloc_size; + + alloc_size = sizeof(*ha); + if (alloc_size < L1_CACHE_BYTES) + alloc_size = L1_CACHE_BYTES; + ha = kmalloc(alloc_size, GFP_ATOMIC); + if (!ha) + return -ENOMEM; + memcpy(ha->addr, addr, addr_len); + ha->type = addr_type; + ha->refcount = 1; + ha->global_use = global; + ha->synced = false; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; + + return 0; +} + static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, unsigned char *addr, int addr_len, unsigned char addr_type, bool global) { struct netdev_hw_addr *ha; - int alloc_size; if (addr_len > MAX_ADDR_LEN) return -EINVAL; @@ -46,21 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, } } - - alloc_size = sizeof(*ha); - if (alloc_size < L1_CACHE_BYTES) - alloc_size = L1_CACHE_BYTES; - ha = kmalloc(alloc_size, GFP_ATOMIC); - if (!ha) - return -ENOMEM; - memcpy(ha->addr, addr, addr_len); - ha->type = addr_type; - ha->refcount = 1; - ha->global_use = global; - ha->synced = false; - list_add_tail_rcu(&ha->list, &list->list); - list->count++; - return 0; + return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); } static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, @@ -377,6 +386,34 @@ EXPORT_SYMBOL(dev_addr_del_multiple); */ /** + * dev_uc_add_excl - Add a global secondary unicast address + * @dev: device + * @addr: address to add + */ +int dev_uc_add_excl(struct net_device *dev, unsigned char *addr) +{ + struct netdev_hw_addr *ha; + int err; + + netif_addr_lock_bh(dev); + list_for_each_entry(ha, &dev->uc.list, list) { + if (!memcmp(ha->addr, addr, dev->addr_len) && + ha->type == NETDEV_HW_ADDR_T_UNICAST) { + err = -EEXIST; + goto out; + } + } + err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST, true); + if (!err) + __dev_set_rx_mode(dev); +out: + netif_addr_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_uc_add_excl); + +/** * dev_uc_add - Add a secondary unicast address * @dev: device * @addr: address to add @@ -501,6 +538,34 @@ EXPORT_SYMBOL(dev_uc_init); * Multicast list handling functions */ +/** + * dev_mc_add_excl - Add a global secondary multicast address + * @dev: device + * @addr: address to add + */ +int dev_mc_add_excl(struct net_device *dev, unsigned char *addr) +{ + struct netdev_hw_addr *ha; + int err; + + netif_addr_lock_bh(dev); + list_for_each_entry(ha, &dev->mc.list, list) { + if (!memcmp(ha->addr, addr, dev->addr_len) && + ha->type == NETDEV_HW_ADDR_T_MULTICAST) { + err = -EEXIST; + goto out; + } + } + err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_MULTICAST, true); + if (!err) + __dev_set_rx_mode(dev); +out: + netif_addr_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_mc_add_excl); + static int __dev_mc_add(struct net_device *dev, unsigned char *addr, bool global) { diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index a7cad74..3252e7e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -4,6 +4,8 @@ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> @@ -22,6 +24,7 @@ #include <linux/timer.h> #include <linux/bitops.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/genetlink.h> #include <net/netevent.h> @@ -261,9 +264,15 @@ static int set_all_monitor_traces(int state) switch (state) { case TRACE_ON: + if (!try_module_get(THIS_MODULE)) { + rc = -ENODEV; + break; + } + rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); break; + case TRACE_OFF: rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); @@ -279,6 +288,9 @@ static int set_all_monitor_traces(int state) kfree_rcu(new_stat, rcu); } } + + module_put(THIS_MODULE); + break; default: rc = 1; @@ -381,10 +393,10 @@ static int __init init_net_drop_monitor(void) struct per_cpu_dm_data *data; int cpu, rc; - printk(KERN_INFO "Initializing network drop monitor service\n"); + pr_info("Initializing network drop monitor service\n"); if (sizeof(void *) > 8) { - printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + pr_err("Unable to store program counters on this arch, Drop monitor failed\n"); return -ENOSPC; } @@ -392,19 +404,19 @@ static int __init init_net_drop_monitor(void) dropmon_ops, ARRAY_SIZE(dropmon_ops)); if (rc) { - printk(KERN_ERR "Could not create drop monitor netlink family\n"); + pr_err("Could not create drop monitor netlink family\n"); return rc; } rc = register_netdevice_notifier(&dropmon_net_notifier); if (rc < 0) { - printk(KERN_CRIT "Failed to register netdevice notifier\n"); + pr_crit("Failed to register netdevice notifier\n"); goto out_unreg; } rc = 0; - for_each_present_cpu(cpu) { + for_each_possible_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); data->cpu = cpu; INIT_WORK(&data->dm_alert_work, send_dm_alert); @@ -423,4 +435,36 @@ out: return rc; } -late_initcall(init_net_drop_monitor); +static void exit_net_drop_monitor(void) +{ + struct per_cpu_dm_data *data; + int cpu; + + BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); + + /* + * Because of the module_get/put we do in the trace state change path + * we are guarnateed not to have any current users when we get here + * all we need to do is make sure that we don't have any running timers + * or pending schedule calls + */ + + for_each_possible_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + /* + * At this point, we should have exclusive access + * to this struct and can free the skb inside it + */ + kfree_skb(data->skb); + } + + BUG_ON(genl_unregister_family(&net_drop_monitor_family)); +} + +module_init(init_net_drop_monitor); +module_exit(exit_net_drop_monitor); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6d6d7d2..9c2afb4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,8 @@ #include <linux/errno.h> #include <linux/ethtool.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> +#include <linux/phy.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> @@ -36,6 +38,17 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); +int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +{ + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} +EXPORT_SYMBOL(ethtool_op_get_ts_info); + /* Handlers for each ethtool command */ #define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) @@ -738,18 +751,17 @@ static int ethtool_get_link(struct net_device *dev, char __user *useraddr) return 0; } -static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) +static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, + int (*getter)(struct net_device *, + struct ethtool_eeprom *, u8 *), + u32 total_len) { struct ethtool_eeprom eeprom; - const struct ethtool_ops *ops = dev->ethtool_ops; void __user *userbuf = useraddr + sizeof(eeprom); u32 bytes_remaining; u8 *data; int ret = 0; - if (!ops->get_eeprom || !ops->get_eeprom_len) - return -EOPNOTSUPP; - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) return -EFAULT; @@ -758,7 +770,7 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) return -EINVAL; /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + if (eeprom.offset + eeprom.len > total_len) return -EINVAL; data = kmalloc(PAGE_SIZE, GFP_USER); @@ -769,7 +781,7 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) while (bytes_remaining > 0) { eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - ret = ops->get_eeprom(dev, &eeprom, data); + ret = getter(dev, &eeprom, data); if (ret) break; if (copy_to_user(userbuf, data, eeprom.len)) { @@ -790,6 +802,17 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->get_eeprom || !ops->get_eeprom_len) + return -EOPNOTSUPP; + + return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, + ops->get_eeprom_len(dev)); +} + static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -1278,6 +1301,81 @@ out: return ret; } +static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) +{ + int err = 0; + struct ethtool_ts_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GET_TS_INFO; + + if (phydev && phydev->drv && phydev->drv->ts_info) { + + err = phydev->drv->ts_info(phydev, &info); + + } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) { + + err = ops->get_ts_info(dev, &info); + + } else { + info.so_timestamping = + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info.phc_index = -1; + } + + if (err) + return err; + + if (copy_to_user(useraddr, &info, sizeof(info))) + err = -EFAULT; + + return err; +} + +static int ethtool_get_module_info(struct net_device *dev, + void __user *useraddr) +{ + int ret; + struct ethtool_modinfo modinfo; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->get_module_info) + return -EOPNOTSUPP; + + if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) + return -EFAULT; + + ret = ops->get_module_info(dev, &modinfo); + if (ret) + return ret; + + if (copy_to_user(useraddr, &modinfo, sizeof(modinfo))) + return -EFAULT; + + return 0; +} + +static int ethtool_get_module_eeprom(struct net_device *dev, + void __user *useraddr) +{ + int ret; + struct ethtool_modinfo modinfo; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->get_module_info || !ops->get_module_eeprom) + return -EOPNOTSUPP; + + ret = ops->get_module_info(dev, &modinfo); + if (ret) + return ret; + + return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + modinfo.eeprom_len); +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1295,11 +1393,13 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return -EFAULT; if (!dev->ethtool_ops) { - /* ETHTOOL_GDRVINFO does not require any driver support. - * It is also unprivileged and does not change anything, - * so we can take a shortcut to it. */ + /* A few commands do not require any driver support, + * are unprivileged, and do not change anything, so we + * can take a shortcut to them. */ if (ethcmd == ETHTOOL_GDRVINFO) return ethtool_get_drvinfo(dev, useraddr); + else if (ethcmd == ETHTOOL_GET_TS_INFO) + return ethtool_get_ts_info(dev, useraddr); else return -EOPNOTSUPP; } @@ -1330,6 +1430,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GFEATURES: + case ETHTOOL_GET_TS_INFO: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1496,6 +1597,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GET_DUMP_DATA: rc = ethtool_get_dump_data(dev, useraddr); break; + case ETHTOOL_GET_TS_INFO: + rc = ethtool_get_ts_info(dev, useraddr); + break; + case ETHTOOL_GMODULEINFO: + rc = ethtool_get_module_info(dev, useraddr); + break; + case ETHTOOL_GMODULEEEPROM: + rc = ethtool_get_module_eeprom(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index c02e63c..72cceb7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -542,7 +542,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; frh->table = rule->table; - NLA_PUT_U32(skb, FRA_TABLE, rule->table); + if (nla_put_u32(skb, FRA_TABLE, rule->table)) + goto nla_put_failure; frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; @@ -553,31 +554,28 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { - NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname); - + if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) + goto nla_put_failure; if (rule->iifindex == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { - NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname); - + if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) + goto nla_put_failure; if (rule->oifindex == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } - if (rule->pref) - NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); - - if (rule->mark) - NLA_PUT_U32(skb, FRA_FWMARK, rule->mark); - - if (rule->mark_mask || rule->mark) - NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask); - - if (rule->target) - NLA_PUT_U32(skb, FRA_GOTO, rule->target); - + if ((rule->pref && + nla_put_u32(skb, FRA_PRIORITY, rule->pref)) || + (rule->mark && + nla_put_u32(skb, FRA_FWMARK, rule->mark)) || + ((rule->mark_mask || rule->mark) && + nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) || + (rule->target && + nla_put_u32(skb, FRA_GOTO, rule->target))) + goto nla_put_failure; if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/core/filter.c b/net/core/filter.c index 6f755cc..47a5f05 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -317,6 +317,9 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_ALU_XOR_X: + A ^= X; + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -528,7 +531,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) * Compare this with conditional jumps below, * where offsets are limited. --ANK (981016) */ - if (ftest->k >= (unsigned)(flen-pc-1)) + if (ftest->k >= (unsigned int)(flen-pc-1)) return -EINVAL; break; case BPF_S_JMP_JEQ_K: @@ -561,6 +564,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(HATYPE); ANCILLARY(RXHASH); ANCILLARY(CPU); + ANCILLARY(ALU_XOR_X); } } ftest->code = code; @@ -589,6 +593,67 @@ void sk_filter_release_rcu(struct rcu_head *rcu) } EXPORT_SYMBOL(sk_filter_release_rcu); +static int __sk_prepare_filter(struct sk_filter *fp) +{ + int err; + + fp->bpf_func = sk_run_filter; + + err = sk_chk_filter(fp->insns, fp->len); + if (err) + return err; + + bpf_jit_compile(fp); + return 0; +} + +/** + * sk_unattached_filter_create - create an unattached filter + * @fprog: the filter program + * @sk: the socket to use + * + * Create a filter independent ofr any socket. We first run some + * sanity checks on it to make sure it does not explode on us later. + * If an error occurs or there is insufficient memory for the filter + * a negative errno code is returned. On success the return is zero. + */ +int sk_unattached_filter_create(struct sk_filter **pfp, + struct sock_fprog *fprog) +{ + struct sk_filter *fp; + unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + int err; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL) + return -EINVAL; + + fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); + if (!fp) + return -ENOMEM; + memcpy(fp->insns, fprog->filter, fsize); + + atomic_set(&fp->refcnt, 1); + fp->len = fprog->len; + + err = __sk_prepare_filter(fp); + if (err) + goto free_mem; + + *pfp = fp; + return 0; +free_mem: + kfree(fp); + return err; +} +EXPORT_SYMBOL_GPL(sk_unattached_filter_create); + +void sk_unattached_filter_destroy(struct sk_filter *fp) +{ + sk_filter_release(fp); +} +EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -619,16 +684,13 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; - fp->bpf_func = sk_run_filter; - err = sk_chk_filter(fp->insns, fp->len); + err = __sk_prepare_filter(fp); if (err) { sk_filter_uncharge(sk, fp); return err; } - bpf_jit_compile(fp); - old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 0452eb2..ddedf21 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -27,7 +27,8 @@ static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) { - NLA_PUT(d->skb, type, size, buf); + if (nla_put(d->skb, type, size, buf)) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h deleted file mode 100644 index 52d0a44..0000000 --- a/net/core/kmap_skb.h +++ /dev/null @@ -1,19 +0,0 @@ -#include <linux/highmem.h> - -static inline void *kmap_skb_frag(const skb_frag_t *frag) -{ -#ifdef CONFIG_HIGHMEM - BUG_ON(in_irq()); - - local_bh_disable(); -#endif - return kmap_atomic(skb_frag_page(frag)); -} - -static inline void kunmap_skb_frag(void *vaddr) -{ - kunmap_atomic(vaddr); -#ifdef CONFIG_HIGHMEM - local_bh_enable(); -#endif -} diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0a68045..eb09f8b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -15,6 +15,8 @@ * Harald Welte Add neighbour cache statistics like rtstat */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> @@ -712,14 +714,13 @@ void neigh_destroy(struct neighbour *neigh) NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { - printk(KERN_WARNING - "Destroying alive neighbour %p\n", neigh); + pr_warn("Destroying alive neighbour %p\n", neigh); dump_stack(); return; } if (neigh_del_timer(neigh)) - printk(KERN_WARNING "Impossible event.\n"); + pr_warn("Impossible event\n"); skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; @@ -890,7 +891,7 @@ static void neigh_timer_handler(unsigned long arg) { unsigned long now, next; struct neighbour *neigh = (struct neighbour *)arg; - unsigned state; + unsigned int state; int notify = 0; write_lock(&neigh->lock); @@ -1500,7 +1501,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms) static struct lock_class_key neigh_table_proxy_queue_class; -void neigh_table_init_no_netlink(struct neigh_table *tbl) +static void neigh_table_init_no_netlink(struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; @@ -1538,7 +1539,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; } -EXPORT_SYMBOL(neigh_table_init_no_netlink); void neigh_table_init(struct neigh_table *tbl) { @@ -1555,8 +1555,8 @@ void neigh_table_init(struct neigh_table *tbl) write_unlock(&neigh_tbl_lock); if (unlikely(tmp)) { - printk(KERN_ERR "NEIGH: Registering multiple tables for " - "family %d\n", tbl->family); + pr_err("Registering multiple tables for family %d\n", + tbl->family); dump_stack(); } } @@ -1572,7 +1572,7 @@ int neigh_table_clear(struct neigh_table *tbl) pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) - printk(KERN_CRIT "neighbour leakage\n"); + pr_crit("neighbour leakage\n"); write_lock(&neigh_tbl_lock); for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { if (*tp == tbl) { @@ -1768,29 +1768,29 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if (nest == NULL) return -ENOBUFS; - if (parms->dev) - NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); - - NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes); - /* approximative value for deprecated QUEUE_LEN (in packets) */ - NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, - DIV_ROUND_UP(parms->queue_len_bytes, - SKB_TRUESIZE(ETH_FRAME_LEN))); - NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); - NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); - NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); - NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); - NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); - NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, - parms->base_reachable_time); - NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); - NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); - NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); - NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); - NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); - NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); - + if ((parms->dev && + nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || + nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || + nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || + /* approximative value for deprecated QUEUE_LEN (in packets) */ + nla_put_u32(skb, NDTPA_QUEUE_LEN, + DIV_ROUND_UP(parms->queue_len_bytes, + SKB_TRUESIZE(ETH_FRAME_LEN))) || + nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || + nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || + nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || + nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || + nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || + nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, + parms->base_reachable_time) || + nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || + nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, + parms->delay_probe_time) || + nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || + nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) || + nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || + nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) + goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: @@ -1815,12 +1815,12 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); - NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); - NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); - NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); - NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); - + if (nla_put_string(skb, NDTA_NAME, tbl->id) || + nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) || + nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || + nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || + nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) + goto nla_put_failure; { unsigned long now = jiffies; unsigned int flush_delta = now - tbl->last_flush; @@ -1841,7 +1841,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); rcu_read_unlock_bh(); - NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) + goto nla_put_failure; } { @@ -1866,7 +1867,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_forced_gc_runs += st->forced_gc_runs; } - NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) + goto nla_put_failure; } BUG_ON(tbl->parms.dev); @@ -2137,7 +2139,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; - NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); + if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) + goto nla_put_failure; read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; @@ -2157,8 +2160,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); - NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); - NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || + nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2187,7 +2191,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_ifindex = pn->dev->ifindex; ndm->ndm_state = NUD_NONE; - NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key); + if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2795,7 +2800,6 @@ enum { static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; - char *dev_name; } neigh_sysctl_template __read_mostly = { .neigh_vars = { [NEIGH_VAR_MCAST_PROBE] = { @@ -2921,19 +2925,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; - -#define NEIGH_CTL_PATH_ROOT 0 -#define NEIGH_CTL_PATH_PROTO 1 -#define NEIGH_CTL_PATH_NEIGH 2 -#define NEIGH_CTL_PATH_DEV 3 - - struct ctl_path neigh_path[] = { - { .procname = "net", }, - { .procname = "proto", }, - { .procname = "neigh", }, - { .procname = "default", }, - { }, - }; + char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) @@ -2961,7 +2953,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { - dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; + dev_name_source = "default"; t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; @@ -2984,23 +2976,16 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } - t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); - if (!t->dev_name) - goto free; - - neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; - neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; - + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", + p_name, dev_name_source); t->sysctl_header = - register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); + register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); if (!t->sysctl_header) - goto free_procname; + goto free; p->sysctl_table = t; return 0; -free_procname: - kfree(t->dev_name); free: kfree(t); err: @@ -3013,8 +2998,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p) if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; - unregister_sysctl_table(t->sysctl_header); - kfree(t->dev_name); + unregister_net_sysctl_table(t->sysctl_header); kfree(t); } } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4955862..fdf9e61 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -74,15 +74,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, int (*set)(struct net_device *, unsigned long)) { struct net_device *net = to_net_dev(dev); - char *endp; unsigned long new; int ret = -EINVAL; if (!capable(CAP_NET_ADMIN)) return -EPERM; - new = simple_strtoul(buf, &endp, 0); - if (endp == buf) + ret = kstrtoul(buf, 0, &new); + if (ret) goto err; if (!rtnl_trylock()) @@ -232,7 +231,7 @@ NETDEVICE_SHOW(flags, fmt_hex); static int change_flags(struct net_device *net, unsigned long new_flags) { - return dev_change_flags(net, (unsigned) new_flags); + return dev_change_flags(net, (unsigned int) new_flags); } static ssize_t store_flags(struct device *dev, struct device_attribute *attr, @@ -582,7 +581,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } - map = kzalloc(max_t(unsigned, + map = kzalloc(max_t(unsigned int, RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), GFP_KERNEL); if (!map) { @@ -903,7 +902,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue, const char *buf, size_t len) { struct dql *dql = &queue->dql; - unsigned value; + unsigned int value; int err; err = kstrtouint(buf, 10, &value); @@ -1107,7 +1106,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err; } - new_dev_maps = kzalloc(max_t(unsigned, + new_dev_maps = kzalloc(max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); if (!new_dev_maps) { free_cpumask_var(mask); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 31a5ae5..dddbacb 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> @@ -212,8 +214,8 @@ static void net_free(struct net *net) { #ifdef NETNS_REFCNT_DEBUG if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); + pr_emerg("network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); return; } #endif diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ba6900f..09eda68 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -9,6 +9,8 @@ * Authors: Neil Horman <nhorman@tuxdriver.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> @@ -88,7 +90,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) old_priomap = rtnl_dereference(dev->priomap); if (!new_priomap) { - printk(KERN_WARNING "Unable to alloc new priomap!\n"); + pr_warn("Unable to alloc new priomap!\n"); return; } @@ -136,7 +138,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) ret = get_prioidx(&cs->prioidx); if (ret != 0) { - printk(KERN_WARNING "No space in priority index array\n"); + pr_warn("No space in priority index array\n"); kfree(cs); return ERR_PTR(ret); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b81369b..cce9e53 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -320,7 +320,7 @@ struct pktgen_dev { (see RFC 3260, sec. 4) */ /* MPLS */ - unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ + unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */ __be32 labels[MAX_MPLS_LABELS]; /* VLAN/SVLAN (802.1Q/Q-in-Q) */ @@ -373,10 +373,10 @@ struct pktgen_dev { */ char odevname[32]; struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ - unsigned curfl; /* current sequenced flow (state)*/ + unsigned int cflows; /* Concurrent flows (config) */ + unsigned int lflow; /* Flow length (config) */ + unsigned int nflows; /* accumulated flows (stats) */ + unsigned int curfl; /* current sequenced flow (state)*/ u16 queue_map_min; u16 queue_map_max; @@ -592,7 +592,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->nr_labels) { - unsigned i; + unsigned int i; seq_printf(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), @@ -812,7 +812,7 @@ done_str: static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) { - unsigned n = 0; + unsigned int n = 0; char c; ssize_t i = 0; int len; @@ -891,8 +891,8 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(tb, user_buffer, copy)) return -EFAULT; tb[copy] = 0; - printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, - (unsigned long)count, tb); + pr_debug("%s,%lu buffer -:%s:-\n", + name, (unsigned long)count, tb); } if (!strcmp(name, "min_pkt_size")) { @@ -1261,8 +1261,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_min; } if (debug) - printk(KERN_DEBUG "pktgen: dst_min set to: %s\n", - pkt_dev->dst_min); + pr_debug("dst_min set to: %s\n", pkt_dev->dst_min); i += len; sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); return count; @@ -1284,8 +1283,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_max; } if (debug) - printk(KERN_DEBUG "pktgen: dst_max set to: %s\n", - pkt_dev->dst_max); + pr_debug("dst_max set to: %s\n", pkt_dev->dst_max); i += len; sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); return count; @@ -1307,7 +1305,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr; if (debug) - printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); + pr_debug("dst6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6=%s", buf); @@ -1329,7 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr; if (debug) - printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); + pr_debug("dst6_min set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); @@ -1350,7 +1348,7 @@ static ssize_t pktgen_if_write(struct file *file, snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr); if (debug) - printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); + pr_debug("dst6_max set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); @@ -1373,7 +1371,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr; if (debug) - printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); + pr_debug("src6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: src6=%s", buf); @@ -1394,8 +1392,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_min; } if (debug) - printk(KERN_DEBUG "pktgen: src_min set to: %s\n", - pkt_dev->src_min); + pr_debug("src_min set to: %s\n", pkt_dev->src_min); i += len; sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); return count; @@ -1415,8 +1412,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_max; } if (debug) - printk(KERN_DEBUG "pktgen: src_max set to: %s\n", - pkt_dev->src_max); + pr_debug("src_max set to: %s\n", pkt_dev->src_max); i += len; sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); return count; @@ -1510,7 +1506,7 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "mpls")) { - unsigned n, cnt; + unsigned int n, cnt; len = get_labels(&user_buffer[i], pkt_dev); if (len < 0) @@ -1527,7 +1523,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n"); + pr_debug("VLAN/SVLAN auto turned off\n"); } return count; } @@ -1542,10 +1538,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->vlan_id = value; /* turn on VLAN */ if (debug) - printk(KERN_DEBUG "pktgen: VLAN turned on\n"); + pr_debug("VLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); + pr_debug("MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id); @@ -1554,7 +1550,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); + pr_debug("VLAN/SVLAN turned off\n"); } return count; } @@ -1599,10 +1595,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = value; /* turn on SVLAN */ if (debug) - printk(KERN_DEBUG "pktgen: SVLAN turned on\n"); + pr_debug("SVLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); + pr_debug("MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id); @@ -1611,7 +1607,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); + pr_debug("VLAN/SVLAN turned off\n"); } return count; } @@ -1779,8 +1775,7 @@ static ssize_t pktgen_thread_write(struct file *file, i += len; if (debug) - printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n", - name, (unsigned long)count); + pr_debug("t=%s, count=%lu\n", name, (unsigned long)count); if (!t) { pr_err("ERROR: No thread\n"); @@ -2324,7 +2319,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } if (pkt_dev->flags & F_MPLS_RND) { - unsigned i; + unsigned int i; for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | @@ -2550,7 +2545,7 @@ err: static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { - unsigned i; + unsigned int i; for (i = 0; i < pkt_dev->nr_labels; i++) *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM; @@ -2934,8 +2929,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); - if (net_ratelimit()) - pr_info("increased datalen to %d\n", datalen); + net_info_ratelimited("increased datalen to %d\n", datalen); } udph->source = htons(pkt_dev->cur_udp_src); @@ -3365,8 +3359,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->errors++; break; default: /* Drivers are not supposed to return other values! */ - if (net_ratelimit()) - pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret); + net_info_ratelimited("%s xmit error: %d\n", + pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ case NETDEV_TX_LOCKED: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 90430b7..21318d1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,7 +35,9 @@ #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> +#include <linux/if_bridge.h> #include <linux/pci.h> +#include <linux/etherdevice.h> #include <asm/uaccess.h> @@ -552,7 +554,7 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data } EXPORT_SYMBOL(__rta_fill); -int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) +int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; int err = 0; @@ -607,7 +609,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { valid++; - NLA_PUT_U32(skb, i+1, metrics[i]); + if (nla_put_u32(skb, i+1, metrics[i])) + goto nla_put_failure; } } @@ -782,6 +785,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(ext_filter_mask @@ -807,7 +811,8 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) vf_port = nla_nest_start(skb, IFLA_VF_PORT); if (!vf_port) goto nla_put_failure; - NLA_PUT_U32(skb, IFLA_PORT_VF, vf); + if (nla_put_u32(skb, IFLA_PORT_VF, vf)) + goto nla_put_failure; err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); if (err == -EMSGSIZE) goto nla_put_failure; @@ -891,25 +896,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN); - NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); - NLA_PUT_U32(skb, IFLA_GROUP, dev->group); - - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - - if (dev->master) - NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - - if (dev->qdisc) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id); - - if (dev->ifalias) - NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || + nla_put_u8(skb, IFLA_OPERSTATE, + netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || + nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u32(skb, IFLA_GROUP, dev->group) || + nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink)) || + (dev->master && + nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (dev->qdisc && + nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || + (dev->ifalias && + nla_put_string(skb, IFLA_IFALIAS, dev->ifalias))) + goto nla_put_failure; if (1) { struct rtnl_link_ifmap map = { @@ -920,12 +923,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, .dma = dev->dma, .port = dev->if_port, }; - NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) + goto nla_put_failure; } if (dev->addr_len) { - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); + if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || + nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast)) + goto nla_put_failure; } attr = nla_reserve(skb, IFLA_STATS, @@ -942,8 +947,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; copy_rtnl_link_stats64(nla_data(attr), stats); - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) - NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); + if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && + nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) + goto nla_put_failure; if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) { @@ -986,12 +992,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_cancel(skb, vfinfo); goto nla_put_failure; } - NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); - NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); - NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), - &vf_tx_rate); - NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk); + if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || + nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), + &vf_tx_rate) || + nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), + &vf_spoofchk)) + goto nla_put_failure; nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1113,6 +1120,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PORT_SELF] = { .type = NLA_NESTED }, [IFLA_AF_SPEC] = { .type = NLA_NESTED }, [IFLA_EXT_MASK] = { .type = NLA_U32 }, + [IFLA_PROMISCUITY] = { .type = NLA_U32 }, }; EXPORT_SYMBOL(ifla_policy); @@ -1516,11 +1524,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = 0; errout: - if (err < 0 && modified && net_ratelimit()) - printk(KERN_WARNING "A link change request failed with " - "some changes committed already. Interface %s may " - "have been left with an inconsistent configuration, " - "please check.\n", dev->name); + if (err < 0 && modified) + net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", + dev->name); if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -1634,14 +1640,14 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, int err; struct net_device *dev; unsigned int num_queues = 1; - unsigned int real_num_queues = 1; if (ops->get_tx_queues) { - err = ops->get_tx_queues(src_net, tb, &num_queues, - &real_num_queues); - if (err) + err = ops->get_tx_queues(src_net, tb); + if (err < 0) goto err; + num_queues = err; } + err = -ENOMEM; dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues); if (!dev) @@ -1947,7 +1953,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1972,6 +1978,267 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } +static int nlmsg_populate_fdb_fill(struct sk_buff *skb, + struct net_device *dev, + u8 *addr, u32 pid, u32 seq, + int type, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = flags; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = NUD_PERMANENT; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t rtnl_fdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); +} + +static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +} + +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct net_device *master = NULL; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; + struct net_device *dev; + u8 *addr; + int err; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex == 0) { + pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n"); + return -ENODEV; + } + + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + + err = -EOPNOTSUPP; + + /* Support fdb on master device the net/bridge default case */ + if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && + (dev->priv_flags & IFF_BRIDGE_PORT)) { + master = dev->master; + err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr, + nlh->nlmsg_flags); + if (err) + goto out; + else + ndm->ndm_flags &= ~NTF_MASTER; + } + + /* Embedded bridge, macvlan, and any other device support */ + if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { + err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr, + nlh->nlmsg_flags); + + if (!err) { + rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); + ndm->ndm_flags &= ~NTF_SELF; + } + } +out: + return err; +} + +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ndmsg *ndm; + struct nlattr *llattr; + struct net_device *dev; + int err = -EINVAL; + __u8 *addr; + + if (nlmsg_len(nlh) < sizeof(*ndm)) + return -EINVAL; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex == 0) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n"); + return -ENODEV; + } + + llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); + if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { + pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(llattr); + err = -EOPNOTSUPP; + + /* Support fdb on master device the net/bridge default case */ + if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && + (dev->priv_flags & IFF_BRIDGE_PORT)) { + struct net_device *master = dev->master; + + if (master->netdev_ops->ndo_fdb_del) + err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); + + if (err) + goto out; + else + ndm->ndm_flags &= ~NTF_MASTER; + } + + /* Embedded bridge, macvlan, and any other device support */ + if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { + err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + + if (!err) { + rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); + ndm->ndm_flags &= ~NTF_SELF; + } + } +out: + return err; +} + +static int nlmsg_populate_fdb(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int *idx, + struct netdev_hw_addr_list *list) +{ + struct netdev_hw_addr *ha; + int err; + u32 pid, seq; + + pid = NETLINK_CB(cb->skb).pid; + seq = cb->nlh->nlmsg_seq; + + list_for_each_entry(ha, &list->list, list) { + if (*idx < cb->args[0]) + goto skip; + + err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, + pid, seq, 0, NTF_SELF); + if (err < 0) + return err; +skip: + *idx += 1; + } + return 0; +} + +/** + * ndo_dflt_fdb_dump: default netdevice operation to dump an FDB table. + * @nlh: netlink message header + * @dev: netdevice + * + * Default netdevice operation to dump the existing unicast address list. + * Returns zero on success. + */ +int ndo_dflt_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int idx) +{ + int err; + + netif_addr_lock_bh(dev); + err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); + if (err) + goto out; + nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); +out: + netif_addr_unlock_bh(dev); + return idx; +} +EXPORT_SYMBOL(ndo_dflt_fdb_dump); + +static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_BRIDGE_PORT) { + struct net_device *master = dev->master; + const struct net_device_ops *ops = master->netdev_ops; + + if (ops->ndo_fdb_dump) + idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + } + + if (dev->netdev_ops->ndo_fdb_dump) + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + } + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2042,7 +2309,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; + unsigned int flavor = attr->rta_type; if (flavor) { if (flavor > rta_max[sz_idx]) return -EINVAL; @@ -2144,5 +2411,9 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); + + rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e598400..016694d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -36,6 +36,8 @@ * The functions in this file will not compile correctly with gcc 2.4.x */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -67,10 +69,9 @@ #include <asm/uaccess.h> #include <trace/events/skb.h> +#include <linux/highmem.h> -#include "kmap_skb.h" - -static struct kmem_cache *skbuff_head_cache __read_mostly; +struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; static void sock_pipe_buf_release(struct pipe_inode_info *pipe, @@ -119,11 +120,10 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { */ static void skb_over_panic(struct sk_buff *skb, int sz, void *here) { - printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); + pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", + __func__, here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, (unsigned long)skb->end, + skb->dev ? skb->dev->name : "<NULL>"); BUG(); } @@ -138,11 +138,10 @@ static void skb_over_panic(struct sk_buff *skb, int sz, void *here) static void skb_under_panic(struct sk_buff *skb, int sz, void *here) { - printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " - "data:%p tail:%#lx end:%#lx dev:%s\n", - here, skb->len, sz, skb->head, skb->data, - (unsigned long)skb->tail, (unsigned long)skb->end, - skb->dev ? skb->dev->name : "<NULL>"); + pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", + __func__, here, skb->len, sz, skb->head, skb->data, + (unsigned long)skb->tail, (unsigned long)skb->end, + skb->dev ? skb->dev->name : "<NULL>"); BUG(); } @@ -246,6 +245,7 @@ EXPORT_SYMBOL(__alloc_skb); /** * build_skb - build a network buffer * @data: data buffer provided by caller + * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() @@ -259,20 +259,21 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data) +struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; - unsigned int size; + unsigned int size = frag_size ? : ksize(data); skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); if (!skb) return NULL; - size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); + skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -292,6 +293,46 @@ struct sk_buff *build_skb(void *data) } EXPORT_SYMBOL(build_skb); +struct netdev_alloc_cache { + struct page *page; + unsigned int offset; +}; +static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + struct netdev_alloc_cache *nc; + void *data = NULL; + unsigned long flags; + + local_irq_save(flags); + nc = &__get_cpu_var(netdev_alloc_cache); + if (unlikely(!nc->page)) { +refill: + nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); + nc->offset = 0; + } + if (likely(nc->page)) { + if (nc->offset + fragsz > PAGE_SIZE) { + put_page(nc->page); + goto refill; + } + data = page_address(nc->page) + nc->offset; + nc->offset += fragsz; + get_page(nc->page); + } + local_irq_restore(flags); + return data; +} +EXPORT_SYMBOL(netdev_alloc_frag); + /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on @@ -306,11 +347,23 @@ EXPORT_SYMBOL(build_skb); * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) + unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; + unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) { + void *data = netdev_alloc_frag(fragsz); - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + if (likely(data)) { + skb = build_skb(data, fragsz); + if (unlikely(!skb)) + put_page(virt_to_head_page(data)); + } + } else { + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; @@ -329,28 +382,6 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, } EXPORT_SYMBOL(skb_add_rx_frag); -/** - * dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ -struct sk_buff *dev_alloc_skb(unsigned int length) -{ - /* - * There is more code here than it seems: - * __dev_alloc_skb is an inline - */ - return __dev_alloc_skb(length, GFP_ATOMIC); -} -EXPORT_SYMBOL(dev_alloc_skb); - static void skb_drop_list(struct sk_buff **listp) { struct sk_buff *list = *listp; @@ -377,6 +408,14 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static void skb_free_head(struct sk_buff *skb) +{ + if (skb->head_frag) + put_page(virt_to_head_page(skb->head)); + else + kfree(skb->head); +} + static void skb_release_data(struct sk_buff *skb) { if (!skb->cloned || @@ -403,7 +442,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); - kfree(skb->head); + skb_free_head(skb); } } @@ -645,6 +684,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(tail); C(end); C(head); + C(head_frag); C(data); C(truesize); atomic_set(&n->users, 1); @@ -707,10 +747,10 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } return -ENOMEM; } - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + vaddr = kmap_atomic(skb_frag_page(f)); memcpy(page_address(page), vaddr + f->page_offset, skb_frag_size(f)); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); page->private = (unsigned long)head; head = page; } @@ -819,7 +859,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); - unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len; + unsigned int size = skb_end_offset(skb) + skb->data_len; struct sk_buff *n = alloc_skb(size, gfp_mask); if (!n) @@ -920,9 +960,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, { int i; u8 *data; - int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail; + int size = nhead + skb_end_offset(skb) + ntail; long off; - bool fastpath; BUG_ON(nhead < 0); @@ -931,27 +970,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - /* Check if we can avoid taking references on fragments if we own - * the last reference on skb->head. (see skb_release_data()) - */ - if (!skb->cloned) - fastpath = true; - else { - int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; - } - - if (fastpath && - size + sizeof(struct skb_shared_info) <= ksize(skb->head)) { - memmove(skb->head + size, skb_shinfo(skb), - offsetof(struct skb_shared_info, - frags[skb_shinfo(skb)->nr_frags])); - memmove(skb->head + nhead, skb->head, - skb_tail_pointer(skb) - skb->head); - off = nhead; - goto adjust_others; - } - data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), gfp_mask); if (!data) @@ -967,9 +985,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); - if (fastpath) { - kfree(skb->head); - } else { + /* + * if shinfo is shared we must drop the old head gracefully, but if it + * is not we can just drop the old head and let the existing refcount + * be since all we did is relocate the values + */ + if (skb_cloned(skb)) { /* copy this zero copy skb frags */ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) @@ -982,11 +1003,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_clone_fraglist(skb); skb_release_data(skb); + } else { + skb_free_head(skb); } off = (data + nhead) - skb->head; skb->head = data; -adjust_others: + skb->head_frag = 0; skb->data += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->end = size; @@ -1275,7 +1298,7 @@ drop_pages: return -ENOMEM; nfrag->next = frag->next; - kfree_skb(frag); + consume_skb(frag); frag = nfrag; *fragp = frag; } @@ -1487,21 +1510,22 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); + end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + vaddr = kmap_atomic(skb_frag_page(f)); memcpy(to, - vaddr + skb_shinfo(skb)->frags[i].page_offset+ - offset - start, copy); - kunmap_skb_frag(vaddr); + vaddr + f->page_offset + offset - start, + copy); + kunmap_atomic(vaddr); if ((len -= copy) == 0) return 0; @@ -1547,9 +1571,9 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) put_page(spd->pages[i]); } -static inline struct page *linear_to_page(struct page *page, unsigned int *len, - unsigned int *offset, - struct sk_buff *skb, struct sock *sk) +static struct page *linear_to_page(struct page *page, unsigned int *len, + unsigned int *offset, + struct sk_buff *skb, struct sock *sk) { struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1565,6 +1589,9 @@ new_page: } else { unsigned int mlen; + /* If we are the only user of the page, we can reset offset */ + if (page_count(p) == 1) + sk->sk_sndmsg_off = 0; off = sk->sk_sndmsg_off; mlen = PAGE_SIZE - off; if (mlen < 64 && mlen < *len) { @@ -1578,36 +1605,48 @@ new_page: memcpy(page_address(p) + off, page_address(page) + *offset, *len); sk->sk_sndmsg_off += *len; *offset = off; - get_page(p); return p; } +static bool spd_can_coalesce(const struct splice_pipe_desc *spd, + struct page *page, + unsigned int offset) +{ + return spd->nr_pages && + spd->pages[spd->nr_pages - 1] == page && + (spd->partial[spd->nr_pages - 1].offset + + spd->partial[spd->nr_pages - 1].len == offset); +} + /* * Fill page/offset/length into spd, if it can hold more pages. */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, - struct pipe_inode_info *pipe, struct page *page, - unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear, - struct sock *sk) +static bool spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, + unsigned int *len, unsigned int offset, + struct sk_buff *skb, bool linear, + struct sock *sk) { - if (unlikely(spd->nr_pages == pipe->buffers)) - return 1; + if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) + return true; if (linear) { page = linear_to_page(page, len, &offset, skb, sk); if (!page) - return 1; - } else - get_page(page); - + return true; + } + if (spd_can_coalesce(spd, page, offset)) { + spd->partial[spd->nr_pages - 1].len += *len; + return false; + } + get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; - return 0; + return false; } static inline void __segment_seek(struct page **page, unsigned int *poff, @@ -1624,20 +1663,20 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, *plen -= off; } -static inline int __splice_segment(struct page *page, unsigned int poff, - unsigned int plen, unsigned int *off, - unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear, - struct sock *sk, - struct pipe_inode_info *pipe) +static bool __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd, bool linear, + struct sock *sk, + struct pipe_inode_info *pipe) { if (!*len) - return 1; + return true; /* skip this segment if already processed */ if (*off >= plen) { *off -= plen; - return 0; + return false; } /* ignore any bits we already processed */ @@ -1653,34 +1692,38 @@ static inline int __splice_segment(struct page *page, unsigned int poff, flen = min_t(unsigned int, flen, PAGE_SIZE - poff); if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) - return 1; + return true; __segment_seek(&page, &poff, &plen, flen); *len -= flen; } while (*len && plen); - return 0; + return false; } /* - * Map linear and fragment data from the skb to spd. It reports failure if the + * Map linear and fragment data from the skb to spd. It reports true if the * pipe is full or if we already spliced the requested length. */ -static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, - unsigned int *offset, unsigned int *len, - struct splice_pipe_desc *spd, struct sock *sk) +static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) { int seg; - /* - * map the linear part + /* map the linear part : + * If skb->head_frag is set, this 'linear' part is backed by a + * fragment, and if the head is not shared with any clones then + * we can avoid a copy since we own the head portion of this page. */ if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1, sk, pipe)) - return 1; + offset, len, skb, spd, + skb_head_is_locked(skb), + sk, pipe)) + return true; /* * then map the fragments @@ -1690,11 +1733,11 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), f->page_offset, skb_frag_size(f), - offset, len, skb, spd, 0, sk, pipe)) - return 1; + offset, len, skb, spd, false, sk, pipe)) + return true; } - return 0; + return false; } /* @@ -1707,8 +1750,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { - struct partial_page partial[PIPE_DEF_BUFFERS]; - struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[MAX_SKB_FRAGS]; + struct page *pages[MAX_SKB_FRAGS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1720,9 +1763,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sock *sk = skb->sk; int ret = 0; - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. @@ -1758,7 +1798,6 @@ done: lock_sock(sk); } - splice_shrink_spd(pipe, &spd); return ret; } @@ -1806,10 +1845,10 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); memcpy(vaddr + frag->page_offset + offset - start, from, copy); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); if ((len -= copy) == 0) return 0; @@ -1869,21 +1908,21 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); csum2 = csum_partial(vaddr + frag->page_offset + offset - start, copy, 0); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); if (!(len -= copy)) return csum; @@ -1955,12 +1994,12 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, if (copy > len) copy = len; - vaddr = kmap_skb_frag(frag); + vaddr = kmap_atomic(skb_frag_page(frag)); csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + offset - start, to, copy, 0); - kunmap_skb_frag(vaddr); + kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); if (!(len -= copy)) return csum; @@ -2480,7 +2519,7 @@ next_skb: if (abs_offset < block_limit) { if (!st->frag_data) - st->frag_data = kmap_skb_frag(frag); + st->frag_data = kmap_atomic(skb_frag_page(frag)); *data = (u8 *) st->frag_data + frag->page_offset + (abs_offset - st->stepped_offset); @@ -2489,7 +2528,7 @@ next_skb: } if (st->frag_data) { - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); st->frag_data = NULL; } @@ -2498,7 +2537,7 @@ next_skb: } if (st->frag_data) { - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); st->frag_data = NULL; } @@ -2526,7 +2565,7 @@ EXPORT_SYMBOL(skb_seq_read); void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) - kunmap_skb_frag(st->frag_data); + kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); @@ -2718,14 +2757,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (unlikely(!nskb)) goto err; - hsize = skb_end_pointer(nskb) - nskb->head; + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } - nskb->truesize += skb_end_pointer(nskb) - nskb->head - - hsize; + nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { @@ -2843,6 +2881,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int len = skb_gro_len(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); + unsigned int delta_truesize; if (p->len + len >= 65536) return -E2BIG; @@ -2872,11 +2911,41 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) frag->page_offset += offset; skb_frag_size_sub(frag, offset); + /* all fragments truesize : remove (head size + sk_buff) */ + delta_truesize = skb->truesize - + SKB_TRUESIZE(skb_end_offset(skb)); + skb->truesize -= skb->data_len; skb->len -= skb->data_len; skb->data_len = 0; - NAPI_GRO_CB(skb)->free = 1; + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; + goto done; + } else if (skb->head_frag) { + int nr_frags = pinfo->nr_frags; + skb_frag_t *frag = pinfo->frags + nr_frags; + struct page *page = virt_to_head_page(skb->head); + unsigned int first_size = headlen - offset; + unsigned int first_offset; + + if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) + return -E2BIG; + + first_offset = skb->data - + (unsigned char *)page_address(page) + + offset; + + pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; + + frag->page.p = page; + frag->page_offset = first_offset; + skb_frag_size_set(frag, first_size); + + memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); + /* We dont need to clear skbinfo->nr_frags here */ + + delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } else if (skb_gro_len(p) != pinfo->gso_size) return -E2BIG; @@ -2918,7 +2987,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: - p->truesize += skb->truesize - len; + delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; @@ -2938,7 +3007,7 @@ merge: done: NAPI_GRO_CB(p)->count++; p->data_len += len; - p->truesize += len; + p->truesize += delta_truesize; p->len += len; NAPI_GRO_CB(skb)->same_flow = 1; @@ -3166,7 +3235,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) int len = skb->len; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) + (unsigned int)sk->sk_rcvbuf) return -ENOMEM; skb_orphan(skb); @@ -3260,10 +3329,8 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { if (unlikely(start > skb_headlen(skb)) || unlikely((int)start + off > skb_headlen(skb) - 2)) { - if (net_ratelimit()) - printk(KERN_WARNING - "bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", + start, off, skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; @@ -3275,8 +3342,93 @@ EXPORT_SYMBOL_GPL(skb_partial_csum_set); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { - if (net_ratelimit()) - pr_warning("%s: received packets cannot be forwarded" - " while LRO is enabled\n", skb->dev->name); + net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", + skb->dev->name); } EXPORT_SYMBOL(__skb_warn_lro_forwarding); + +void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) +{ + if (head_stolen) + kmem_cache_free(skbuff_head_cache, skb); + else + __kfree_skb(skb); +} +EXPORT_SYMBOL(kfree_skb_partial); + +/** + * skb_try_coalesce - try to merge skb to prior one + * @to: prior buffer + * @from: buffer to add + * @fragstolen: pointer to boolean + * + */ +bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, + bool *fragstolen, int *delta_truesize) +{ + int i, delta, len = from->len; + + *fragstolen = false; + + if (skb_cloned(to)) + return false; + + if (len <= skb_tailroom(to)) { + BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); + *delta_truesize = 0; + return true; + } + + if (skb_has_frag_list(to) || skb_has_frag_list(from)) + return false; + + if (skb_headlen(from) != 0) { + struct page *page; + unsigned int offset; + + if (skb_shinfo(to)->nr_frags + + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + return false; + + if (skb_head_is_locked(from)) + return false; + + delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); + + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + + skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + page, offset, skb_headlen(from)); + *fragstolen = true; + } else { + if (skb_shinfo(to)->nr_frags + + skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + return false; + + delta = from->truesize - + SKB_TRUESIZE(skb_end_pointer(from) - from->head); + } + + WARN_ON_ONCE(delta < len); + + memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, + skb_shinfo(from)->frags, + skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); + skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + + if (!skb_cloned(from)) + skb_shinfo(from)->nr_frags = 0; + + /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) + skb_frag_ref(from, i); + + to->truesize += delta; + to->len += len; + to->data_len += len; + + *delta_truesize = delta; + return true; +} +EXPORT_SYMBOL(skb_try_coalesce); diff --git a/net/core/sock.c b/net/core/sock.c index b2e14c0..5efcd63 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -89,6 +89,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> @@ -113,6 +115,7 @@ #include <linux/user_namespace.h> #include <linux/static_key.h> #include <linux/memcontrol.h> +#include <linux/prefetch.h> #include <asm/uaccess.h> @@ -258,7 +261,9 @@ static struct lock_class_key af_callback_keys[AF_MAX]; /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +EXPORT_SYMBOL(sysctl_wmem_max); __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; @@ -294,9 +299,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) *timeo_p = 0; if (warned < 10 && net_ratelimit()) { warned++; - printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " - "tries to set negative timeout\n", - current->comm, task_pid_nr(current)); + pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", + __func__, current->comm, task_pid_nr(current)); } return 0; } @@ -314,8 +318,8 @@ static void sock_warn_obsolete_bsdism(const char *name) static char warncomm[TASK_COMM_LEN]; if (strcmp(warncomm, current->comm) && warned < 5) { strcpy(warncomm, current->comm); - printk(KERN_WARNING "process `%s' is using obsolete " - "%s SO_BSDCOMPAT\n", warncomm, name); + pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n", + warncomm, name); warned++; } } @@ -389,7 +393,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; - if (sk_rcvqueues_full(sk, skb)) { + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } @@ -406,7 +410,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog(sk, skb)) { + } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; @@ -561,7 +565,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: - sk->sk_reuse = valbool; + sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; case SO_TYPE: case SO_PROTOCOL: @@ -577,23 +581,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_SNDBUF: /* Don't error on this BSD doesn't and if you think - about it this is right. Otherwise apps have to - play 'guess the biggest size' games. RCVBUF/SNDBUF - are treated in BSD as hints */ - - if (val > sysctl_wmem_max) - val = sysctl_wmem_max; + * about it this is right. Otherwise apps have to + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ + val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - if ((val * 2) < SOCK_MIN_SNDBUF) - sk->sk_sndbuf = SOCK_MIN_SNDBUF; - else - sk->sk_sndbuf = val * 2; - - /* - * Wake up sending tasks if we - * upped the value. - */ + sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -606,12 +602,11 @@ set_sndbuf: case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think - about it this is right. Otherwise apps have to - play 'guess the biggest size' games. RCVBUF/SNDBUF - are treated in BSD as hints */ - - if (val > sysctl_rmem_max) - val = sysctl_rmem_max; + * about it this is right. Otherwise apps have to + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ + val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* @@ -629,10 +624,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - if ((val * 2) < SOCK_MIN_RCVBUF) - sk->sk_rcvbuf = SOCK_MIN_RCVBUF; - else - sk->sk_rcvbuf = val * 2; + sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: @@ -858,7 +850,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_BROADCAST: - v.val = !!sock_flag(sk, SOCK_BROADCAST); + v.val = sock_flag(sk, SOCK_BROADCAST); break; case SO_SNDBUF: @@ -874,7 +866,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_KEEPALIVE: - v.val = !!sock_flag(sk, SOCK_KEEPOPEN); + v.val = sock_flag(sk, SOCK_KEEPOPEN); break; case SO_TYPE: @@ -896,7 +888,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_OOBINLINE: - v.val = !!sock_flag(sk, SOCK_URGINLINE); + v.val = sock_flag(sk, SOCK_URGINLINE); break; case SO_NO_CHECK: @@ -909,7 +901,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_LINGER: lv = sizeof(v.ling); - v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); + v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); v.ling.l_linger = sk->sk_lingertime / HZ; break; @@ -975,7 +967,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0; + v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); break; case SO_PEERCRED: @@ -1010,7 +1002,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSSEC: - v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; + v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); break; case SO_PEERSEC: @@ -1021,11 +1013,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RXQ_OVFL: - v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); + v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; case SO_WIFI_STATUS: - v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); + v.val = sock_flag(sk, SOCK_WIFI_STATUS); break; case SO_PEEK_OFF: @@ -1035,7 +1027,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_peek_off; break; case SO_NOFCS: - v.val = !!sock_flag(sk, SOCK_NOFCS); + v.val = sock_flag(sk, SOCK_NOFCS); break; default: return -ENOPROTOOPT; @@ -1247,8 +1239,8 @@ static void __sk_free(struct sock *sk) sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); if (atomic_read(&sk->sk_omem_alloc)) - printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", - __func__, atomic_read(&sk->sk_omem_alloc)); + pr_debug("%s: optmem leakage (%d bytes) detected\n", + __func__, atomic_read(&sk->sk_omem_alloc)); if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); @@ -1534,7 +1526,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { - if ((unsigned)size <= sysctl_optmem_max && + if ((unsigned int)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc @@ -1712,6 +1704,7 @@ static void __release_sock(struct sock *sk) do { struct sk_buff *next = skb->next; + prefetch(next); WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); @@ -2432,7 +2425,7 @@ static void assign_proto_idx(struct proto *prot) prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { - printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); + pr_err("PROTO_INUSE_NR exhausted\n"); return; } @@ -2462,8 +2455,8 @@ int proto_register(struct proto *prot, int alloc_slab) NULL); if (prot->slab == NULL) { - printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", - prot->name); + pr_crit("%s: Can't create sock SLAB cache!\n", + prot->name); goto out; } @@ -2477,8 +2470,8 @@ int proto_register(struct proto *prot, int alloc_slab) SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { - printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", - prot->name); + pr_crit("%s: Can't create request sock SLAB cache!\n", + prot->name); goto out_free_request_sock_slab_name; } } @@ -2576,7 +2569,7 @@ static char proto_method_implemented(const void *method) } static long sock_prot_memory_allocated(struct proto *proto) { - return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L; + return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; } static char *sock_prot_memory_pressure(struct proto *proto) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index b9868e1..5fd1467 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -10,7 +10,7 @@ #include <linux/inet_diag.h> #include <linux/sock_diag.h> -static struct sock_diag_handler *sock_diag_handlers[AF_MAX]; +static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); @@ -70,7 +70,7 @@ void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlms } EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); -int sock_diag_register(struct sock_diag_handler *hndl) +int sock_diag_register(const struct sock_diag_handler *hndl) { int err = 0; @@ -88,7 +88,7 @@ int sock_diag_register(struct sock_diag_handler *hndl) } EXPORT_SYMBOL_GPL(sock_diag_register); -void sock_diag_unregister(struct sock_diag_handler *hnld) +void sock_diag_unregister(const struct sock_diag_handler *hnld) { int family = hnld->family; @@ -102,7 +102,7 @@ void sock_diag_unregister(struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static inline struct sock_diag_handler *sock_diag_lock_handler(int family) +static const inline struct sock_diag_handler *sock_diag_lock_handler(int family) { if (sock_diag_handlers[family] == NULL) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, @@ -112,7 +112,7 @@ static inline struct sock_diag_handler *sock_diag_lock_handler(int family) return sock_diag_handlers[family]; } -static inline void sock_diag_unlock_handler(struct sock_diag_handler *h) +static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h) { mutex_unlock(&sock_diag_table_mutex); } @@ -121,7 +121,7 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = NLMSG_DATA(nlh); - struct sock_diag_handler *hndl; + const struct sock_diag_handler *hndl; if (nlmsg_len(nlh) < sizeof(*req)) return -EINVAL; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0c28508..a7c3684 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -14,6 +14,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/kmemleak.h> #include <net/ip.h> #include <net/sock.h> @@ -202,12 +203,6 @@ static struct ctl_table netns_core_table[] = { { } }; -__net_initdata struct ctl_path net_core_path[] = { - { .procname = "net", }, - { .procname = "core", }, - { }, -}; - static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; @@ -223,8 +218,7 @@ static __net_init int sysctl_core_net_init(struct net *net) tbl[0].data = &net->core.sysctl_somaxconn; } - net->core.sysctl_hdr = register_net_sysctl_table(net, - net_core_path, tbl); + net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); if (net->core.sysctl_hdr == NULL) goto err_reg; @@ -254,10 +248,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { - static struct ctl_table empty[1]; - - register_sysctl_paths(net_core_path, empty); - register_net_sysctl_rotable(net_core_path, net_core_table); + register_net_sysctl(&init_net, "net/core", net_core_table); return register_pernet_subsys(&sysctl_core_ops); } diff --git a/net/core/utils.c b/net/core/utils.c index dc3c3fa..39895a6 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -58,14 +58,11 @@ __be32 in_aton(const char *str) int i; l = 0; - for (i = 0; i < 4; i++) - { + for (i = 0; i < 4; i++) { l <<= 8; - if (*str != '\0') - { + if (*str != '\0') { val = 0; - while (*str != '\0' && *str != '.' && *str != '\n') - { + while (*str != '\0' && *str != '.' && *str != '\n') { val *= 10; val += *str - '0'; str++; |