aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/dst.c15
-rw-r--r--net/core/flow.c31
-rw-r--r--net/core/neighbour.c21
-rw-r--r--net/core/net_namespace.c31
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/sock.c1
-rw-r--r--net/core/timestamping.c12
8 files changed, 80 insertions, 47 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c58c1e..f134f88 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3434,14 +3434,20 @@ static inline gro_result_t
__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
+ unsigned int maclen = skb->dev->hard_header_len;
for (p = napi->gro_list; p; p = p->next) {
unsigned long diffs;
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
- diffs |= compare_ether_header(skb_mac_header(p),
- skb_gro_mac_header(skb));
+ if (maclen == ETH_HLEN)
+ diffs |= compare_ether_header(skb_mac_header(p),
+ skb_gro_mac_header(skb));
+ else if (!diffs)
+ diffs = memcmp(skb_mac_header(p),
+ skb_gro_mac_header(skb),
+ maclen);
NAPI_GRO_CB(p)->same_flow = !diffs;
NAPI_GRO_CB(p)->flush = 0;
}
@@ -3498,7 +3504,8 @@ EXPORT_SYMBOL(napi_gro_receive);
static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
__skb_pull(skb, skb_headlen(skb));
- skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+ /* restore the reserve we had after netdev_alloc_skb_ip_align() */
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
@@ -6105,6 +6112,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/dst.c b/net/core/dst.c
index 6135f36..8246d47 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- dst->neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
dst->hh = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
@@ -231,7 +231,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
smp_rmb();
again:
- neigh = dst->neighbour;
+ neigh = rcu_dereference_protected(dst->_neighbour, 1);
hh = dst->hh;
child = dst->child;
@@ -240,7 +240,7 @@ again:
hh_cache_put(hh);
if (neigh) {
- dst->neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
neigh_release(neigh);
}
@@ -367,14 +367,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
+ struct neighbour *neigh;
+
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = dst->dev;
+ rcu_read_lock();
+ neigh = dst_get_neighbour(dst);
+ if (neigh && neigh->dev == dev) {
+ neigh->dev = dst->dev;
dev_hold(dst->dev);
dev_put(dev);
}
+ rcu_read_unlock();
}
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 990703b..a6bda2a 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -172,29 +172,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- const struct flowi *key)
+ const struct flowi *key,
+ size_t keysize)
{
const u32 *k = (const u32 *) key;
+ const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
- return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ return jhash2(k, length, fcp->hash_rnd)
& (flow_cache_hash_size(fc) - 1);
}
-typedef unsigned long flow_compare_t;
-
/* I hear what you're saying, use memcmp. But memcmp cannot make
- * important assumptions that we can here, such as alignment and
- * constant size.
+ * important assumptions that we can here, such as alignment.
*/
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
+ size_t keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
- const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
-
- BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
k1 = (const flow_compare_t *) key1;
- k1_lim = k1 + n_elem;
+ k1_lim = k1 + keysize;
k2 = (const flow_compare_t *) key2;
@@ -215,6 +212,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_entry *fle, *tfle;
struct hlist_node *entry;
struct flow_cache_object *flo;
+ size_t keysize;
unsigned int hash;
local_bh_disable();
@@ -222,6 +220,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
fle = NULL;
flo = NULL;
+
+ keysize = flow_key_size(family);
+ if (!keysize)
+ goto nocache;
+
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!fcp->hash_table)
@@ -230,11 +233,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_rnd_recalc)
flow_new_hash_rnd(fc, fcp);
- hash = flow_hash_code(fc, fcp, key);
+ hash = flow_hash_code(fc, fcp, key, keysize);
hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
if (tfle->family == family &&
tfle->dir == dir &&
- flow_key_compare(key, &tfle->key) == 0) {
+ flow_key_compare(key, &tfle->key, keysize) == 0) {
fle = tfle;
break;
}
@@ -248,7 +251,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fle) {
fle->family = family;
fle->dir = dir;
- memcpy(&fle->key, key, sizeof(*key));
+ memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
fle->object = NULL;
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16db887..96bb0a3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -823,6 +823,8 @@ next_elt:
write_unlock_bh(&tbl->lock);
cond_resched();
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
}
/* Cycle through all hash buckets every base_reachable_time/2 ticks.
* ARP entry timeouts range from 1/2 base_reachable_time to 3/2
@@ -1173,12 +1175,17 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
while (neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- struct neighbour *n1 = neigh;
+ struct dst_entry *dst = skb_dst(skb);
+ struct neighbour *n2, *n1 = neigh;
write_unlock_bh(&neigh->lock);
+
+ rcu_read_lock();
/* On shaper/eql skb->dst->neighbour != neigh :( */
- if (skb_dst(skb) && skb_dst(skb)->neighbour)
- n1 = skb_dst(skb)->neighbour;
+ if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
+ n1 = n2;
n1->output(skb);
+ rcu_read_unlock();
+
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
@@ -1300,10 +1307,10 @@ EXPORT_SYMBOL(neigh_compat_output);
int neigh_resolve_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh;
+ struct neighbour *neigh = dst_get_neighbour(dst);
int rc = 0;
- if (!dst || !(neigh = dst->neighbour))
+ if (!dst)
goto discard;
__skb_pull(skb, skb_network_offset(skb));
@@ -1333,7 +1340,7 @@ out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
- dst, dst ? dst->neighbour : NULL);
+ dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1347,7 +1354,7 @@ int neigh_connected_output(struct sk_buff *skb)
{
int err;
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *neigh = dst->neighbour;
+ struct neighbour *neigh = dst_get_neighbour(dst);
struct net_device *dev = neigh->dev;
unsigned int seq;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ea489db..0b0211d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,6 +29,20 @@ EXPORT_SYMBOL(init_net);
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
+static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
+
+static struct net_generic *net_alloc_generic(void)
+{
+ struct net_generic *ng;
+ size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ ng = kzalloc(generic_size, GFP_KERNEL);
+ if (ng)
+ ng->len = max_gen_ptrs;
+
+ return ng;
+}
+
static int net_assign_generic(struct net *net, int id, void *data)
{
struct net_generic *ng, *old_ng;
@@ -42,8 +56,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
if (old_ng->len >= id)
goto assign;
- ng = kzalloc(sizeof(struct net_generic) +
- id * sizeof(void *), GFP_KERNEL);
+ ng = net_alloc_generic();
if (ng == NULL)
return -ENOMEM;
@@ -58,7 +71,6 @@ static int net_assign_generic(struct net *net, int id, void *data)
* the old copy for kfree after a grace period.
*/
- ng->len = id;
memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
rcu_assign_pointer(net->gen, ng);
@@ -159,18 +171,6 @@ out_undo:
goto out;
}
-static struct net_generic *net_alloc_generic(void)
-{
- struct net_generic *ng;
- size_t generic_size = sizeof(struct net_generic) +
- INITIAL_NET_GEN_PTRS * sizeof(void *);
-
- ng = kzalloc(generic_size, GFP_KERNEL);
- if (ng)
- ng->len = INITIAL_NET_GEN_PTRS;
-
- return ng;
-}
#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
@@ -481,6 +481,7 @@ again:
}
return error;
}
+ max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 18d9cbd..05db410 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -193,7 +193,7 @@ void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
- if (dev->priv_flags & IFF_SLAVE) {
+ if (dev->flags & IFF_SLAVE) {
if (dev->npinfo) {
struct net_device *bond_dev = dev->master;
struct sk_buff *skb;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e81978..aebb419 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1257,6 +1257,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
+ bh_unlock_sock(newsk);
sk_free(newsk);
newsk = NULL;
goto out;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7e7ca37..97d036a 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
case PTP_CLASS_V2_VLAN:
phydev = skb->dev->phydev;
if (likely(phydev->drv->txtstamp)) {
+ if (!atomic_inc_not_zero(&sk->sk_refcnt))
+ return;
clone = skb_clone(skb, GFP_ATOMIC);
- if (!clone)
+ if (!clone) {
+ sock_put(sk);
return;
+ }
clone->sk = sk;
phydev->drv->txtstamp(phydev, clone, type);
}
@@ -76,8 +80,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock_exterr_skb *serr;
int err;
- if (!hwtstamps)
+ if (!hwtstamps) {
+ sock_put(sk);
+ kfree_skb(skb);
return;
+ }
*skb_hwtstamps(skb) = *hwtstamps;
serr = SKB_EXT_ERR(skb);
@@ -86,6 +93,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
skb->sk = NULL;
err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
if (err)
kfree_skb(skb);
}