aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c4
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/9p/client.c114
-rw-r--r--net/9p/protocol.c74
-rw-r--r--net/9p/protocol.h6
-rw-r--r--net/9p/trans_virtio.c142
-rw-r--r--net/atm/proc.c10
-rw-r--r--net/bluetooth/bnep/netdev.c10
-rw-r--r--net/bluetooth/hci_sysfs.c41
-rw-r--r--net/bluetooth/l2cap.c4
-rw-r--r--net/bluetooth/rfcomm/core.c4
-rw-r--r--net/bluetooth/rfcomm/sock.c4
-rw-r--r--net/bluetooth/sco.c4
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br_device.c45
-rw-r--r--net/bridge/br_forward.c18
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_input.c10
-rw-r--r--net/bridge/br_multicast.c39
-rw-r--r--net/bridge/br_notify.c4
-rw-r--r--net/bridge/br_private.h20
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/core/dev.c401
-rw-r--r--net/core/dev_mcast.c5
-rw-r--r--net/core/ethtool.c103
-rw-r--r--net/core/fib_rules.c10
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c229
-rw-r--r--net/core/netpoll.c4
-rw-r--r--net/core/pktgen.c58
-rw-r--r--net/core/rtnetlink.c43
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c19
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/ipv4.c8
-rw-r--r--net/dccp/ipv6.c8
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/proto.c16
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/devinet.c4
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ipconfig.c57
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/route.c80
-rw-r--r--net/ipv4/tcp.c65
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_ipv4.c20
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_output.c18
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/xfrm4_policy.c5
-rw-r--r--net/ipv6/addrconf.c870
-rw-r--r--net/ipv6/fib6_rules.c11
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/route.c13
-rw-r--r--net/ipv6/tcp_ipv6.c7
-rw-r--r--net/ipv6/udp.c28
-rw-r--r--net/ipv6/xfrm6_policy.c3
-rw-r--r--net/llc/llc_c_ac.c2
-rw-r--r--net/llc/llc_conn.c3
-rw-r--r--net/mac80211/debugfs_netdev.c10
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c13
-rw-r--r--net/mac80211/sta_info.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_conntrack_sip.c4
-rw-r--r--net/netfilter/nfnetlink.c4
-rw-r--r--net/netfilter/xt_hashlimit.c2
-rw-r--r--net/netlink/af_netlink.c17
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/phonet/pn_dev.c3
-rw-r--r--net/phonet/pn_netlink.c3
-rw-r--r--net/rds/af_rds.c7
-rw-r--r--net/rds/cong.c2
-rw-r--r--net/rds/ib_cm.c3
-rw-r--r--net/rds/ib_rdma.c5
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/ib_send.c20
-rw-r--r--net/rds/iw_cm.c4
-rw-r--r--net/rds/iw_recv.c4
-rw-r--r--net/rds/iw_send.c3
-rw-r--r--net/rds/loop.c7
-rw-r--r--net/rds/rdma.c4
-rw-r--r--net/rds/rdma_transport.c5
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/send.c38
-rw-r--r--net/rds/tcp_recv.c1
-rw-r--r--net/rds/tcp_send.c4
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/sctp/input.c42
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/sunrpc/addr.c8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c11
-rw-r--r--net/sunrpc/rpc_pipe.c9
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c27
-rw-r--r--net/sunrpc/svcauth_unix.c49
-rw-r--r--net/sunrpc/svcsock.c3
-rw-r--r--net/sunrpc/xprtrdma/transport.c7
-rw-r--r--net/sunrpc/xprtsock.c18
-rw-r--r--net/tipc/bcast.c35
-rw-r--r--net/tipc/bearer.c37
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/link.c20
-rw-r--r--net/tipc/net.c25
-rw-r--r--net/tipc/ref.c26
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/tipc/subscr.c57
-rw-r--r--net/tipc/subscr.h2
-rw-r--r--net/x25/x25_dev.c2
-rw-r--r--net/xfrm/xfrm_policy.c7
116 files changed, 2137 insertions, 1159 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4535122..c39a5f4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -530,6 +530,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
}
unregister_netdevice_many(&list);
break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* Forbid underlaying device to change its type. */
+ return NOTIFY_BAD;
}
out:
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c0316e0..c584a0a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (skb_bond_should_drop(skb))
+ if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
goto drop;
skb->skb_iif = skb->dev->ifindex;
@@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
{
struct sk_buff *p;
- if (skb_bond_should_drop(skb))
+ if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
goto drop;
skb->skb_iif = skb->dev->ifindex;
diff --git a/net/9p/client.c b/net/9p/client.c
index 09d4f1e..e3e5bf4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -46,6 +46,7 @@ enum {
Opt_msize,
Opt_trans,
Opt_legacy,
+ Opt_version,
Opt_err,
};
@@ -53,9 +54,42 @@ static const match_table_t tokens = {
{Opt_msize, "msize=%u"},
{Opt_legacy, "noextend"},
{Opt_trans, "trans=%s"},
+ {Opt_version, "version=%s"},
{Opt_err, NULL},
};
+inline int p9_is_proto_dotl(struct p9_client *clnt)
+{
+ return (clnt->proto_version == p9_proto_2000L);
+}
+EXPORT_SYMBOL(p9_is_proto_dotl);
+
+inline int p9_is_proto_dotu(struct p9_client *clnt)
+{
+ return (clnt->proto_version == p9_proto_2000u);
+}
+EXPORT_SYMBOL(p9_is_proto_dotu);
+
+/* Interpret mount option for protocol version */
+static unsigned char get_protocol_version(const substring_t *name)
+{
+ unsigned char version = -EINVAL;
+ if (!strncmp("9p2000", name->from, name->to-name->from)) {
+ version = p9_proto_legacy;
+ P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n");
+ } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) {
+ version = p9_proto_2000u;
+ P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n");
+ } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) {
+ version = p9_proto_2000L;
+ P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
+ } else {
+ P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ",
+ name->from);
+ }
+ return version;
+}
+
static struct p9_req_t *
p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
@@ -75,7 +109,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
int option;
int ret = 0;
- clnt->dotu = 1;
+ clnt->proto_version = p9_proto_2000u;
clnt->msize = 8192;
if (!opts)
@@ -118,7 +152,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
break;
case Opt_legacy:
- clnt->dotu = 0;
+ clnt->proto_version = p9_proto_legacy;
+ break;
+ case Opt_version:
+ ret = get_protocol_version(&args[0]);
+ if (ret == -EINVAL)
+ goto free_and_return;
+ clnt->proto_version = ret;
break;
default:
continue;
@@ -410,14 +450,15 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
int ecode;
char *ename;
- err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode);
+ err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
if (err) {
P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
err);
return err;
}
- if (c->dotu)
+ if (p9_is_proto_dotu(c))
err = -ecode;
if (!err || !IS_ERR_VALUE(err))
@@ -515,7 +556,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
/* marshall the data */
p9pdu_prepare(req->tc, tag, type);
va_start(ap, fmt);
- err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap);
+ err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
va_end(ap);
p9pdu_finalize(req->tc);
@@ -627,14 +668,31 @@ int p9_client_version(struct p9_client *c)
char *version;
int msize;
- P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n",
- c->msize, c->dotu);
- req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize,
- c->dotu ? "9P2000.u" : "9P2000");
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
+ c->msize, c->proto_version);
+
+ switch (c->proto_version) {
+ case p9_proto_2000L:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+ c->msize, "9P2000.L");
+ break;
+ case p9_proto_2000u:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+ c->msize, "9P2000.u");
+ break;
+ case p9_proto_legacy:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+ c->msize, "9P2000");
+ break;
+ default:
+ return -EINVAL;
+ break;
+ }
+
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
+ err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
p9pdu_dump(1, req->rc);
@@ -642,10 +700,12 @@ int p9_client_version(struct p9_client *c)
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
- if (!memcmp(version, "9P2000.u", 8))
- c->dotu = 1;
- else if (!memcmp(version, "9P2000", 6))
- c->dotu = 0;
+ if (!strncmp(version, "9P2000.L", 8))
+ c->proto_version = p9_proto_2000L;
+ else if (!strncmp(version, "9P2000.u", 8))
+ c->proto_version = p9_proto_2000u;
+ else if (!strncmp(version, "9P2000", 6))
+ c->proto_version = p9_proto_legacy;
else {
err = -EREMOTEIO;
goto error;
@@ -700,8 +760,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
goto put_trans;
}
- P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
- clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
+ P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
+ clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
err = clnt->trans_mod->create(clnt, dev_name, options);
if (err)
@@ -784,7 +844,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
if (err) {
p9pdu_dump(1, req->rc);
p9_free_req(clnt, req);
@@ -833,7 +893,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
if (err) {
p9pdu_dump(1, req->rc);
p9_free_req(clnt, req);
@@ -891,7 +951,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
p9pdu_dump(1, req->rc);
p9_free_req(clnt, req);
@@ -952,7 +1012,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
p9pdu_dump(1, req->rc);
goto free_and_error;
@@ -997,7 +1057,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
p9pdu_dump(1, req->rc);
goto free_and_error;
@@ -1098,7 +1158,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
p9pdu_dump(1, req->rc);
goto free_and_error;
@@ -1159,7 +1219,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
if (err) {
p9pdu_dump(1, req->rc);
goto free_and_error;
@@ -1199,7 +1259,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
+ err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
p9pdu_dump(1, req->rc);
p9_free_req(clnt, req);
@@ -1226,7 +1286,7 @@ error:
}
EXPORT_SYMBOL(p9_client_stat);
-static int p9_client_statsize(struct p9_wstat *wst, int optional)
+static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
{
int ret;
@@ -1245,7 +1305,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional)
if (wst->muid)
ret += strlen(wst->muid);
- if (optional) {
+ if (proto_version == p9_proto_2000u) {
ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
if (wst->extension)
ret += strlen(wst->extension);
@@ -1262,7 +1322,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
err = 0;
clnt = fid->clnt;
- wst->size = p9_client_statsize(wst, clnt->dotu);
+ wst->size = p9_client_statsize(wst, clnt->proto_version);
P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
P9_DPRINTK(P9_DEBUG_9P,
" sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index fc70147..94f5a8f 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -52,7 +52,7 @@
#endif
static int
-p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
#ifdef CONFIG_NET_9P_DEBUG
void
@@ -144,7 +144,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
*/
static int
-p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
+ va_list ap)
{
const char *ptr;
int errcode = 0;
@@ -194,7 +195,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
int16_t len;
int size;
- errcode = p9pdu_readf(pdu, optional, "w", &len);
+ errcode = p9pdu_readf(pdu, proto_version,
+ "w", &len);
if (errcode)
break;
@@ -217,7 +219,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
struct p9_qid *qid =
va_arg(ap, struct p9_qid *);
- errcode = p9pdu_readf(pdu, optional, "bdq",
+ errcode = p9pdu_readf(pdu, proto_version, "bdq",
&qid->type, &qid->version,
&qid->path);
}
@@ -230,7 +232,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
-1;
errcode =
- p9pdu_readf(pdu, optional,
+ p9pdu_readf(pdu, proto_version,
"wwdQdddqssss?sddd",
&stbuf->size, &stbuf->type,
&stbuf->dev, &stbuf->qid,
@@ -250,7 +252,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
void **data = va_arg(ap, void **);
errcode =
- p9pdu_readf(pdu, optional, "d", count);
+ p9pdu_readf(pdu, proto_version, "d", count);
if (!errcode) {
*count =
MIN(*count,
@@ -263,8 +265,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
int16_t *nwname = va_arg(ap, int16_t *);
char ***wnames = va_arg(ap, char ***);
- errcode =
- p9pdu_readf(pdu, optional, "w", nwname);
+ errcode = p9pdu_readf(pdu, proto_version,
+ "w", nwname);
if (!errcode) {
*wnames =
kmalloc(sizeof(char *) * *nwname,
@@ -278,7 +280,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
for (i = 0; i < *nwname; i++) {
errcode =
- p9pdu_readf(pdu, optional,
+ p9pdu_readf(pdu,
+ proto_version,
"s",
&(*wnames)[i]);
if (errcode)
@@ -306,7 +309,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
*wqids = NULL;
errcode =
- p9pdu_readf(pdu, optional, "w", nwqid);
+ p9pdu_readf(pdu, proto_version, "w", nwqid);
if (!errcode) {
*wqids =
kmalloc(*nwqid *
@@ -321,7 +324,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
for (i = 0; i < *nwqid; i++) {
errcode =
- p9pdu_readf(pdu, optional,
+ p9pdu_readf(pdu,
+ proto_version,
"Q",
&(*wqids)[i]);
if (errcode)
@@ -336,7 +340,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
}
break;
case '?':
- if (!optional)
+ if (proto_version != p9_proto_2000u)
return 0;
break;
default:
@@ -352,7 +356,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
}
int
-p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
+ va_list ap)
{
const char *ptr;
int errcode = 0;
@@ -389,7 +394,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
if (sptr)
len = MIN(strlen(sptr), USHORT_MAX);
- errcode = p9pdu_writef(pdu, optional, "w", len);
+ errcode = p9pdu_writef(pdu, proto_version,
+ "w", len);
if (!errcode && pdu_write(pdu, sptr, len))
errcode = -EFAULT;
}
@@ -398,7 +404,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
const struct p9_qid *qid =
va_arg(ap, const struct p9_qid *);
errcode =
- p9pdu_writef(pdu, optional, "bdq",
+ p9pdu_writef(pdu, proto_version, "bdq",
qid->type, qid->version,
qid->path);
} break;
@@ -406,7 +412,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
const struct p9_wstat *stbuf =
va_arg(ap, const struct p9_wstat *);
errcode =
- p9pdu_writef(pdu, optional,
+ p9pdu_writef(pdu, proto_version,
"wwdQdddqssss?sddd",
stbuf->size, stbuf->type,
stbuf->dev, &stbuf->qid,
@@ -421,8 +427,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
int32_t count = va_arg(ap, int32_t);
const void *data = va_arg(ap, const void *);
- errcode =
- p9pdu_writef(pdu, optional, "d", count);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ count);
if (!errcode && pdu_write(pdu, data, count))
errcode = -EFAULT;
}
@@ -431,8 +437,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
va_arg(ap, const void __user *);
- errcode =
- p9pdu_writef(pdu, optional, "d", count);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ count);
if (!errcode && pdu_write_u(pdu, udata, count))
errcode = -EFAULT;
}
@@ -441,14 +447,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
int16_t nwname = va_arg(ap, int);
const char **wnames = va_arg(ap, const char **);
- errcode =
- p9pdu_writef(pdu, optional, "w", nwname);
+ errcode = p9pdu_writef(pdu, proto_version, "w",
+ nwname);
if (!errcode) {
int i;
for (i = 0; i < nwname; i++) {
errcode =
- p9pdu_writef(pdu, optional,
+ p9pdu_writef(pdu,
+ proto_version,
"s",
wnames[i]);
if (errcode)
@@ -462,14 +469,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
struct p9_qid *wqids =
va_arg(ap, struct p9_qid *);
- errcode =
- p9pdu_writef(pdu, optional, "w", nwqid);
+ errcode = p9pdu_writef(pdu, proto_version, "w",
+ nwqid);
if (!errcode) {
int i;
for (i = 0; i < nwqid; i++) {
errcode =
- p9pdu_writef(pdu, optional,
+ p9pdu_writef(pdu,
+ proto_version,
"Q",
&wqids[i]);
if (errcode)
@@ -479,7 +487,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
}
break;
case '?':
- if (!optional)
+ if (proto_version != p9_proto_2000u)
return 0;
break;
default:
@@ -494,32 +502,32 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
return errcode;
}
-int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
- ret = p9pdu_vreadf(pdu, optional, fmt, ap);
+ ret = p9pdu_vreadf(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
static int
-p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
- ret = p9pdu_vwritef(pdu, optional, fmt, ap);
+ ret = p9pdu_vwritef(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
-int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
+int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
{
struct p9_fcall fake_pdu;
int ret;
@@ -529,7 +537,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
- ret = p9pdu_readf(&fake_pdu, dotu, "S", st);
+ ret = p9pdu_readf(&fake_pdu, proto_version, "S", st);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
p9pdu_dump(1, &fake_pdu);
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index ccde462..2431c0f 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -25,9 +25,9 @@
*
*/
-int
-p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
-int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
+ va_list ap);
+int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
int p9pdu_finalize(struct p9_fcall *pdu);
void p9pdu_dump(int, struct p9_fcall *);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cb50f4a..afde1a8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -49,8 +49,6 @@
/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
-/* global which tracks highest initialized channel */
-static int chan_index;
/**
* struct virtio_chan - per-instance transport information
@@ -68,8 +66,7 @@ static int chan_index;
*
*/
-static struct virtio_chan {
- bool initialized;
+struct virtio_chan {
bool inuse;
spinlock_t lock;
@@ -80,7 +77,17 @@ static struct virtio_chan {
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
-} channels[MAX_9P_CHAN];
+
+ int tag_len;
+ /*
+ * tag name to identify a mount Non-null terminated
+ */
+ char *tag;
+
+ struct list_head chan_list;
+};
+
+static struct list_head virtio_chan_list;
/* How many bytes left in this page. */
static unsigned int rest_of_page(void *data)
@@ -213,30 +220,38 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
return 0;
}
+static ssize_t p9_mount_tag_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_chan *chan;
+ struct virtio_device *vdev;
+
+ vdev = dev_to_virtio(dev);
+ chan = vdev->priv;
+
+ return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
+}
+
+static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
+
/**
* p9_virtio_probe - probe for existence of 9P virtio channels
* @vdev: virtio device to probe
*
- * This probes for existing virtio channels. At present only
- * a single channel is in use, so in the future more work may need
- * to be done here.
+ * This probes for existing virtio channels.
*
*/
static int p9_virtio_probe(struct virtio_device *vdev)
{
+ __u16 tag_len;
+ char *tag;
int err;
struct virtio_chan *chan;
- int index;
- mutex_lock(&virtio_9p_lock);
- index = chan_index++;
- chan = &channels[index];
- mutex_unlock(&virtio_9p_lock);
-
- if (chan_index > MAX_9P_CHAN) {
- printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n");
- BUG();
+ chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
+ if (!chan) {
+ printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
err = -ENOMEM;
goto fail;
}
@@ -255,15 +270,37 @@ static int p9_virtio_probe(struct virtio_device *vdev)
sg_init_table(chan->sg, VIRTQUEUE_NUM);
chan->inuse = false;
- chan->initialized = true;
+ if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
+ vdev->config->get(vdev,
+ offsetof(struct virtio_9p_config, tag_len),
+ &tag_len, sizeof(tag_len));
+ } else {
+ err = -EINVAL;
+ goto out_free_vq;
+ }
+ tag = kmalloc(tag_len, GFP_KERNEL);
+ if (!tag) {
+ err = -ENOMEM;
+ goto out_free_vq;
+ }
+ vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
+ tag, tag_len);
+ chan->tag = tag;
+ chan->tag_len = tag_len;
+ err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ if (err) {
+ kfree(tag);
+ goto out_free_vq;
+ }
+ mutex_lock(&virtio_9p_lock);
+ list_add_tail(&chan->chan_list, &virtio_chan_list);
+ mutex_unlock(&virtio_9p_lock);
return 0;
out_free_vq:
vdev->config->del_vqs(vdev);
+ kfree(chan);
fail:
- mutex_lock(&virtio_9p_lock);
- chan_index--;
- mutex_unlock(&virtio_9p_lock);
return err;
}
@@ -280,35 +317,31 @@ fail:
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
*
- * Bugs: doesn't allow identification of a specific channel
- * to allocate, channels are allocated sequentially. This was
- * a pragmatic decision to get things rolling, but ideally some
- * way of identifying the channel to attach to would be nice
- * if we are going to support multiple channels.
- *
*/
static int
p9_virtio_create(struct p9_client *client, const char *devname, char *args)
{
- struct virtio_chan *chan = channels;
- int index = 0;
+ struct virtio_chan *chan;
+ int ret = -ENOENT;
+ int found = 0;
mutex_lock(&virtio_9p_lock);
- while (index < MAX_9P_CHAN) {
- if (chan->initialized && !chan->inuse) {
- chan->inuse = true;
- break;
- } else {
- index++;
- chan = &channels[index];
+ list_for_each_entry(chan, &virtio_chan_list, chan_list) {
+ if (!strncmp(devname, chan->tag, chan->tag_len)) {
+ if (!chan->inuse) {
+ chan->inuse = true;
+ found = 1;
+ break;
+ }
+ ret = -EBUSY;
}
}
mutex_unlock(&virtio_9p_lock);
- if (index >= MAX_9P_CHAN) {
+ if (!found) {
printk(KERN_ERR "9p: no channels available\n");
- return -ENODEV;
+ return ret;
}
client->trans = (void *)chan;
@@ -329,11 +362,15 @@ static void p9_virtio_remove(struct virtio_device *vdev)
struct virtio_chan *chan = vdev->priv;
BUG_ON(chan->inuse);
+ vdev->config->del_vqs(vdev);
+
+ mutex_lock(&virtio_9p_lock);
+ list_del(&chan->chan_list);
+ mutex_unlock(&virtio_9p_lock);
+ sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ kfree(chan->tag);
+ kfree(chan);
- if (chan->initialized) {
- vdev->config->del_vqs(vdev);
- chan->initialized = false;
- }
}
static struct virtio_device_id id_table[] = {
@@ -341,13 +378,19 @@ static struct virtio_device_id id_table[] = {
{ 0 },
};
+static unsigned int features[] = {
+ VIRTIO_9P_MOUNT_TAG,
+};
+
/* The standard "struct lguest_driver": */
static struct virtio_driver p9_virtio_drv = {
- .driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
- .id_table = id_table,
- .probe = p9_virtio_probe,
- .remove = p9_virtio_remove,
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = p9_virtio_probe,
+ .remove = p9_virtio_remove,
};
static struct p9_trans_module p9_virtio_trans = {
@@ -364,10 +407,7 @@ static struct p9_trans_module p9_virtio_trans = {
/* The standard init function */
static int __init p9_virtio_init(void)
{
- int count;
-
- for (count = 0; count < MAX_9P_CHAN; count++)
- channels[count].initialized = false;
+ INIT_LIST_HEAD(&virtio_chan_list);
v9fs_register_trans(&p9_virtio_trans);
return register_virtio_driver(&p9_virtio_drv);
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 7a96b23..f188a39 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -406,7 +406,6 @@ EXPORT_SYMBOL(atm_proc_root);
int atm_proc_dev_register(struct atm_dev *dev)
{
- int digits, num;
int error;
/* No proc info */
@@ -414,16 +413,9 @@ int atm_proc_dev_register(struct atm_dev *dev)
return 0;
error = -ENOMEM;
- digits = 0;
- for (num = dev->number; num; num /= 10)
- digits++;
- if (!digits)
- digits++;
-
- dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL);
+ dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number);
if (!dev->proc_name)
goto err_out;
- sprintf(dev->proc_name, "%s:%d", dev->type, dev->number);
dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
&proc_atm_dev_ops, dev);
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index b6234b7..326ab45 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
r->len = htons(ETH_ALEN * 2);
} else {
- struct dev_mc_list *dmi = dev->mc_list;
+ struct dev_mc_list *dmi;
int i, len = skb->len;
if (dev->flags & IFF_BROADCAST) {
@@ -97,12 +97,12 @@ static void bnep_net_set_mc_list(struct net_device *dev)
/* FIXME: We should group addresses here. */
- for (i = 0;
- i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS;
- i++) {
+ i = 0;
+ netdev_for_each_mc_addr(dmi, dev) {
+ if (i == BNEP_MAX_MULTICAST_FILTERS)
+ break;
memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
- dmi = dmi->next;
}
r->len = htons(skb->len - len);
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 1a79a6c..cafb55b 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -405,20 +406,11 @@ static struct device_type bt_host = {
.release = bt_host_release,
};
-static int inquiry_cache_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
- return 0;
-}
-
-static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
+static int inquiry_cache_show(struct seq_file *f, void *p)
{
- struct hci_dev *hdev = file->private_data;
+ struct hci_dev *hdev = f->private;
struct inquiry_cache *cache = &hdev->inq_cache;
struct inquiry_entry *e;
- char buf[4096];
- int n = 0;
hci_dev_lock_bh(hdev);
@@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf,
struct inquiry_data *data = &e->data;
bdaddr_t bdaddr;
baswap(&bdaddr, &data->bdaddr);
- n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
- batostr(&bdaddr),
- data->pscan_rep_mode, data->pscan_period_mode,
- data->pscan_mode, data->dev_class[2],
- data->dev_class[1], data->dev_class[0],
- __le16_to_cpu(data->clock_offset),
- data->rssi, data->ssp_mode, e->timestamp);
+ seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
+ batostr(&bdaddr),
+ data->pscan_rep_mode, data->pscan_period_mode,
+ data->pscan_mode, data->dev_class[2],
+ data->dev_class[1], data->dev_class[0],
+ __le16_to_cpu(data->clock_offset),
+ data->rssi, data->ssp_mode, e->timestamp);
}
hci_dev_unlock_bh(hdev);
- return simple_read_from_buffer(userbuf, count, ppos, buf, n);
+ return 0;
+}
+
+static int inquiry_cache_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, inquiry_cache_show, inode->i_private);
}
static const struct file_operations inquiry_cache_fops = {
- .open = inquiry_cache_open,
- .read = inquiry_cache_read,
+ .open = inquiry_cache_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
};
int hci_register_sysfs(struct hci_dev *hdev)
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 400efa2..4db7ae2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3937,7 +3937,9 @@ drop:
return 0;
}
-static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
+static ssize_t l2cap_sysfs_show(struct class *dev,
+ struct class_attribute *attr,
+ char *buf)
{
struct sock *sk;
struct hlist_node *node;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 89f4a59..db8a68e 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = {
.security_cfm = rfcomm_security_cfm
};
-static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf)
+static ssize_t rfcomm_dlc_sysfs_show(struct class *dev,
+ struct class_attribute *attr,
+ char *buf)
{
struct rfcomm_session *s;
struct list_head *pp, *p;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4b5968d..ca87d6a 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1061,7 +1061,9 @@ done:
return result;
}
-static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf)
+static ssize_t rfcomm_sock_sysfs_show(struct class *dev,
+ struct class_attribute *attr,
+ char *buf)
{
struct sock *sk;
struct hlist_node *node;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index dd8f6ec..f93b939 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -953,7 +953,9 @@ drop:
return 0;
}
-static ssize_t sco_sysfs_show(struct class *dev, char *buf)
+static ssize_t sco_sysfs_show(struct class *dev,
+ struct class_attribute *attr,
+ char *buf)
{
struct sock *sk;
struct hlist_node *node;
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 19a6b96..d115d5c 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -35,6 +35,7 @@ config BRIDGE
config BRIDGE_IGMP_SNOOPING
bool "IGMP snooping"
depends on BRIDGE
+ depends on INET
default y
---help---
If you say Y here, then the Ethernet bridge will be able selectively
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eb7062d..5b8a6e7 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- BR_INPUT_SKB_CB(skb)->brdev = dev;
+ brstats->tx_packets++;
+ brstats->tx_bytes += skb->len;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -40,7 +41,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
goto out;
mdst = br_mdb_get(br, skb);
- if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only)
+ if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb))
br_multicast_deliver(mdst, skb);
else
br_flood_deliver(br, skb);
@@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev)
return 0;
}
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ struct br_cpu_netstats sum = { 0 };
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct br_cpu_netstats *bstats
+ = per_cpu_ptr(br->stats, cpu);
+
+ sum.tx_bytes += bstats->tx_bytes;
+ sum.tx_packets += bstats->tx_packets;
+ sum.rx_bytes += bstats->rx_bytes;
+ sum.rx_packets += bstats->rx_packets;
+ }
+
+ stats->tx_bytes = sum.tx_bytes;
+ stats->tx_packets = sum.tx_packets;
+ stats->rx_bytes = sum.rx_bytes;
+ stats->rx_packets = sum.rx_packets;
+
+ return stats;
+}
+
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_open = br_dev_open,
.ndo_stop = br_dev_stop,
.ndo_start_xmit = br_dev_xmit,
+ .ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
};
+static void br_dev_free(struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ free_percpu(br->stats);
+ free_netdev(dev);
+}
+
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index d61e6f7..8dbec83 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -19,6 +19,11 @@
#include <linux/netfilter_bridge.h>
#include "br_private.h"
+static int deliver_clone(const struct net_bridge_port *prev,
+ struct sk_buff *skb,
+ void (*__packet_hook)(const struct net_bridge_port *p,
+ struct sk_buff *skb));
+
/* Don't forward packets to originating port or forwarding diasabled */
static inline int should_deliver(const struct net_bridge_port *p,
const struct sk_buff *skb)
@@ -94,17 +99,22 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
}
/* called with rcu_read_lock */
-void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
+void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
{
if (should_deliver(to, skb)) {
- __br_forward(to, skb);
+ if (skb0)
+ deliver_clone(to, skb, __br_forward);
+ else
+ __br_forward(to, skb);
return;
}
- kfree_skb(skb);
+ if (!skb0)
+ kfree_skb(skb);
}
-static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb,
+static int deliver_clone(const struct net_bridge_port *prev,
+ struct sk_buff *skb,
void (*__packet_hook)(const struct net_bridge_port *p,
struct sk_buff *skb))
{
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b6a3872..b7cdd2e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name)
br = netdev_priv(dev);
br->dev = dev;
+ br->stats = alloc_percpu(struct br_cpu_netstats);
+ if (!br->stats) {
+ free_netdev(dev);
+ return NULL;
+ }
+
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 53b3985..333dfb7 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+ struct net_bridge *br = netdev_priv(brdev);
+ struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
- brdev->stats.rx_packets++;
- brdev->stats.rx_bytes += skb->len;
+ brstats->rx_packets++;
+ brstats->rx_bytes += skb->len;
indev = skb->dev;
skb->dev = brdev;
@@ -70,7 +72,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
if (is_multicast_ether_addr(dest)) {
mdst = br_mdb_get(br, skb);
- if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) {
+ if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
if ((mdst && !hlist_unhashed(&mdst->mglist)) ||
br_multicast_is_router(br))
skb2 = skb;
@@ -90,7 +92,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
if (skb) {
if (dst)
- br_forward(dst->dst, skb);
+ br_forward(dst->dst, skb, skb2);
else
br_flood_forward(br, skb, skb2);
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2559fb5..9f0c4f0 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
struct net_bridge_mdb_entry *mp;
struct hlist_node *p;
- hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
+ hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
if (dst == mp->addr)
return mp;
}
@@ -49,22 +49,23 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
static struct net_bridge_mdb_entry *br_mdb_ip_get(
struct net_bridge_mdb_htable *mdb, __be32 dst)
{
+ if (!mdb)
+ return NULL;
+
return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
}
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb)
{
- struct net_bridge_mdb_htable *mdb = br->mdb;
-
- if (!mdb || br->multicast_disabled)
+ if (br->multicast_disabled)
return NULL;
switch (skb->protocol) {
case htons(ETH_P_IP):
if (BR_INPUT_SKB_CB(skb)->igmp)
break;
- return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr);
+ return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr);
}
return NULL;
@@ -627,8 +628,8 @@ static void br_multicast_port_query_expired(unsigned long data)
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
- if (port && (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING))
+ if (port->state == BR_STATE_DISABLED ||
+ port->state == BR_STATE_BLOCKING)
goto out;
if (port->multicast_startup_queries_sent <
@@ -823,6 +824,7 @@ static int br_multicast_query(struct net_bridge *br,
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
+ int err = 0;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
@@ -841,15 +843,17 @@ static int br_multicast_query(struct net_bridge *br,
group = 0;
}
} else {
- if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
- return -EINVAL;
+ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
+ err = -EINVAL;
+ goto out;
+ }
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
- return 0;
+ goto out;
- max_delay = ih3->code ? 1 :
- IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE);
+ max_delay = ih3->code ?
+ IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
}
if (!group)
@@ -876,7 +880,7 @@ static int br_multicast_query(struct net_bridge *br,
out:
spin_unlock(&br->multicast_lock);
- return 0;
+ return err;
}
static void br_multicast_leave_group(struct net_bridge *br,
@@ -987,7 +991,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = pskb_trim_rcsum(skb2, len);
if (err)
- return err;
+ goto err_out;
}
len -= ip_hdrlen(skb2);
@@ -999,8 +1003,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (!pskb_may_pull(skb2, sizeof(*ih)))
goto out;
- iph = ip_hdr(skb2);
-
switch (skb2->ip_summed) {
case CHECKSUM_COMPLETE:
if (!csum_fold(skb2->csum))
@@ -1009,7 +1011,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
case CHECKSUM_NONE:
skb2->csum = 0;
if (skb_checksum_complete(skb2))
- return -EINVAL;
+ goto out;
}
err = 0;
@@ -1036,6 +1038,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
out:
__skb_push(skb2, offset);
+err_out:
if (skb2 != skb)
kfree_skb(skb2);
return err;
@@ -1135,7 +1138,7 @@ void br_multicast_stop(struct net_bridge *br)
if (mdb->old) {
spin_unlock_bh(&br->multicast_lock);
- synchronize_rcu_bh();
+ rcu_barrier_bh();
spin_lock_bh(&br->multicast_lock);
WARN_ON(mdb->old);
}
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 763a3ec..1413b72 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -82,6 +82,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_UNREGISTER:
br_del_if(br, dev);
break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* Forbid underlaying device to change its type. */
+ return NOTIFY_BAD;
}
/* Events that may cause spanning tree to refresh */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1cf2cef..791d4ab 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,14 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
+
+ struct br_cpu_netstats __percpu {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+ } *stats;
+
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
unsigned long feature_mask;
@@ -206,12 +214,20 @@ struct net_bridge
struct br_input_skb_cb {
struct net_device *brdev;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
int igmp;
int mrouters_only;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only)
+#else
+# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0)
+#endif
+
extern struct notifier_block br_device_notifier;
extern const u8 br_group_address[ETH_ALEN];
@@ -252,7 +268,7 @@ extern void br_deliver(const struct net_bridge_port *to,
struct sk_buff *skb);
extern int br_dev_queue_push_xmit(struct sk_buff *skb);
extern void br_forward(const struct net_bridge_port *to,
- struct sk_buff *skb);
+ struct sk_buff *skb, struct sk_buff *skb0);
extern int br_forward_finish(struct sk_buff *skb);
extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb);
extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
@@ -423,7 +439,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
-extern struct sysfs_ops brport_sysfs_ops;
+extern const struct sysfs_ops brport_sysfs_ops;
extern int br_sysfs_addif(struct net_bridge_port *p);
/* br_sysfs_br.c */
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 696596c..0b99164 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj,
return ret;
}
-struct sysfs_ops brport_sysfs_ops = {
+const struct sysfs_ops brport_sysfs_ops = {
.show = brport_show,
.store = brport_store,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index bcc490c..a03aab4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -772,14 +772,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
- rtnl_lock();
- dev = __dev_getfirstbyhwtype(net, type);
- if (dev)
- dev_hold(dev);
- rtnl_unlock();
- return dev;
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
+ if (dev->type == type) {
+ dev_hold(dev);
+ ret = dev;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
@@ -1084,9 +1087,9 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
- call_netdevice_notifiers(event, dev);
+ return call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
@@ -1931,7 +1934,7 @@ out_kfree_skb:
return rc;
}
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
@@ -1949,7 +1952,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
else
hash = skb->protocol;
- hash = jhash_1word(hash, skb_tx_hashrnd);
+ hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
@@ -1959,10 +1962,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
if (net_ratelimit()) {
- WARN(1, "%s selects TX queue %d, but "
+ netdev_warn(dev, "selects TX queue %d, but "
"real number of TX queues is %d\n",
- dev->name, queue_index,
- dev->real_num_tx_queues);
+ queue_index, dev->real_num_tx_queues);
}
return 0;
}
@@ -2175,6 +2177,178 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+#ifdef CONFIG_SMP
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip;
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ int cpu = -1;
+ u8 ip_proto;
+ u32 addr1, addr2, ports, ihl;
+
+ rcu_read_lock();
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->num_rx_queues)) {
+ if (net_ratelimit()) {
+ netdev_warn(dev, "received packet on queue "
+ "%u, but number of RX queues is %u\n",
+ index, dev->num_rx_queues);
+ }
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx;
+
+ if (!rxqueue->rps_map)
+ goto done;
+
+ if (skb->rxhash)
+ goto got_hash; /* Skip hash computation on packet header */
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(*ip)))
+ goto done;
+
+ ip = (struct iphdr *) skb->data;
+ ip_proto = ip->protocol;
+ addr1 = ip->saddr;
+ addr2 = ip->daddr;
+ ihl = ip->ihl;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(*ip6)))
+ goto done;
+
+ ip6 = (struct ipv6hdr *) skb->data;
+ ip_proto = ip6->nexthdr;
+ addr1 = ip6->saddr.s6_addr32[3];
+ addr2 = ip6->daddr.s6_addr32[3];
+ ihl = (40 >> 2);
+ break;
+ default:
+ goto done;
+ }
+ ports = 0;
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ if (pskb_may_pull(skb, (ihl * 4) + 4))
+ ports = *((u32 *) (skb->data + (ihl * 4)));
+ break;
+
+ default:
+ break;
+ }
+
+ skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+ if (!skb->rxhash)
+ skb->rxhash = 1;
+
+got_hash:
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+ if (cpu_online(tcpu)) {
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
+done:
+ rcu_read_unlock();
+ return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing. There are two masks since
+ * the sending of IPIs must be done with interrupts enabled. The select field
+ * indicates the current mask that enqueue_backlog uses to schedule IPIs.
+ * select is flipped before net_rps_action is called while still under lock,
+ * net_rps_action then uses the non-selected mask to send the IPIs and clears
+ * it without conflicting with enqueue_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+ cpumask_t mask[2];
+ int select;
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/* Called from hardirq (IPI) context */
+static void trigger_softirq(void *data)
+{
+ struct softnet_data *queue = data;
+ __napi_schedule(&queue->backlog);
+ __get_cpu_var(netdev_rx_stat).received_rps++;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+{
+ struct softnet_data *queue;
+ unsigned long flags;
+
+ queue = &per_cpu(softnet_data, cpu);
+
+ local_irq_save(flags);
+ __get_cpu_var(netdev_rx_stat).total++;
+
+ spin_lock(&queue->input_pkt_queue.lock);
+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+ if (queue->input_pkt_queue.qlen) {
+enqueue:
+ __skb_queue_tail(&queue->input_pkt_queue, skb);
+ spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
+ flags);
+ return NET_RX_SUCCESS;
+ }
+
+ /* Schedule NAPI for backlog device */
+ if (napi_schedule_prep(&queue->backlog)) {
+#ifdef CONFIG_SMP
+ if (cpu != smp_processor_id()) {
+ struct rps_remote_softirq_cpus *rcpus =
+ &__get_cpu_var(rps_remote_softirq_cpus);
+
+ cpu_set(cpu, rcpus->mask[rcpus->select]);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ } else
+ __napi_schedule(&queue->backlog);
+#else
+ __napi_schedule(&queue->backlog);
+#endif
+ }
+ goto enqueue;
+ }
+
+ spin_unlock(&queue->input_pkt_queue.lock);
+
+ __get_cpu_var(netdev_rx_stat).dropped++;
+ local_irq_restore(flags);
+
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
/**
* netif_rx - post buffer to the network code
@@ -2193,8 +2367,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
int netif_rx(struct sk_buff *skb)
{
- struct softnet_data *queue;
- unsigned long flags;
+ int cpu;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
@@ -2203,31 +2376,15 @@ int netif_rx(struct sk_buff *skb)
if (!skb->tstamp.tv64)
net_timestamp(skb);
- /*
- * The code is rearranged so that the path is the most
- * short when CPU is congested, but is still operating.
- */
- local_irq_save(flags);
- queue = &__get_cpu_var(softnet_data);
-
- __get_cpu_var(netdev_rx_stat).total++;
- if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
- if (queue->input_pkt_queue.qlen) {
-enqueue:
- __skb_queue_tail(&queue->input_pkt_queue, skb);
- local_irq_restore(flags);
- return NET_RX_SUCCESS;
- }
-
- napi_schedule(&queue->backlog);
- goto enqueue;
- }
-
- __get_cpu_var(netdev_rx_stat).dropped++;
- local_irq_restore(flags);
+#ifdef CONFIG_SMP
+ cpu = get_rps_cpu(skb->dev, skb);
+ if (cpu < 0)
+ cpu = smp_processor_id();
+#else
+ cpu = smp_processor_id();
+#endif
- kfree_skb(skb);
- return NET_RX_DROP;
+ return enqueue_to_backlog(skb, cpu);
}
EXPORT_SYMBOL(netif_rx);
@@ -2464,25 +2621,11 @@ void netif_nit_deliver(struct sk_buff *skb)
rcu_read_unlock();
}
-/**
- * netif_receive_skb - process receive buffer from network
- * @skb: buffer to process
- *
- * netif_receive_skb() is the main receive data processing function.
- * It always succeeds. The buffer may be dropped during processing
- * for congestion control or by the protocol layers.
- *
- * This function may only be called from softirq context and interrupts
- * should be enabled.
- *
- * Return values (usually ignored):
- * NET_RX_SUCCESS: no congestion
- * NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
+ struct net_device *master;
struct net_device *null_or_orig;
struct net_device *null_or_bond;
int ret = NET_RX_DROP;
@@ -2503,11 +2646,12 @@ int netif_receive_skb(struct sk_buff *skb)
null_or_orig = NULL;
orig_dev = skb->dev;
- if (orig_dev->master) {
- if (skb_bond_should_drop(skb))
+ master = ACCESS_ONCE(orig_dev->master);
+ if (master) {
+ if (skb_bond_should_drop(skb, master))
null_or_orig = orig_dev; /* deliver only exact match */
else
- skb->dev = orig_dev->master;
+ skb->dev = master;
}
__get_cpu_var(netdev_rx_stat).total++;
@@ -2588,6 +2732,37 @@ out:
rcu_read_unlock();
return ret;
}
+
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+#ifdef CONFIG_SMP
+ int cpu;
+
+ cpu = get_rps_cpu(skb->dev, skb);
+
+ if (cpu < 0)
+ return __netif_receive_skb(skb);
+ else
+ return enqueue_to_backlog(skb, cpu);
+#else
+ return __netif_receive_skb(skb);
+#endif
+}
EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending */
@@ -2914,16 +3089,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
do {
struct sk_buff *skb;
- local_irq_disable();
+ spin_lock_irq(&queue->input_pkt_queue.lock);
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb) {
__napi_complete(napi);
- local_irq_enable();
+ spin_unlock_irq(&queue->input_pkt_queue.lock);
break;
}
- local_irq_enable();
+ spin_unlock_irq(&queue->input_pkt_queue.lock);
- netif_receive_skb(skb);
+ __netif_receive_skb(skb);
} while (++work < quota && jiffies == start_time);
return work;
@@ -3012,6 +3187,24 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+#ifdef CONFIG_SMP
+/*
+ * net_rps_action sends any pending IPI's for rps. This is only called from
+ * softirq and interrupts must be enabled.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+ int cpu;
+
+ /* Send pending IPI's to kick RPS processing on remote cpus. */
+ for_each_cpu_mask_nr(cpu, *mask) {
+ struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+ if (cpu_online(cpu))
+ __smp_call_function_single(cpu, &queue->csd, 0);
+ }
+ cpus_clear(*mask);
+}
+#endif
static void net_rx_action(struct softirq_action *h)
{
@@ -3019,6 +3212,10 @@ static void net_rx_action(struct softirq_action *h)
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
+#ifdef CONFIG_SMP
+ int select;
+ struct rps_remote_softirq_cpus *rcpus;
+#endif
local_irq_disable();
@@ -3081,7 +3278,17 @@ static void net_rx_action(struct softirq_action *h)
netpoll_poll_unlock(have);
}
out:
+#ifdef CONFIG_SMP
+ rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+ select = rcpus->select;
+ rcpus->select ^= 1;
+
+ local_irq_enable();
+
+ net_rps_action(&rcpus->mask[select]);
+#else
local_irq_enable();
+#endif
#ifdef CONFIG_NET_DMA
/*
@@ -3327,10 +3534,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
{
struct netif_rx_stats *s = v;
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
s->total, s->dropped, s->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- s->cpu_collision);
+ s->cpu_collision, s->received_rps);
return 0;
}
@@ -3553,11 +3760,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
slave->master = master;
- synchronize_net();
-
- if (old)
+ if (old) {
+ synchronize_net();
dev_put(old);
-
+ }
if (master)
slave->flags |= IFF_SLAVE;
else
@@ -4253,12 +4459,13 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from)
}
EXPORT_SYMBOL(dev_unicast_unsync);
-static void dev_unicast_flush(struct net_device *dev)
+void dev_unicast_flush(struct net_device *dev)
{
netif_addr_lock_bh(dev);
__hw_addr_flush(&dev->uc);
netif_addr_unlock_bh(dev);
}
+EXPORT_SYMBOL(dev_unicast_flush);
static void dev_unicast_init(struct net_device *dev)
{
@@ -4280,7 +4487,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
}
}
-static void dev_addr_discard(struct net_device *dev)
+void dev_addr_discard(struct net_device *dev)
{
netif_addr_lock_bh(dev);
@@ -4289,6 +4496,7 @@ static void dev_addr_discard(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
+EXPORT_SYMBOL(dev_addr_discard);
/**
* dev_get_flags - get flags reported to userspace
@@ -5067,6 +5275,23 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
+ if (!dev->num_rx_queues) {
+ /*
+ * Allocate a single RX queue if driver never called
+ * alloc_netdev_mq
+ */
+
+ dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!dev->_rx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ dev->_rx->first = dev->_rx;
+ atomic_set(&dev->_rx->count, 1);
+ dev->num_rx_queues = 1;
+ }
+
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5424,9 +5649,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
struct netdev_queue *tx;
+ struct netdev_rx_queue *rx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
+ int i;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -5452,11 +5679,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
goto free_p;
}
+ rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!rx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "rx queues.\n");
+ goto free_tx;
+ }
+
+ atomic_set(&rx->count, queue_count);
+
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < queue_count; i++)
+ rx[i].first = rx;
+
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
if (dev_addr_init(dev))
- goto free_tx;
+ goto free_rx;
dev_unicast_init(dev);
@@ -5466,6 +5709,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+ dev->_rx = rx;
+ dev->num_rx_queues = queue_count;
+
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
@@ -5480,9 +5726,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
strcpy(dev->name, name);
return dev;
+free_rx:
+ kfree(rx);
free_tx:
kfree(tx);
-
free_p:
kfree(p);
return NULL;
@@ -5985,6 +6232,12 @@ static int __init net_dev_init(void)
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
+#ifdef CONFIG_SMP
+ queue->csd.func = trigger_softirq;
+ queue->csd.info = queue;
+ queue->csd.flags = 0;
+#endif
+
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
@@ -6023,7 +6276,7 @@ subsys_initcall(net_dev_init);
static int __init initialize_hashrnd(void)
{
- get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+ get_random_bytes(&hashrnd, sizeof(hashrnd));
return 0;
}
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index fd91569..3dc295b 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
netif_addr_lock_bh(dev);
if (alen != dev->addr_len)
- return -EINVAL;
- err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
+ err = -EINVAL;
+ else
+ err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0f2f821..f4cb6b6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
+#include <linux/bitops.h>
#include <asm/uaccess.h>
/*
@@ -199,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_settings(dev, &cmd);
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
{
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -214,6 +212,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
+ /*
+ * this method of obtaining string set info is deprecated;
+ * Use ETHTOOL_GSSET_INFO instead.
+ */
if (ops->get_sset_count) {
int rc;
@@ -237,10 +239,67 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
return 0;
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_sset_info info;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ u64 sset_mask;
+ int i, idx = 0, n_bits = 0, ret, rc;
+ u32 *info_buf = NULL;
+
+ if (!ops->get_sset_count)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&info, useraddr, sizeof(info)))
+ return -EFAULT;
+
+ /* store copy of mask, because we zero struct later on */
+ sset_mask = info.sset_mask;
+ if (!sset_mask)
+ return 0;
+
+ /* calculate size of return buffer */
+ n_bits = hweight64(sset_mask);
+
+ memset(&info, 0, sizeof(info));
+ info.cmd = ETHTOOL_GSSET_INFO;
+
+ info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
+ if (!info_buf)
+ return -ENOMEM;
+
+ /*
+ * fill return buffer based on input bitmask and successful
+ * get_sset_count return
+ */
+ for (i = 0; i < 64; i++) {
+ if (!(sset_mask & (1ULL << i)))
+ continue;
+
+ rc = ops->get_sset_count(dev, i);
+ if (rc >= 0) {
+ info.sset_mask |= (1ULL << i);
+ info_buf[idx++] = rc;
+ }
+ }
+
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ goto out;
+
+ useraddr += offsetof(struct ethtool_sset_info, data);
+ if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
+ goto out;
+
+ ret = 0;
+
+out:
+ kfree(info_buf);
+ return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc cmd;
@@ -253,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera
return dev->ethtool_ops->set_rxnfc(dev, &cmd);
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -328,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
list->count++;
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -799,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
return ret;
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
@@ -816,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us
return 0;
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce;
@@ -1229,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
return actor(dev, edata.data);
}
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
{
struct ethtool_flash efl;
@@ -1471,6 +1515,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXNTUPLE:
rc = ethtool_get_rx_ntuple(dev, useraddr);
break;
+ case ETHTOOL_GSSET_INFO:
+ rc = ethtool_get_sset_info(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a24377..2ff3489 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -108,7 +108,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
struct fib_rules_ops *ops;
int err;
- ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
@@ -123,7 +123,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
return ops;
}
-
EXPORT_SYMBOL_GPL(fib_rules_register);
void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -157,7 +156,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
call_rcu(&ops->rcu, fib_rules_put_rcu);
}
-
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -220,7 +218,6 @@ out:
return err;
}
-
EXPORT_SYMBOL_GPL(fib_rules_lookup);
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -613,7 +610,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
break;
cb->args[1] = 0;
- skip:
+skip:
idx++;
}
rcu_read_unlock();
@@ -685,7 +682,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
struct fib_rules_ops *ops;
ASSERT_RTNL();
- rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
@@ -699,8 +695,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
break;
}
- rcu_read_unlock();
-
return NOTIFY_DONE;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d102f6d..6cee643 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
}
static void neigh_invalidate(struct neighbour *neigh)
+ __releases(neigh->lock)
+ __acquires(neigh->lock)
{
struct sk_buff *skb;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 099c753..f6b6bfe 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -466,6 +466,216 @@ static struct attribute_group wireless_group = {
};
#endif
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr, char *buf);
+ ssize_t (*store)(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr, \
+ struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops rx_queue_sysfs_ops = {
+ .show = rx_queue_attr_show,
+ .store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attribute, char *buf)
+{
+ struct rps_map *map;
+ cpumask_var_t mask;
+ size_t len = 0;
+ int i;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ rcu_read_lock();
+ map = rcu_dereference(queue->rps_map);
+ if (map)
+ for (i = 0; i < map->len; i++)
+ cpumask_set_cpu(map->cpus[i], mask);
+
+ len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+ if (PAGE_SIZE - len < 3) {
+ rcu_read_unlock();
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+ rcu_read_unlock();
+
+ free_cpumask_var(mask);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+ struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+ kfree(map);
+}
+
+ssize_t store_rps_map(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct rps_map *old_map, *map;
+ cpumask_var_t mask;
+ int err, cpu, i;
+ static DEFINE_SPINLOCK(rps_map_lock);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err) {
+ free_cpumask_var(mask);
+ return err;
+ }
+
+ map = kzalloc(max_t(unsigned,
+ RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+ GFP_KERNEL);
+ if (!map) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+
+ i = 0;
+ for_each_cpu_and(cpu, mask, cpu_online_mask)
+ map->cpus[i++] = cpu;
+
+ if (i)
+ map->len = i;
+ else {
+ kfree(map);
+ map = NULL;
+ }
+
+ spin_lock(&rps_map_lock);
+ old_map = queue->rps_map;
+ rcu_assign_pointer(queue->rps_map, map);
+ spin_unlock(&rps_map_lock);
+
+ if (old_map)
+ call_rcu(&old_map->rcu, rps_map_release);
+
+ free_cpumask_var(mask);
+ return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+ __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct attribute *rx_queue_default_attrs[] = {
+ &rps_cpus_attribute.attr,
+ NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+ struct rps_map *map = queue->rps_map;
+ struct netdev_rx_queue *first = queue->first;
+
+ if (map)
+ call_rcu(&map->rcu, rps_map_release);
+
+ if (atomic_dec_and_test(&first->count))
+ kfree(first);
+}
+
+static struct kobj_type rx_queue_ktype = {
+ .sysfs_ops = &rx_queue_sysfs_ops,
+ .release = rx_queue_release,
+ .default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+ struct netdev_rx_queue *queue = net->_rx + index;
+ struct kobject *kobj = &queue->kobj;
+ int error = 0;
+
+ kobj->kset = net->queues_kset;
+ error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+ "rx-%u", index);
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+
+ return error;
+}
+
+static int rx_queue_register_kobjects(struct net_device *net)
+{
+ int i;
+ int error = 0;
+
+ net->queues_kset = kset_create_and_add("queues",
+ NULL, &net->dev.kobj);
+ if (!net->queues_kset)
+ return -ENOMEM;
+ for (i = 0; i < net->num_rx_queues; i++) {
+ error = rx_queue_add_kobject(net, i);
+ if (error)
+ break;
+ }
+
+ if (error)
+ while (--i >= 0)
+ kobject_put(&net->_rx[i].kobj);
+
+ return error;
+}
+
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+ int i;
+
+ for (i = 0; i < net->num_rx_queues; i++)
+ kobject_put(&net->_rx[i].kobj);
+ kset_unregister(net->queues_kset);
+}
+
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
@@ -529,6 +739,10 @@ void netdev_unregister_kobject(struct net_device * net)
if (!net_eq(dev_net(net), &init_net))
return;
+#ifdef CONFIG_SYSFS
+ rx_queue_remove_kobjects(net);
+#endif
+
device_del(dev);
}
@@ -537,6 +751,7 @@ int netdev_register_kobject(struct net_device *net)
{
struct device *dev = &(net->dev);
const struct attribute_group **groups = net->sysfs_groups;
+ int error = 0;
dev->class = &net_class;
dev->platform_data = net;
@@ -563,7 +778,19 @@ int netdev_register_kobject(struct net_device *net)
if (!net_eq(dev_net(net), &init_net))
return 0;
- return device_add(dev);
+ error = device_add(dev);
+ if (error)
+ return error;
+
+#ifdef CONFIG_SYSFS
+ error = rx_queue_register_kobjects(net);
+ if (error) {
+ device_del(dev);
+ return error;
+ }
+#endif
+
+ return error;
}
int netdev_class_create_file(struct class_attribute *class_attr)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 7aa6972..d4ec38f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -735,7 +735,7 @@ int netpoll_setup(struct netpoll *np)
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
- goto release;
+ goto put;
}
npinfo->rx_flags = 0;
@@ -845,7 +845,7 @@ int netpoll_setup(struct netpoll *np)
kfree(npinfo);
}
-
+put:
dev_put(ndev);
return err;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4392381..2ad68da 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.72"
+#define VERSION "2.73"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -190,6 +190,7 @@
#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
+#define F_NODE (1<<15) /* Node memory alloc*/
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +373,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ int node; /* Memory node */
#ifdef CONFIG_XFRM
__u8 ipsmode; /* IPSEC mode (config) */
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->traffic_class)
seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ if (pkt_dev->node >= 0)
+ seq_printf(seq, " node: %d\n", pkt_dev->node);
+
seq_printf(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_SVID_RND)
seq_printf(seq, "SVID_RND ");
+ if (pkt_dev->flags & F_NODE)
+ seq_printf(seq, "NODE_ALLOC ");
+
seq_puts(seq, "\n");
/* not really stopped, more like last-running-at */
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->dst_mac_count);
return count;
}
+ if (!strcmp(name, "node")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+
+ if (node_possible(value)) {
+ pkt_dev->node = value;
+ sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ }
+ else
+ sprintf(pg_result, "ERROR: node not possible");
+ return count;
+ }
if (!strcmp(name, "flag")) {
char f[32];
memset(f, 0, 32);
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!IPV6") == 0)
pkt_dev->flags &= ~F_IPV6;
+ else if (strcmp(f, "NODE_ALLOC") == 0)
+ pkt_dev->flags |= F_NODE;
+
+ else if (strcmp(f, "!NODE_ALLOC") == 0)
+ pkt_dev->flags &= ~F_NODE;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mod_cur_headers(pkt_dev);
datalen = (odev->hard_header_len + 16) & ~0xf;
- skb = __netdev_alloc_skb(odev,
- pkt_dev->cur_pkt_size + 64
- + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
+ if (pkt_dev->flags & F_NODE) {
+ int node;
+
+ if (pkt_dev->node >= 0)
+ node = pkt_dev->node;
+ else
+ node = numa_node_id();
+
+ skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = odev;
+ }
+ }
+ else
+ skb = __netdev_alloc_skb(odev,
+ pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_p = 0;
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
+ pkt_dev->node = -1;
err = pktgen_setup_dev(pkt_dev, ifname);
if (err)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4568120..ffc6cf3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->rx_compressed = b->rx_compressed;
a->tx_compressed = b->tx_compressed;
-};
+}
+
+static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a,
+ const struct net_device_stats *b)
+{
+ a->rx_packets = b->rx_packets;
+ a->tx_packets = b->tx_packets;
+ a->rx_bytes = b->rx_bytes;
+ a->tx_bytes = b->tx_bytes;
+ a->rx_errors = b->rx_errors;
+ a->tx_errors = b->tx_errors;
+ a->rx_dropped = b->rx_dropped;
+ a->tx_dropped = b->tx_dropped;
+
+ a->multicast = b->multicast;
+ a->collisions = b->collisions;
+
+ a->rx_length_errors = b->rx_length_errors;
+ a->rx_over_errors = b->rx_over_errors;
+ a->rx_crc_errors = b->rx_crc_errors;
+ a->rx_frame_errors = b->rx_frame_errors;
+ a->rx_fifo_errors = b->rx_fifo_errors;
+ a->rx_missed_errors = b->rx_missed_errors;
+
+ a->tx_aborted_errors = b->tx_aborted_errors;
+ a->tx_carrier_errors = b->tx_carrier_errors;
+ a->tx_fifo_errors = b->tx_fifo_errors;
+ a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+ a->tx_window_errors = b->tx_window_errors;
+
+ a->rx_compressed = b->rx_compressed;
+ a->tx_compressed = b->tx_compressed;
+}
static inline int rtnl_vfinfo_size(const struct net_device *dev)
{
@@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
stats = dev_get_stats(dev);
copy_rtnl_link_stats(nla_data(attr), stats);
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
+ if (attr == NULL)
+ goto nla_put_failure;
+
+ stats = dev_get_stats(dev);
+ copy_rtnl_link_stats64(nla_data(attr), stats);
+
if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
int i;
struct ifla_vf_info ivi;
@@ -1473,6 +1513,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_POST_INIT:
case NETDEV_REGISTER:
case NETDEV_CHANGE:
+ case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER:
case NETDEV_UNREGISTER_BATCH:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e06..bdea0ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->network_header = old->network_header;
new->mac_header = old->mac_header;
skb_dst_set(new, dst_clone(skb_dst(old)));
+ new->rxhash = old->rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(len);
C(data_len);
C(mac_len);
+ C(rxhash);
n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
n->cloned = 1;
n->nohdr = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index fcd397a..c5812bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
- } else
- sk_add_backlog(sk, skb);
+ } else if (sk_add_backlog(sk, skb)) {
+ bh_unlock_sock(sk);
+ atomic_inc(&sk->sk_drops);
+ goto discard_and_relse;
+ }
+
bh_unlock_sock(sk);
out:
sock_put(sk);
@@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_lock_init(newsk);
bh_lock_sock(newsk);
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+ newsk->sk_backlog.len = 0;
atomic_set(&newsk->sk_rmem_alloc, 0);
/*
@@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk)
bh_lock_sock(sk);
} while ((skb = sk->sk_backlog.head) != NULL);
+
+ /*
+ * Doing the zeroing here guarantee we can not loop forever
+ * while a wild producer attempts to flood us.
+ */
+ sk->sk_backlog.len = 0;
}
/**
@@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -2276,7 +2288,8 @@ out_free_request_sock_slab:
prot->rsk_prot->slab = NULL;
}
out_free_request_sock_slab_name:
- kfree(prot->rsk_prot->slab_name);
+ if (prot->rsk_prot)
+ kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5ef32c2..53f8e12 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -189,7 +189,7 @@ enum {
#define DCCP_MIB_MAX __DCCP_MIB_MAX
struct dccp_mib {
unsigned long mibs[DCCP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b195c4f..4071eaf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -998,11 +998,11 @@ static struct inet_protosw dccp_v4_protosw = {
static int __net_init dccp_v4_init_net(struct net *net)
{
- int err;
+ if (dccp_hashinfo.bhash == NULL)
+ return -ESOCKTNOSUPPORT;
- err = inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET,
- SOCK_DCCP, IPPROTO_DCCP, net);
- return err;
+ return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET,
+ SOCK_DCCP, IPPROTO_DCCP, net);
}
static void __net_exit dccp_v4_exit_net(struct net *net)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1aec634..af3394d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1191,11 +1191,11 @@ static struct inet_protosw dccp_v6_protosw = {
static int __net_init dccp_v6_init_net(struct net *net)
{
- int err;
+ if (dccp_hashinfo.bhash == NULL)
+ return -ESOCKTNOSUPPORT;
- err = inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
- SOCK_DCCP, IPPROTO_DCCP, net);
- return err;
+ return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
+ SOCK_DCCP, IPPROTO_DCCP, net);
}
static void __net_exit dccp_v6_exit_net(struct net *net)
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index af226a0..0d508c3 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
* in main socket hash table and lock on listening
* socket does not protect us more.
*/
- sk_add_backlog(child, skb);
+ __sk_add_backlog(child, skb);
}
bh_unlock_sock(child);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0ef7061..aa4cef3 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1036,7 +1036,7 @@ static int __init dccp_init(void)
FIELD_SIZEOF(struct sk_buff, cb));
rc = percpu_counter_init(&dccp_orphan_count, 0);
if (rc)
- goto out;
+ goto out_fail;
rc = -ENOBUFS;
inet_hashinfo_init(&dccp_hashinfo);
dccp_hashinfo.bind_bucket_cachep =
@@ -1125,8 +1125,9 @@ static int __init dccp_init(void)
goto out_sysctl_exit;
dccp_timestamping_init();
-out:
- return rc;
+
+ return 0;
+
out_sysctl_exit:
dccp_sysctl_exit();
out_ackvec_exit:
@@ -1135,18 +1136,19 @@ out_free_dccp_mib:
dccp_mib_exit();
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
- dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
- dccp_hashinfo.bind_bucket_cachep = NULL;
out_free_percpu:
percpu_counter_destroy(&dccp_orphan_count);
- goto out;
+out_fail:
+ dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.ehash = NULL;
+ dccp_hashinfo.bind_bucket_cachep = NULL;
+ return rc;
}
static void __exit dccp_fini(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1a..c9a1c68 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -587,9 +587,15 @@ choice
config DEFAULT_HTCP
bool "Htcp" if TCP_CONG_HTCP=y
+ config DEFAULT_HYBLA
+ bool "Hybla" if TCP_CONG_HYBLA=y
+
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_VENO
+ bool "Veno" if TCP_CONG_VENO=y
+
config DEFAULT_WESTWOOD
bool "Westwood" if TCP_CONG_WESTWOOD=y
@@ -610,8 +616,10 @@ config DEFAULT_TCP_CONG
default "bic" if DEFAULT_BIC
default "cubic" if DEFAULT_CUBIC
default "htcp" if DEFAULT_HTCP
+ default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
default "westwood" if DEFAULT_WESTWOOD
+ default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
default "cubic"
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 33b7dff..55e1190 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1401,10 +1401,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
{
BUG_ON(ptr == NULL);
- ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+ ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
if (!ptr[0])
goto err0;
- ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+ ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
if (!ptr[1])
goto err1;
return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 51ca946..c75320e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1095,10 +1095,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
case NETDEV_DOWN:
ip_mc_down(in_dev);
break;
- case NETDEV_BONDING_OLDTYPE:
+ case NETDEV_PRE_TYPE_CHANGE:
ip_mc_unmap(in_dev);
break;
- case NETDEV_BONDING_NEWTYPE:
+ case NETDEV_POST_TYPE_CHANGE:
ip_mc_remap(in_dev);
break;
case NETDEV_CHANGEMTU:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c0c5274..f47c9f7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (saddr)
memcpy(&iph->saddr, saddr, 4);
-
- if (daddr) {
+ if (daddr)
memcpy(&iph->daddr, daddr, 4);
- return t->hlen;
- }
- if (iph->daddr && !ipv4_is_multicast(iph->daddr))
+ if (iph->daddr)
return t->hlen;
return -t->hlen;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 10a6a60..6789092 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -187,6 +187,16 @@ struct ic_device {
static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
static struct net_device *ic_dev __initdata = NULL; /* Selected device */
+static bool __init ic_device_match(struct net_device *dev)
+{
+ if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+ (!(dev->flags & IFF_LOOPBACK) &&
+ (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
+ strncmp(dev->name, "dummy", 5)))
+ return true;
+ return false;
+}
+
static int __init ic_open_devs(void)
{
struct ic_device *d, **last;
@@ -207,10 +217,7 @@ static int __init ic_open_devs(void)
for_each_netdev(&init_net, dev) {
if (dev->flags & IFF_LOOPBACK)
continue;
- if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
- (!(dev->flags & IFF_LOOPBACK) &&
- (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
- strncmp(dev->name, "dummy", 5))) {
+ if (ic_device_match(dev)) {
int able = 0;
if (dev->mtu >= 364)
able |= IC_BOOTP;
@@ -228,7 +235,7 @@ static int __init ic_open_devs(void)
}
if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) {
rtnl_unlock();
- return -1;
+ return -ENOMEM;
}
d->dev = dev;
*last = d;
@@ -253,7 +260,7 @@ static int __init ic_open_devs(void)
printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
else
printk(KERN_ERR "IP-Config: No network devices available.\n");
- return -1;
+ return -ENODEV;
}
return 0;
}
@@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name)
return addr;
}
+#define DEVICE_WAIT_MAX 12 /* 12 seconds */
+
+static int __init wait_for_devices(void)
+{
+ int i;
+
+ msleep(CONF_PRE_OPEN);
+ for (i = 0; i < DEVICE_WAIT_MAX; i++) {
+ struct net_device *dev;
+ int found = 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ if (ic_device_match(dev)) {
+ found = 1;
+ break;
+ }
+ }
+ rtnl_unlock();
+ if (found)
+ return 0;
+ ssleep(1);
+ }
+ return -ENODEV;
+}
+
/*
* IP Autoconfig dispatcher.
*/
@@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void)
#ifdef IPCONFIG_DYNAMIC
int retries = CONF_OPEN_RETRIES;
#endif
+ int err;
#ifdef CONFIG_PROC_FS
proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
@@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void)
#ifdef IPCONFIG_DYNAMIC
try_try_again:
#endif
- /* Give hardware a chance to settle */
- msleep(CONF_PRE_OPEN);
+ /* Wait for devices to appear */
+ err = wait_for_devices();
+ if (err)
+ return err;
/* Setup all network devices */
- if (ic_open_devs() < 0)
- return -1;
+ err = ic_open_devs();
+ if (err)
+ return err;
/* Give drivers a chance to settle */
ssleep(CONF_POST_OPEN);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8582e12..0b9d03c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -802,6 +802,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
int line;
struct mfc_cache *uc, *c, **cp;
+ if (mfc->mfcc_parent >= MAXVIFS)
+ return -ENFILE;
+
line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
for (cp = &net->ipv4.mfc_cache_array[line];
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 242ed23..3dc9914 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -249,6 +249,9 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
+ SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
+ SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
+ SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2ba558..32d3961 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(struct dst_ops *ops);
-static void rt_emergency_hash_rebuild(struct net *net);
static struct dst_ops ipv4_dst_ops = {
@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
#define FRACT_BITS 3
#define ONE (1UL << FRACT_BITS)
+/*
+ * Given a hash chain and an item in this hash chain,
+ * find if a previous entry has the same hash_inputs
+ * (but differs on tos, mark or oif)
+ * Returns 0 if an alias is found.
+ * Returns ONE if rth has no alias before itself.
+ */
+static int has_noalias(const struct rtable *head, const struct rtable *rth)
+{
+ const struct rtable *aux = head;
+
+ while (aux != rth) {
+ if (compare_hash_inputs(&aux->fl, &rth->fl))
+ return 0;
+ aux = aux->u.dst.rt_next;
+ }
+ return ONE;
+}
+
static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, *aux, **rthp;
+ struct rtable *rth, **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
@@ -835,15 +853,7 @@ nofree:
* attributes don't unfairly skew
* the length computation
*/
- for (aux = rt_hash_table[i].chain;;) {
- if (aux == rth) {
- length += ONE;
- break;
- }
- if (compare_hash_inputs(&aux->fl, &rth->fl))
- break;
- aux = aux->u.dst.rt_next;
- }
+ length += has_noalias(rt_hash_table[i].chain, rth);
continue;
}
} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -922,10 +932,8 @@ static void rt_secret_rebuild_oneshot(struct net *net)
{
del_timer_sync(&net->ipv4.rt_secret_timer);
rt_cache_invalidate(net);
- if (ip_rt_secret_interval) {
- net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval;
- add_timer(&net->ipv4.rt_secret_timer);
- }
+ if (ip_rt_secret_interval)
+ mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
}
static void rt_emergency_hash_rebuild(struct net *net)
@@ -1073,6 +1081,21 @@ work_done:
out: return 0;
}
+/*
+ * Returns number of entries in a hash chain that have different hash_inputs
+ */
+static int slow_chain_length(const struct rtable *head)
+{
+ int length = 0;
+ const struct rtable *rth = head;
+
+ while (rth) {
+ length += has_noalias(head, rth);
+ rth = rth->u.dst.rt_next;
+ }
+ return length >> FRACT_BITS;
+}
+
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb)
{
@@ -1185,7 +1208,8 @@ restart:
rt_free(cand);
}
} else {
- if (chain_length > rt_chain_length_max) {
+ if (chain_length > rt_chain_length_max &&
+ slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
struct net *net = dev_net(rt->u.dst.dev);
int num = ++net->ipv4.current_rt_cache_rebuild_count;
if (!rt_caching(dev_net(rt->u.dst.dev))) {
@@ -1417,7 +1441,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
dev_hold(rt->u.dst.dev);
if (rt->idev)
in_dev_hold(rt->idev);
- rt->u.dst.obsolete = 0;
+ rt->u.dst.obsolete = -1;
rt->u.dst.lastuse = jiffies;
rt->u.dst.path = &rt->u.dst;
rt->u.dst.neighbour = NULL;
@@ -1482,7 +1506,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
struct dst_entry *ret = dst;
if (rt) {
- if (dst->obsolete) {
+ if (dst->obsolete > 0) {
ip_rt_put(rt);
ret = NULL;
} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
@@ -1702,7 +1726,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
- return NULL;
+ if (rt_is_expired((struct rtable *)dst))
+ return NULL;
+ return dst;
}
static void ipv4_dst_destroy(struct dst_entry *dst)
@@ -1864,7 +1890,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (!rth)
goto e_nobufs;
- rth->u.dst.output= ip_rt_bug;
+ rth->u.dst.output = ip_rt_bug;
+ rth->u.dst.obsolete = -1;
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
@@ -2030,6 +2057,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->fl.oif = 0;
rth->rt_spec_dst= spec_dst;
+ rth->u.dst.obsolete = -1;
rth->u.dst.input = ip_forward;
rth->u.dst.output = ip_output;
rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
@@ -2194,6 +2222,7 @@ local_input:
goto e_nobufs;
rth->u.dst.output= ip_rt_bug;
+ rth->u.dst.obsolete = -1;
rth->rt_genid = rt_genid(net);
atomic_set(&rth->u.dst.__refcnt, 1);
@@ -2420,6 +2449,7 @@ static int __mkroute_output(struct rtable **result,
rth->rt_spec_dst= fl->fl4_src;
rth->u.dst.output=ip_output;
+ rth->u.dst.obsolete = -1;
rth->rt_genid = rt_genid(dev_net(dev_out));
RT_CACHE_STAT_INC(out_slow_tot);
@@ -3077,22 +3107,20 @@ static void rt_secret_reschedule(int old)
rtnl_lock();
for_each_net(net) {
int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
+ long time;
if (!new)
continue;
if (deleted) {
- long time = net->ipv4.rt_secret_timer.expires - jiffies;
+ time = net->ipv4.rt_secret_timer.expires - jiffies;
if (time <= 0 || (time += diff) <= 0)
time = 0;
-
- net->ipv4.rt_secret_timer.expires = time;
} else
- net->ipv4.rt_secret_timer.expires = new;
+ time = new;
- net->ipv4.rt_secret_timer.expires += jiffies;
- add_timer(&net->ipv4.rt_secret_timer);
+ mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
}
rtnl_unlock();
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5901010..6afb6d8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tp->urg_seq == tp->copied_seq &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->urg_data)
- target--;
+ target++;
/* Potential race condition. If read of tp below will
* escape above sk->sk_state, we can be illegally awaken
@@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk)
tp->ucopy.memory = 0;
}
+#ifdef CONFIG_NET_DMA
+static void tcp_service_net_dma(struct sock *sk, bool wait)
+{
+ dma_cookie_t done, used;
+ dma_cookie_t last_issued;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!tp->ucopy.dma_chan)
+ return;
+
+ last_issued = tp->ucopy.dma_cookie;
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+ do {
+ if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+ last_issued, &done,
+ &used) == DMA_SUCCESS) {
+ /* Safe to free early-copied skbs now */
+ __skb_queue_purge(&sk->sk_async_wait_queue);
+ break;
+ } else {
+ struct sk_buff *skb;
+ while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+ (dma_async_is_complete(skb->dma_cookie, done,
+ used) == DMA_SUCCESS)) {
+ __skb_dequeue(&sk->sk_async_wait_queue);
+ kfree_skb(skb);
+ }
+ }
+ } while (wait);
+}
+#endif
+
static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
@@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* __ Set realtime policy in scheduler __ */
}
+#ifdef CONFIG_NET_DMA
+ if (tp->ucopy.dma_chan)
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
@@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sk_wait_data(sk, &timeo);
#ifdef CONFIG_NET_DMA
+ tcp_service_net_dma(sk, false); /* Don't block */
tp->ucopy.wakeup = 0;
#endif
@@ -1633,6 +1671,9 @@ do_prequeue:
copied = -EFAULT;
break;
}
+
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
if ((offset + used) == skb->len)
copied_early = 1;
@@ -1702,27 +1743,9 @@ skip_copy:
}
#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan) {
- dma_cookie_t done, used;
-
- dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
-
- while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
- tp->ucopy.dma_cookie, &done,
- &used) == DMA_IN_PROGRESS) {
- /* do partial cleanup of sk_async_wait_queue */
- while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
- (dma_async_is_complete(skb->dma_cookie, done,
- used) == DMA_SUCCESS)) {
- __skb_dequeue(&sk->sk_async_wait_queue);
- kfree_skb(skb);
- }
- }
+ tcp_service_net_dma(sk, true); /* Wait for queue to drain */
+ tp->ucopy.dma_chan = NULL;
- /* Safe to free early-copied skbs now */
- __skb_queue_purge(&sk->sk_async_wait_queue);
- tp->ucopy.dma_chan = NULL;
- }
if (tp->ucopy.pinned_list) {
dma_unpin_iovec_pages(tp->ucopy.pinned_list);
tp->ucopy.pinned_list = NULL;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 788851c..c096a42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2511,6 +2511,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
int err;
unsigned int mss;
+ if (packets == 0)
+ return;
+
WARN_ON(packets > tp->packets_out);
if (tp->lost_skb_hint) {
skb = tp->lost_skb_hint;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c3588b4..f4df5f9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -370,6 +370,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (sk->sk_state == TCP_CLOSE)
goto out;
+ if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ goto out;
+ }
+
icsk = inet_csk(sk);
tp = tcp_sk(sk);
seq = ntohl(th->seq);
@@ -1651,13 +1656,15 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!sk)
goto no_tcp_socket;
- if (iph->ttl < inet_sk(sk)->min_ttl)
- goto discard_and_relse;
-
process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
+ if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ goto discard_and_relse;
+ }
+
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
nf_reset(skb);
@@ -1682,8 +1689,11 @@ process:
if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
}
- } else
- sk_add_backlog(sk, skb);
+ } else if (unlikely(sk_add_backlog(sk, skb))) {
+ bh_unlock_sock(sk);
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ goto discard_and_relse;
+ }
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f206ee5..32f9627 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -671,6 +671,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
return NULL;
}
@@ -728,7 +729,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
* in main socket hash table and lock on listening
* socket does not protect us more.
*/
- sk_add_backlog(child, skb);
+ __sk_add_backlog(child, skb);
}
bh_unlock_sock(child);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4a1605d..f181b78 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct tcp_extend_values *xvp = tcp_xv(rvp);
struct inet_request_sock *ireq = inet_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_cookie_values *cvp = tp->cookie_values;
struct tcphdr *th;
struct sk_buff *skb;
struct tcp_md5sig_key *md5;
int tcp_header_size;
int mss;
+ int s_data_desired = 0;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+ if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
+ s_data_desired = cvp->s_data_desired;
+ skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
if (skb == NULL)
return NULL;
@@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
if (OPTION_COOKIE_EXTENSION & opts.options) {
- const struct tcp_cookie_values *cvp = tp->cookie_values;
-
- if (cvp != NULL &&
- cvp->s_data_constant &&
- cvp->s_data_desired > 0) {
- u8 *buf = skb_put(skb, cvp->s_data_desired);
+ if (s_data_desired) {
+ u8 *buf = skb_put(skb, s_data_desired);
/* copy data directly from the listening socket. */
- memcpy(buf, cvp->s_data_payload, cvp->s_data_desired);
- TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired;
+ memcpy(buf, cvp->s_data_payload, s_data_desired);
+ TCP_SKB_CB(skb)->end_seq += s_data_desired;
}
if (opts.hash_size > 0) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a17629b..b2e6bbc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -134,7 +134,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
}
/* This function calculates a "timeout" which is equivalent to the timeout of a
- * TCP connection after "boundary" unsucessful, exponentially backed-off
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
* retransmissions with an initial RTO of TCP_RTO_MIN.
*/
static bool retransmits_timed_out(struct sock *sk,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 608a544..7af756d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
- else
- sk_add_backlog(sk, skb);
+ else if (sk_add_backlog(sk, skb)) {
+ bh_unlock_sock(sk);
+ goto drop;
+ }
bh_unlock_sock(sk);
return rc;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 67107d6..e4a1483 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rtable *rt = (struct rtable *)xdst->route;
- xdst->u.rt.fl = rt->fl;
+ xdst->u.rt.fl = *fl;
xdst->u.dst.dev = dev;
dev_hold(dev);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 88fd8c5..68e5809 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,7 +81,7 @@
#include <linux/random.h>
#endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/proc_fs.h>
@@ -97,7 +97,11 @@
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
-#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
+
+#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
+#define ADDRCONF_TIMER_FUZZ (HZ / 4)
+#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
#ifdef CONFIG_SYSCTL
static void addrconf_sysctl_register(struct inet6_dev *idev);
@@ -126,8 +130,8 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
/*
* Configured unicast address hash table
*/
-static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_RWLOCK(addrconf_hash_lock);
+static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(addrconf_hash_lock);
static void addrconf_verify(unsigned long);
@@ -137,8 +141,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
-static void addrconf_bonding_change(struct net_device *dev,
- unsigned long event);
+static void addrconf_type_change(struct net_device *dev,
+ unsigned long event);
static int addrconf_ifdown(struct net_device *dev, int how);
static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
@@ -151,8 +155,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo);
-static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev);
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev);
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
@@ -249,8 +253,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp)
__in6_ifa_put(ifp);
}
-enum addrconf_timer_t
-{
+enum addrconf_timer_t {
AC_NONE,
AC_DAD,
AC_RS,
@@ -270,7 +273,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
case AC_RS:
ifp->timer.function = addrconf_rs_timer;
break;
- default:;
+ default:
+ break;
}
ifp->timer.expires = jiffies + when;
add_timer(&ifp->timer);
@@ -317,7 +321,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
{
struct net_device *dev = idev->dev;
- WARN_ON(idev->addr_list != NULL);
+ WARN_ON(!list_empty(&idev->addr_list));
WARN_ON(idev->mc_list != NULL);
#ifdef NET_REFCNT_DEBUG
@@ -325,7 +329,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
#endif
dev_put(dev);
if (!idev->dead) {
- printk("Freeing alive inet6 device %p\n", idev);
+ pr_warning("Freeing alive inet6 device %p\n", idev);
return;
}
snmp6_free_dev(idev);
@@ -350,6 +354,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
rwlock_init(&ndev->lock);
ndev->dev = dev;
+ INIT_LIST_HEAD(&ndev->addr_list);
+
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
@@ -401,6 +407,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
#endif
#ifdef CONFIG_IPV6_PRIVACY
+ INIT_LIST_HEAD(&ndev->tempaddr_list);
setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
if ((dev->flags&IFF_LOOPBACK) ||
dev->type == ARPHRD_TUNNEL ||
@@ -438,8 +445,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
ASSERT_RTNL();
- if ((idev = __in6_dev_get(dev)) == NULL) {
- if ((idev = ipv6_add_dev(dev)) == NULL)
+ idev = __in6_dev_get(dev);
+ if (!idev) {
+ idev = ipv6_add_dev(dev);
+ if (!idev)
return NULL;
}
@@ -465,7 +474,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
}
- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
if (idev->cnf.forwarding)
@@ -522,12 +532,16 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
}
#endif
-/* Nobody refers to this ifaddr, destroy it */
+static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
+{
+ struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
+ kfree(ifp);
+}
+/* Nobody refers to this ifaddr, destroy it */
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
{
- WARN_ON(ifp->if_next != NULL);
- WARN_ON(ifp->lst_next != NULL);
+ WARN_ON(!hlist_unhashed(&ifp->addr_lst));
#ifdef NET_REFCNT_DEBUG
printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
@@ -536,54 +550,45 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
in6_dev_put(ifp->idev);
if (del_timer(&ifp->timer))
- printk("Timer is still running, when freeing ifa=%p\n", ifp);
+ pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
if (!ifp->dead) {
- printk("Freeing alive inet6 address %p\n", ifp);
+ pr_warning("Freeing alive inet6 address %p\n", ifp);
return;
}
dst_release(&ifp->rt->u.dst);
- kfree(ifp);
+ call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
}
static void
ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
{
- struct inet6_ifaddr *ifa, **ifap;
+ struct list_head *p;
int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
/*
* Each device address list is sorted in order of scope -
* global before linklocal.
*/
- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
- ifap = &ifa->if_next) {
+ list_for_each(p, &idev->addr_list) {
+ struct inet6_ifaddr *ifa
+ = list_entry(p, struct inet6_ifaddr, if_list);
if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
break;
}
- ifp->if_next = *ifap;
- *ifap = ifp;
+ list_add(&ifp->if_list, p);
}
-/*
- * Hash function taken from net_alias.c
- */
-static u8 ipv6_addr_hash(const struct in6_addr *addr)
+static u32 ipv6_addr_hash(const struct in6_addr *addr)
{
- __u32 word;
-
/*
* We perform the hash function over the last 64 bits of the address
* This will include the IEEE address token on links that support it.
*/
-
- word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]);
- word ^= (word >> 16);
- word ^= (word >> 8);
-
- return ((word ^ (word >> 4)) & 0x0f);
+ return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0)
+ & (IN6_ADDR_HSIZE - 1);
}
/* On success it returns ifp with increased reference count */
@@ -594,7 +599,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
{
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
- int hash;
+ unsigned int hash;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -615,7 +620,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
goto out2;
}
- write_lock(&addrconf_hash_lock);
+ spin_lock(&addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
@@ -642,6 +647,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
spin_lock_init(&ifa->lock);
init_timer(&ifa->timer);
+ INIT_HLIST_NODE(&ifa->addr_lst);
ifa->timer.data = (unsigned long) ifa;
ifa->scope = scope;
ifa->prefix_len = pfxlen;
@@ -668,10 +674,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
/* Add to big hash table */
hash = ipv6_addr_hash(addr);
- ifa->lst_next = inet6_addr_lst[hash];
- inet6_addr_lst[hash] = ifa;
+ hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
in6_ifa_hold(ifa);
- write_unlock(&addrconf_hash_lock);
+ spin_unlock(&addrconf_hash_lock);
write_lock(&idev->lock);
/* Add to inet6_dev unicast addr list. */
@@ -679,8 +684,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
#ifdef CONFIG_IPV6_PRIVACY
if (ifa->flags&IFA_F_TEMPORARY) {
- ifa->tmp_next = idev->tempaddr_list;
- idev->tempaddr_list = ifa;
+ list_add(&ifa->tmp_list, &idev->tempaddr_list);
in6_ifa_hold(ifa);
}
#endif
@@ -699,7 +703,7 @@ out2:
return ifa;
out:
- write_unlock(&addrconf_hash_lock);
+ spin_unlock(&addrconf_hash_lock);
goto out2;
}
@@ -707,7 +711,7 @@ out:
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- struct inet6_ifaddr *ifa, **ifap;
+ struct inet6_ifaddr *ifa, *ifn;
struct inet6_dev *idev = ifp->idev;
int hash;
int deleted = 0, onlink = 0;
@@ -717,42 +721,28 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
ifp->dead = 1;
- write_lock_bh(&addrconf_hash_lock);
- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
- ifap = &ifa->lst_next) {
- if (ifa == ifp) {
- *ifap = ifa->lst_next;
- __in6_ifa_put(ifp);
- ifa->lst_next = NULL;
- break;
- }
- }
- write_unlock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&addrconf_hash_lock);
+ hlist_del_init_rcu(&ifp->addr_lst);
+ __in6_ifa_put(ifp);
+ spin_unlock_bh(&addrconf_hash_lock);
write_lock_bh(&idev->lock);
#ifdef CONFIG_IPV6_PRIVACY
if (ifp->flags&IFA_F_TEMPORARY) {
- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
- ifap = &ifa->tmp_next) {
- if (ifa == ifp) {
- *ifap = ifa->tmp_next;
- if (ifp->ifpub) {
- in6_ifa_put(ifp->ifpub);
- ifp->ifpub = NULL;
- }
- __in6_ifa_put(ifp);
- ifa->tmp_next = NULL;
- break;
- }
+ list_del(&ifp->tmp_list);
+ if (ifp->ifpub) {
+ in6_ifa_put(ifp->ifpub);
+ ifp->ifpub = NULL;
}
+ __in6_ifa_put(ifp);
}
#endif
- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
+ list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
if (ifa == ifp) {
- *ifap = ifa->if_next;
+ list_del_init(&ifp->if_list);
__in6_ifa_put(ifp);
- ifa->if_next = NULL;
+
if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
break;
deleted = 1;
@@ -785,7 +775,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
}
}
- ifap = &ifa->if_next;
}
write_unlock_bh(&idev->lock);
@@ -1164,7 +1153,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
continue;
read_lock_bh(&idev->lock);
- for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) {
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
int i;
/*
@@ -1242,7 +1231,6 @@ try_nextdev:
in6_ifa_put(hiscore->ifa);
return 0;
}
-
EXPORT_SYMBOL(ipv6_dev_get_saddr);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
@@ -1252,12 +1240,14 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
int err = -EADDRNOTAVAIL;
rcu_read_lock();
- if ((idev = __in6_dev_get(dev)) != NULL) {
+ idev = __in6_dev_get(dev);
+ if (idev) {
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
ipv6_addr_copy(addr, &ifp->addr);
err = 0;
break;
@@ -1275,7 +1265,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+ list_for_each_entry(ifp, &idev->addr_list, if_list)
cnt++;
read_unlock_bh(&idev->lock);
return cnt;
@@ -1284,11 +1274,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
struct net_device *dev, int strict)
{
- struct inet6_ifaddr * ifp;
- u8 hash = ipv6_addr_hash(addr);
+ struct inet6_ifaddr *ifp = NULL;
+ struct hlist_node *node;
+ unsigned int hash = ipv6_addr_hash(addr);
- read_lock_bh(&addrconf_hash_lock);
- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1298,27 +1289,28 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
break;
}
}
- read_unlock_bh(&addrconf_hash_lock);
+ rcu_read_unlock_bh();
+
return ifp != NULL;
}
EXPORT_SYMBOL(ipv6_chk_addr);
-static
-int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev)
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev)
{
- struct inet6_ifaddr * ifp;
- u8 hash = ipv6_addr_hash(addr);
+ unsigned int hash = ipv6_addr_hash(addr);
+ struct inet6_ifaddr *ifp;
+ struct hlist_node *node;
- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+ hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (dev == NULL || ifp->idev->dev == dev)
- break;
+ return true;
}
}
- return ifp != NULL;
+ return false;
}
int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
@@ -1332,7 +1324,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
idev = __in6_dev_get(dev);
if (idev) {
read_lock_bh(&idev->lock);
- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len);
if (onlink)
@@ -1349,11 +1341,12 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
- struct inet6_ifaddr * ifp;
- u8 hash = ipv6_addr_hash(addr);
+ struct inet6_ifaddr *ifp = NULL;
+ struct hlist_node *node;
+ unsigned int hash = ipv6_addr_hash(addr);
- read_lock_bh(&addrconf_hash_lock);
- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1364,7 +1357,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
}
}
}
- read_unlock_bh(&addrconf_hash_lock);
+ rcu_read_unlock_bh();
return ifp;
}
@@ -1380,6 +1373,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
spin_unlock_bh(&ifp->lock);
+ if (dad_failed)
+ ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
#ifdef CONFIG_IPV6_PRIVACY
} else if (ifp->flags&IFA_F_TEMPORARY) {
@@ -1567,7 +1562,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
memcpy(eui, ifp->addr.s6_addr+8, 8);
err = 0;
@@ -1735,7 +1730,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
ASSERT_RTNL();
- if ((idev = ipv6_find_idev(dev)) == NULL)
+ idev = ipv6_find_idev(dev);
+ if (!idev)
return NULL;
/* Add default multicast route */
@@ -1968,7 +1964,7 @@ ok:
#ifdef CONFIG_IPV6_PRIVACY
read_lock_bh(&in6_dev->lock);
/* update all temporary addresses in the list */
- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+ list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
/*
* When adjusting the lifetimes of an existing
* temporary address, only lower the lifetimes.
@@ -2171,7 +2167,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
return -ENXIO;
read_lock_bh(&idev->lock);
- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
if (ifp->prefix_len == plen &&
ipv6_addr_equal(pfx, &ifp->addr)) {
in6_ifa_hold(ifp);
@@ -2182,7 +2178,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
/* If the last address is deleted administratively,
disable IPv6 on this interface.
*/
- if (idev->addr_list == NULL)
+ if (list_empty(&idev->addr_list))
addrconf_ifdown(idev->dev, 1);
return 0;
}
@@ -2443,7 +2439,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
ASSERT_RTNL();
- if ((idev = addrconf_add_dev(dev)) == NULL) {
+ idev = addrconf_add_dev(dev);
+ if (!idev) {
printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
return;
}
@@ -2458,7 +2455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
int run_pending = 0;
int err;
- switch(event) {
+ switch (event) {
case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
@@ -2466,6 +2463,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
return notifier_from_errno(-ENOMEM);
}
break;
+
case NETDEV_UP:
case NETDEV_CHANGE:
if (dev->flags & IFF_SLAVE)
@@ -2495,10 +2493,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
if (idev) {
- if (idev->if_flags & IF_READY) {
+ if (idev->if_flags & IF_READY)
/* device is already configured. */
break;
- }
idev->if_flags |= IF_READY;
}
@@ -2510,7 +2507,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch(dev->type) {
+ switch (dev->type) {
#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
case ARPHRD_SIT:
addrconf_sit_config(dev);
@@ -2527,25 +2524,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_dev_config(dev);
break;
}
+
if (idev) {
if (run_pending)
addrconf_dad_run(idev);
- /* If the MTU changed during the interface down, when the
- interface up, the changed MTU must be reflected in the
- idev as well as routers.
+ /*
+ * If the MTU changed during the interface down,
+ * when the interface up, the changed MTU must be
+ * reflected in the idev as well as routers.
*/
- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+ if (idev->cnf.mtu6 != dev->mtu &&
+ dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
idev->cnf.mtu6 = dev->mtu;
}
idev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, idev);
- /* If the changed mtu during down is lower than IPV6_MIN_MTU
- stop IPv6 on this interface.
+
+ /*
+ * If the changed mtu during down is lower than
+ * IPV6_MIN_MTU stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
- addrconf_ifdown(dev, event != NETDEV_DOWN);
+ addrconf_ifdown(dev, 1);
}
break;
@@ -2562,7 +2564,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
+ /*
+ * MTU falled under IPV6_MIN_MTU.
+ * Stop IPv6 on this interface.
+ */
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
@@ -2582,9 +2587,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
return notifier_from_errno(err);
}
break;
- case NETDEV_BONDING_OLDTYPE:
- case NETDEV_BONDING_NEWTYPE:
- addrconf_bonding_change(dev, event);
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ case NETDEV_POST_TYPE_CHANGE:
+ addrconf_type_change(dev, event);
break;
}
@@ -2596,28 +2602,27 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
*/
static struct notifier_block ipv6_dev_notf = {
.notifier_call = addrconf_notify,
- .priority = 0
};
-static void addrconf_bonding_change(struct net_device *dev, unsigned long event)
+static void addrconf_type_change(struct net_device *dev, unsigned long event)
{
struct inet6_dev *idev;
ASSERT_RTNL();
idev = __in6_dev_get(dev);
- if (event == NETDEV_BONDING_NEWTYPE)
+ if (event == NETDEV_POST_TYPE_CHANGE)
ipv6_mc_remap(idev);
- else if (event == NETDEV_BONDING_OLDTYPE)
+ else if (event == NETDEV_PRE_TYPE_CHANGE)
ipv6_mc_unmap(idev);
}
static int addrconf_ifdown(struct net_device *dev, int how)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, **bifa;
struct net *net = dev_net(dev);
- int i;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(keep_list);
ASSERT_RTNL();
@@ -2628,8 +2633,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
if (idev == NULL)
return -ENODEV;
- /* Step 1: remove reference to ipv6 device from parent device.
- Do not dev_put!
+ /*
+ * Step 1: remove reference to ipv6 device from parent device.
+ * Do not dev_put!
*/
if (how) {
idev->dead = 1;
@@ -2642,40 +2648,21 @@ static int addrconf_ifdown(struct net_device *dev, int how)
}
- /* Step 2: clear hash table */
- for (i=0; i<IN6_ADDR_HSIZE; i++) {
- bifa = &inet6_addr_lst[i];
-
- write_lock_bh(&addrconf_hash_lock);
- while ((ifa = *bifa) != NULL) {
- if (ifa->idev == idev &&
- (how || !(ifa->flags&IFA_F_PERMANENT))) {
- *bifa = ifa->lst_next;
- ifa->lst_next = NULL;
- addrconf_del_timer(ifa);
- in6_ifa_put(ifa);
- continue;
- }
- bifa = &ifa->lst_next;
- }
- write_unlock_bh(&addrconf_hash_lock);
- }
-
write_lock_bh(&idev->lock);
- /* Step 3: clear flags for stateless addrconf */
+ /* Step 2: clear flags for stateless addrconf */
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
- /* Step 4: clear address list */
#ifdef CONFIG_IPV6_PRIVACY
if (how && del_timer(&idev->regen_timer))
in6_dev_put(idev);
- /* clear tempaddr list */
- while ((ifa = idev->tempaddr_list) != NULL) {
- idev->tempaddr_list = ifa->tmp_next;
- ifa->tmp_next = NULL;
+ /* Step 3: clear tempaddr list */
+ while (!list_empty(&idev->tempaddr_list)) {
+ ifa = list_first_entry(&idev->tempaddr_list,
+ struct inet6_ifaddr, tmp_list);
+ list_del(&ifa->tmp_list);
ifa->dead = 1;
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
@@ -2689,35 +2676,56 @@ static int addrconf_ifdown(struct net_device *dev, int how)
write_lock_bh(&idev->lock);
}
#endif
- bifa = &idev->addr_list;
- while ((ifa = *bifa) != NULL) {
- if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) {
- /* Retain permanent address on admin down */
- bifa = &ifa->if_next;
-
- /* Restart DAD if needed when link comes back up */
- if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
- idev->cnf.accept_dad <= 0 ||
- (ifa->flags & IFA_F_NODAD)))
- ifa->flags |= IFA_F_TENTATIVE;
- } else {
- *bifa = ifa->if_next;
- ifa->if_next = NULL;
- ifa->dead = 1;
- write_unlock_bh(&idev->lock);
+ while (!list_empty(&idev->addr_list)) {
+ ifa = list_first_entry(&idev->addr_list,
+ struct inet6_ifaddr, if_list);
+ addrconf_del_timer(ifa);
+
+ /* If just doing link down, and address is permanent
+ and not link-local, then retain it. */
+ if (!how &&
+ (ifa->flags&IFA_F_PERMANENT) &&
+ !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
+ list_move_tail(&ifa->if_list, &keep_list);
+
+ /* If not doing DAD on this address, just keep it. */
+ if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
+ idev->cnf.accept_dad <= 0 ||
+ (ifa->flags & IFA_F_NODAD))
+ continue;
- __ipv6_ifa_notify(RTM_DELADDR, ifa);
- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
- in6_ifa_put(ifa);
+ /* If it was tentative already, no need to notify */
+ if (ifa->flags & IFA_F_TENTATIVE)
+ continue;
- write_lock_bh(&idev->lock);
+ /* Flag it for later restoration when link comes up */
+ ifa->flags |= IFA_F_TENTATIVE;
+ in6_ifa_hold(ifa);
+ } else {
+ list_del(&ifa->if_list);
+ ifa->dead = 1;
}
+ write_unlock_bh(&idev->lock);
+
+ /* clear hash table */
+ spin_lock_bh(&addrconf_hash_lock);
+ hlist_del_init_rcu(&ifa->addr_lst);
+ __in6_ifa_put(ifa);
+ spin_unlock_bh(&addrconf_hash_lock);
+
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+ in6_ifa_put(ifa);
+
+ write_lock_bh(&idev->lock);
}
+
+ list_splice(&keep_list, &idev->addr_list);
+
write_unlock_bh(&idev->lock);
/* Step 5: Discard multicast list */
-
if (how)
ipv6_mc_destroy_dev(idev);
else
@@ -2725,8 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
idev->tstamp = jiffies;
- /* Shot the device (if unregistered) */
-
+ /* Last: Shot the device (if unregistered) */
if (how) {
addrconf_sysctl_unregister(idev);
neigh_parms_release(&nd_tbl, idev->nd_parms);
@@ -2739,28 +2746,29 @@ static int addrconf_ifdown(struct net_device *dev, int how)
static void addrconf_rs_timer(unsigned long data)
{
struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_dev *idev = ifp->idev;
- if (ifp->idev->cnf.forwarding)
+ read_lock(&idev->lock);
+ if (idev->dead || !(idev->if_flags & IF_READY))
goto out;
- if (ifp->idev->if_flags & IF_RA_RCVD) {
- /*
- * Announcement received after solicitation
- * was sent
- */
+ if (idev->cnf.forwarding)
+ goto out;
+
+ /* Announcement received after solicitation was sent */
+ if (idev->if_flags & IF_RA_RCVD)
goto out;
- }
spin_lock(&ifp->lock);
- if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+ if (ifp->probes++ < idev->cnf.rtr_solicits) {
/* The wait after the last probe can be shorter */
addrconf_mod_timer(ifp, AC_RS,
- (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
- ifp->idev->cnf.rtr_solicit_delay :
- ifp->idev->cnf.rtr_solicit_interval);
+ (ifp->probes == idev->cnf.rtr_solicits) ?
+ idev->cnf.rtr_solicit_delay :
+ idev->cnf.rtr_solicit_interval);
spin_unlock(&ifp->lock);
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
} else {
spin_unlock(&ifp->lock);
/*
@@ -2768,10 +2776,11 @@ static void addrconf_rs_timer(unsigned long data)
* assumption any longer.
*/
printk(KERN_DEBUG "%s: no IPv6 routers present\n",
- ifp->idev->dev->name);
+ idev->dev->name);
}
out:
+ read_unlock(&idev->lock);
in6_ifa_put(ifp);
}
@@ -2835,7 +2844,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
* Optimistic nodes can start receiving
* Frames right away
*/
- if(ifp->flags & IFA_F_OPTIMISTIC)
+ if (ifp->flags & IFA_F_OPTIMISTIC)
ip6_ins_rt(ifp->rt);
addrconf_dad_kick(ifp);
@@ -2850,9 +2859,9 @@ static void addrconf_dad_timer(unsigned long data)
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
- read_lock_bh(&idev->lock);
- if (idev->dead) {
- read_unlock_bh(&idev->lock);
+ read_lock(&idev->lock);
+ if (idev->dead || !(idev->if_flags & IF_READY)) {
+ read_unlock(&idev->lock);
goto out;
}
@@ -2864,7 +2873,7 @@ static void addrconf_dad_timer(unsigned long data)
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- read_unlock_bh(&idev->lock);
+ read_unlock(&idev->lock);
addrconf_dad_completed(ifp);
@@ -2874,7 +2883,7 @@ static void addrconf_dad_timer(unsigned long data)
ifp->probes--;
addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
spin_unlock(&ifp->lock);
- read_unlock_bh(&idev->lock);
+ read_unlock(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
@@ -2885,7 +2894,7 @@ out:
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
{
- struct net_device * dev = ifp->idev->dev;
+ struct net_device *dev = ifp->idev->dev;
/*
* Configure the address for reception. Now it is valid.
@@ -2916,11 +2925,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
}
}
-static void addrconf_dad_run(struct inet6_dev *idev) {
+static void addrconf_dad_run(struct inet6_dev *idev)
+{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
if (!(ifp->flags & IFA_F_TENTATIVE)) {
spin_unlock(&ifp->lock);
@@ -2945,36 +2955,35 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- ifa = inet6_addr_lst[state->bucket];
-
- while (ifa && !net_eq(dev_net(ifa->idev->dev), net))
- ifa = ifa->lst_next;
- if (ifa)
- break;
+ struct hlist_node *n;
+ hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
+ addr_lst)
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
}
- return ifa;
+ return NULL;
}
-static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
+ struct inet6_ifaddr *ifa)
{
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ struct hlist_node *n = &ifa->addr_lst;
- ifa = ifa->lst_next;
-try_again:
- if (ifa) {
- if (!net_eq(dev_net(ifa->idev->dev), net)) {
- ifa = ifa->lst_next;
- goto try_again;
- }
- }
+ hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
- ifa = inet6_addr_lst[state->bucket];
- goto try_again;
+ while (++state->bucket < IN6_ADDR_HSIZE) {
+ hlist_for_each_entry(ifa, n,
+ &inet6_addr_lst[state->bucket], addr_lst) {
+ if (net_eq(dev_net(ifa->idev->dev), net))
+ return ifa;
+ }
}
- return ifa;
+ return NULL;
}
static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
@@ -2982,15 +2991,15 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
struct inet6_ifaddr *ifa = if6_get_first(seq);
if (ifa)
- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
+ while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
--pos;
return pos ? NULL : ifa;
}
static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(addrconf_hash_lock)
+ __acquires(rcu)
{
- read_lock_bh(&addrconf_hash_lock);
+ rcu_read_lock_bh();
return if6_get_idx(seq, *pos);
}
@@ -3004,9 +3013,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void if6_seq_stop(struct seq_file *seq, void *v)
- __releases(addrconf_hash_lock)
+ __releases(rcu)
{
- read_unlock_bh(&addrconf_hash_lock);
+ rcu_read_unlock_bh();
}
static int if6_seq_show(struct seq_file *seq, void *v)
@@ -3076,10 +3085,12 @@ void if6_proc_exit(void)
int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
{
int ret = 0;
- struct inet6_ifaddr * ifp;
- u8 hash = ipv6_addr_hash(addr);
- read_lock_bh(&addrconf_hash_lock);
- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+ struct inet6_ifaddr *ifp = NULL;
+ struct hlist_node *n;
+ unsigned int hash = ipv6_addr_hash(addr);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3088,7 +3099,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
break;
}
}
- read_unlock_bh(&addrconf_hash_lock);
+ rcu_read_unlock_bh();
return ret;
}
#endif
@@ -3099,43 +3110,35 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
static void addrconf_verify(unsigned long foo)
{
+ unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
- unsigned long now, next;
+ struct hlist_node *node;
int i;
- spin_lock_bh(&addrconf_verify_lock);
+ rcu_read_lock_bh();
+ spin_lock(&addrconf_verify_lock);
now = jiffies;
- next = now + ADDR_CHECK_FREQUENCY;
+ next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
del_timer(&addr_chk_timer);
- for (i=0; i < IN6_ADDR_HSIZE; i++) {
-
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- read_lock(&addrconf_hash_lock);
- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+ hlist_for_each_entry_rcu(ifp, node,
+ &inet6_addr_lst[i], addr_lst) {
unsigned long age;
-#ifdef CONFIG_IPV6_PRIVACY
- unsigned long regen_advance;
-#endif
if (ifp->flags & IFA_F_PERMANENT)
continue;
spin_lock(&ifp->lock);
- age = (now - ifp->tstamp) / HZ;
-
-#ifdef CONFIG_IPV6_PRIVACY
- regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- ifp->idev->nd_parms->retrans_time / HZ;
-#endif
+ /* We try to batch several events at once. */
+ age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
if (ifp->valid_lft != INFINITY_LIFE_TIME &&
age >= ifp->valid_lft) {
spin_unlock(&ifp->lock);
in6_ifa_hold(ifp);
- read_unlock(&addrconf_hash_lock);
ipv6_del_addr(ifp);
goto restart;
} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
@@ -3157,7 +3160,6 @@ restart:
if (deprecate) {
in6_ifa_hold(ifp);
- read_unlock(&addrconf_hash_lock);
ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
@@ -3166,6 +3168,10 @@ restart:
#ifdef CONFIG_IPV6_PRIVACY
} else if ((ifp->flags&IFA_F_TEMPORARY) &&
!(ifp->flags&IFA_F_TENTATIVE)) {
+ unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
+ ifp->idev->cnf.dad_transmits *
+ ifp->idev->nd_parms->retrans_time / HZ;
+
if (age >= ifp->prefered_lft - regen_advance) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -3175,7 +3181,7 @@ restart:
in6_ifa_hold(ifp);
in6_ifa_hold(ifpub);
spin_unlock(&ifp->lock);
- read_unlock(&addrconf_hash_lock);
+
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
@@ -3195,12 +3201,26 @@ restart:
spin_unlock(&ifp->lock);
}
}
- read_unlock(&addrconf_hash_lock);
}
- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
+ next_sec = round_jiffies_up(next);
+ next_sched = next;
+
+ /* If rounded timeout is accurate enough, accept it. */
+ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+ next_sched = next_sec;
+
+ /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+ if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
+ next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
+
+ ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched));
+
+ addr_chk_timer.expires = next_sched;
add_timer(&addr_chk_timer);
- spin_unlock_bh(&addrconf_verify_lock);
+ spin_unlock(&addrconf_verify_lock);
+ rcu_read_unlock_bh();
}
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
@@ -3490,8 +3510,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
return nlmsg_end(skb, nlh);
}
-enum addr_type_t
-{
+enum addr_type_t {
UNICAST_ADDR,
MULTICAST_ADDR,
ANYCAST_ADDR,
@@ -3502,7 +3521,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
struct netlink_callback *cb, enum addr_type_t type,
int s_ip_idx, int *p_ip_idx)
{
- struct inet6_ifaddr *ifa;
struct ifmcaddr6 *ifmca;
struct ifacaddr6 *ifaca;
int err = 1;
@@ -3510,11 +3528,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
read_lock_bh(&idev->lock);
switch (type) {
- case UNICAST_ADDR:
+ case UNICAST_ADDR: {
+ struct inet6_ifaddr *ifa;
+
/* unicast address incl. temp addr */
- for (ifa = idev->addr_list; ifa;
- ifa = ifa->if_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (++ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifaddr(skb, ifa,
NETLINK_CB(cb->skb).pid,
@@ -3525,6 +3544,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
break;
}
break;
+ }
case MULTICAST_ADDR:
/* multicast address */
for (ifmca = idev->mc_list; ifmca;
@@ -3589,7 +3609,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
if (idx > s_idx)
s_ip_idx = 0;
ip_idx = 0;
- if ((idev = __in6_dev_get(dev)) == NULL)
+ idev = __in6_dev_get(dev);
+ if (!idev)
goto cont;
if (in6_dump_addrs(idev, skb, cb, type,
@@ -3656,12 +3677,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
if (ifm->ifa_index)
dev = __dev_get_by_index(net, ifm->ifa_index);
- if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
+ ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+ if (!ifa) {
err = -EADDRNOTAVAIL;
goto errout;
}
- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
+ skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
+ if (!skb) {
err = -ENOBUFS;
goto errout_ifa;
}
@@ -3786,7 +3809,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
int bytes)
{
- switch(attrtype) {
+ switch (attrtype) {
case IFLA_INET6_STATS:
__snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
break;
@@ -4138,211 +4161,211 @@ static struct addrconf_sysctl_table
.sysctl_header = NULL,
.addrconf_vars = {
{
- .procname = "forwarding",
- .data = &ipv6_devconf.forwarding,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_forward,
+ .procname = "forwarding",
+ .data = &ipv6_devconf.forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_forward,
},
{
- .procname = "hop_limit",
- .data = &ipv6_devconf.hop_limit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "hop_limit",
+ .data = &ipv6_devconf.hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "mtu",
- .data = &ipv6_devconf.mtu6,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "mtu",
+ .data = &ipv6_devconf.mtu6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "accept_ra",
- .data = &ipv6_devconf.accept_ra,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_ra",
+ .data = &ipv6_devconf.accept_ra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "accept_redirects",
- .data = &ipv6_devconf.accept_redirects,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_redirects",
+ .data = &ipv6_devconf.accept_redirects,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "autoconf",
- .data = &ipv6_devconf.autoconf,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "autoconf",
+ .data = &ipv6_devconf.autoconf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "dad_transmits",
- .data = &ipv6_devconf.dad_transmits,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "dad_transmits",
+ .data = &ipv6_devconf.dad_transmits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "router_solicitations",
- .data = &ipv6_devconf.rtr_solicits,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "router_solicitations",
+ .data = &ipv6_devconf.rtr_solicits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "router_solicitation_interval",
- .data = &ipv6_devconf.rtr_solicit_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .procname = "router_solicitation_interval",
+ .data = &ipv6_devconf.rtr_solicit_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "router_solicitation_delay",
- .data = &ipv6_devconf.rtr_solicit_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .procname = "router_solicitation_delay",
+ .data = &ipv6_devconf.rtr_solicit_delay,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "force_mld_version",
- .data = &ipv6_devconf.force_mld_version,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "force_mld_version",
+ .data = &ipv6_devconf.force_mld_version,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_IPV6_PRIVACY
{
- .procname = "use_tempaddr",
- .data = &ipv6_devconf.use_tempaddr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "use_tempaddr",
+ .data = &ipv6_devconf.use_tempaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "temp_valid_lft",
- .data = &ipv6_devconf.temp_valid_lft,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "temp_valid_lft",
+ .data = &ipv6_devconf.temp_valid_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "temp_prefered_lft",
- .data = &ipv6_devconf.temp_prefered_lft,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "temp_prefered_lft",
+ .data = &ipv6_devconf.temp_prefered_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "regen_max_retry",
- .data = &ipv6_devconf.regen_max_retry,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "regen_max_retry",
+ .data = &ipv6_devconf.regen_max_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "max_desync_factor",
- .data = &ipv6_devconf.max_desync_factor,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "max_desync_factor",
+ .data = &ipv6_devconf.max_desync_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .procname = "max_addresses",
- .data = &ipv6_devconf.max_addresses,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "max_addresses",
+ .data = &ipv6_devconf.max_addresses,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "accept_ra_defrtr",
- .data = &ipv6_devconf.accept_ra_defrtr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_ra_defrtr",
+ .data = &ipv6_devconf.accept_ra_defrtr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "accept_ra_pinfo",
- .data = &ipv6_devconf.accept_ra_pinfo,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_ra_pinfo",
+ .data = &ipv6_devconf.accept_ra_pinfo,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_IPV6_ROUTER_PREF
{
- .procname = "accept_ra_rtr_pref",
- .data = &ipv6_devconf.accept_ra_rtr_pref,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_ra_rtr_pref",
+ .data = &ipv6_devconf.accept_ra_rtr_pref,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "router_probe_interval",
- .data = &ipv6_devconf.rtr_probe_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .procname = "router_probe_interval",
+ .data = &ipv6_devconf.rtr_probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
},
#ifdef CONFIG_IPV6_ROUTE_INFO
{
- .procname = "accept_ra_rt_info_max_plen",
- .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_ra_rt_info_max_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#endif
#endif
{
- .procname = "proxy_ndp",
- .data = &ipv6_devconf.proxy_ndp,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "proxy_ndp",
+ .data = &ipv6_devconf.proxy_ndp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
- .procname = "accept_source_route",
- .data = &ipv6_devconf.accept_source_route,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_source_route",
+ .data = &ipv6_devconf.accept_source_route,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
{
- .procname = "optimistic_dad",
- .data = &ipv6_devconf.optimistic_dad,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "optimistic_dad",
+ .data = &ipv6_devconf.optimistic_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_IPV6_MROUTE
{
- .procname = "mc_forwarding",
- .data = &ipv6_devconf.mc_forwarding,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .procname = "mc_forwarding",
+ .data = &ipv6_devconf.mc_forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
},
#endif
{
- .procname = "disable_ipv6",
- .data = &ipv6_devconf.disable_ipv6,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_disable,
+ .procname = "disable_ipv6",
+ .data = &ipv6_devconf.disable_ipv6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable,
},
{
- .procname = "accept_dad",
- .data = &ipv6_devconf.accept_dad,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .procname = "accept_dad",
+ .data = &ipv6_devconf.accept_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
.procname = "force_tllao",
@@ -4378,8 +4401,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (t == NULL)
goto out;
- for (i=0; t->addrconf_vars[i].data; i++) {
- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+ for (i = 0; t->addrconf_vars[i].data; i++) {
+ t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
t->addrconf_vars[i].extra2 = net;
}
@@ -4516,14 +4539,12 @@ int register_inet6addr_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&inet6addr_chain, nb);
}
-
EXPORT_SYMBOL(register_inet6addr_notifier);
int unregister_inet6addr_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
+ return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
}
-
EXPORT_SYMBOL(unregister_inet6addr_notifier);
/*
@@ -4532,11 +4553,12 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
int __init addrconf_init(void)
{
- int err;
+ int i, err;
- if ((err = ipv6_addr_label_init()) < 0) {
- printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
- err);
+ err = ipv6_addr_label_init();
+ if (err < 0) {
+ printk(KERN_CRIT "IPv6 Addrconf:"
+ " cannot initialize default policy table: %d.\n", err);
return err;
}
@@ -4567,6 +4589,9 @@ int __init addrconf_init(void)
if (err)
goto errlo;
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_addr_lst[i]);
+
register_netdevice_notifier(&ipv6_dev_notf);
addrconf_verify(0);
@@ -4595,7 +4620,6 @@ errlo:
void addrconf_cleanup(void)
{
- struct inet6_ifaddr *ifa;
struct net_device *dev;
int i;
@@ -4615,20 +4639,10 @@ void addrconf_cleanup(void)
/*
* Check hash table.
*/
- write_lock_bh(&addrconf_hash_lock);
- for (i=0; i < IN6_ADDR_HSIZE; i++) {
- for (ifa=inet6_addr_lst[i]; ifa; ) {
- struct inet6_ifaddr *bifa;
-
- bifa = ifa;
- ifa = ifa->lst_next;
- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
- /* Do not free it; something is wrong.
- Now we can investigate it with debugger.
- */
- }
- }
- write_unlock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&addrconf_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
+ spin_unlock_bh(&addrconf_hash_lock);
del_timer(&addr_chk_timer);
rtnl_unlock();
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 551882b..5e463c43 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if ((rule->flags & FIB_RULE_FIND_SADDR) &&
r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
struct in6_addr saddr;
- unsigned int srcprefs = 0;
-
- if (flags & RT6_LOOKUP_F_SRCPREF_TMP)
- srcprefs |= IPV6_PREFER_SRC_TMP;
- if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC)
- srcprefs |= IPV6_PREFER_SRC_PUBLIC;
- if (flags & RT6_LOOKUP_F_SRCPREF_COA)
- srcprefs |= IPV6_PREFER_SRC_COA;
if (ipv6_dev_get_saddr(net,
ip6_dst_idev(&rt->u.dst)->dev,
- &flp->fl6_dst, srcprefs,
+ &flp->fl6_dst,
+ rt6_flags2srcprefs(flags),
&saddr))
goto again;
if (!ipv6_prefix_equal(&saddr, &r->src.addr,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 52e0f74..23e4ac0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1113,6 +1113,9 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
unsigned char ttls[MAXMIFS];
int i;
+ if (mfc->mf6cc_parent >= MAXMIFS)
+ return -ENFILE;
+
memset(ttls, 255, MAXMIFS);
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b08879e..7fcb0e5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
if (!ipv6_addr_any(&fl->fl6_src))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- else if (sk) {
- unsigned int prefs = inet6_sk(sk)->srcprefs;
- if (prefs & IPV6_PREFER_SRC_TMP)
- flags |= RT6_LOOKUP_F_SRCPREF_TMP;
- if (prefs & IPV6_PREFER_SRC_PUBLIC)
- flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
- if (prefs & IPV6_PREFER_SRC_COA)
- flags |= RT6_LOOKUP_F_SRCPREF_COA;
- }
+ else if (sk)
+ flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
}
@@ -886,7 +879,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt = (struct rt6_info *) dst;
- if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+ if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
return dst;
return NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6963a6b..9b6dbba 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,8 +1740,11 @@ process:
if (!tcp_prequeue(sk, skb))
ret = tcp_v6_do_rcv(sk, skb);
}
- } else
- sk_add_backlog(sk, skb);
+ } else if (unlikely(sk_add_backlog(sk, skb))) {
+ bh_unlock_sock(sk);
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ goto discard_and_relse;
+ }
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52b8347..3c0c9c7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count,
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb1);
- else
- sk_add_backlog(sk, skb1);
+ else if (sk_add_backlog(sk, skb1)) {
+ kfree_skb(skb1);
+ bh_unlock_sock(sk);
+ goto drop;
+ }
bh_unlock_sock(sk);
- } else {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INERRORS, IS_UDPLITE(sk));
+ continue;
}
+drop:
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, IS_UDPLITE(sk));
}
}
/*
@@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb);
- else
- sk_add_backlog(sk, skb);
+ else if (sk_add_backlog(sk, skb)) {
+ atomic_inc(&sk->sk_drops);
+ bh_unlock_sock(sk);
+ sock_put(sk);
+ goto discard;
+ }
bh_unlock_sock(sk);
sock_put(sk);
return 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index dbdc696..ae18165 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return 0;
}
-static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct rt6_info *rt = (struct rt6_info*)xdst->route;
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 019c780..86d6985 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb)
llc_conn_state_process(sk, skb);
else {
llc_set_backlog_type(skb, LLC_EVENT);
- sk_add_backlog(sk, skb);
+ __sk_add_backlog(sk, skb);
}
}
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index a8dde9b..a12144d 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
else {
dprintk("%s: adding to backlog...\n", __func__);
llc_set_backlog_type(skb, LLC_PACKET);
- sk_add_backlog(sk, skb);
+ if (sk_add_backlog(sk, skb))
+ goto drop_unlock;
}
out:
bh_unlock_sock(sk);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 9affe2c..b4ddb2f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write(
ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int))
{
u8 *buf;
- ssize_t ret = -ENODEV;
+ ssize_t ret;
- buf = kzalloc(count, GFP_KERNEL);
+ buf = kmalloc(count, GFP_KERNEL);
if (!buf)
return -ENOMEM;
+ ret = -EFAULT;
if (copy_from_user(buf, userbuf, count))
- return -EFAULT;
+ goto freebuf;
+ ret = -ENODEV;
rtnl_lock();
if (sdata->dev->reg_state == NETREG_REGISTERED)
ret = (*write)(sdata, buf, count);
rtnl_unlock();
+freebuf:
+ kfree(buf);
return ret;
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bc4e20e..1a29c4a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -744,7 +744,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
break;
default:
/* should not get here, PLINK_BLOCKED is dealt with at the
- * beggining of the function
+ * beginning of the function
*/
spin_unlock_bh(&sta->lock);
break;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0ab284c..be5f723 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -436,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
ieee80211_send_nullfunc(local, sdata, 1);
- if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
- conf->flags |= IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
- }
+ if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
+ (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+ return;
+
+ conf->flags |= IEEE80211_CONF_PS;
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
}
}
@@ -558,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
(!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)))
ieee80211_send_nullfunc(local, sdata, 1);
- if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ||
+ if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
+ (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
local->hw.conf.flags |= IEEE80211_CONF_PS;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 211c475..56422d8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
/* check if STA exists already */
if (sta_info_get_bss(sdata, sta->sta.addr)) {
spin_unlock_irqrestore(&local->sta_lock, flags);
+ mutex_unlock(&local->sta_mtx);
rcu_read_lock();
err = -EEXIST;
goto out_free;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b2af63..569410a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -582,7 +582,9 @@ nla_put_failure:
nlmsg_failure:
kfree_skb(skb);
errout:
- nfnetlink_set_err(net, 0, group, -ENOBUFS);
+ if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0)
+ return -ENOBUFS;
+
return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 8dd75d9..c6cd1b8 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request);
* tabs, spaces and continuation lines, which are treated as a single whitespace
* character.
*
- * Some headers may appear multiple times. A comma seperated list of values is
+ * Some headers may appear multiple times. A comma separated list of values is
* equivalent to multiple headers.
*/
static const struct sip_header ct_sip_hdrs[] = {
@@ -421,7 +421,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
}
EXPORT_SYMBOL_GPL(ct_sip_get_header);
-/* Get next header field in a list of comma seperated values */
+/* Get next header field in a list of comma separated values */
static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
unsigned int dataoff, unsigned int datalen,
enum sip_header_types type,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8eb0cc2..6afa3d5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
-void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
{
- netlink_set_err(net->nfnl, pid, group, error);
+ return netlink_set_err(net->nfnl, pid, group, error);
}
EXPORT_SYMBOL_GPL(nfnetlink_set_err);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index d952806..9e9c489 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,6 +1,6 @@
/*
* xt_hashlimit - Netfilter module to limit the number of packets per time
- * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
*
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 320d042..acbbae1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk,
struct netlink_set_err_data *p)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ int ret = 0;
if (sk == p->exclude_sk)
goto out;
@@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
goto out;
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ ret = 1;
+ goto out;
+ }
+
sk->sk_err = p->code;
sk->sk_error_report(sk);
out:
- return 0;
+ return ret;
}
/**
@@ -1116,12 +1122,16 @@ out:
* @pid: the PID of a process that we want to skip (if any)
* @groups: the broadcast group that will notice the error
* @code: error code, must be negative (as usual in kernelspace)
+ *
+ * This function returns the number of broadcast listeners that have set the
+ * NETLINK_RECV_NO_ENOBUFS socket option.
*/
-void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
struct netlink_set_err_data info;
struct hlist_node *node;
struct sock *sk;
+ int ret = 0;
info.exclude_sk = ssk;
info.pid = pid;
@@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
read_lock(&nl_table_lock);
sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
- do_one_set_err(sk, &info);
+ ret += do_one_set_err(sk, &info);
read_unlock(&nl_table_lock);
+ return ret;
}
EXPORT_SYMBOL(netlink_set_err);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 031a5e6..1612d41 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
{
switch (i->type) {
case PACKET_MR_MULTICAST:
+ if (i->alen != dev->addr_len)
+ return -EINVAL;
if (what > 0)
return dev_mc_add(dev, i->addr, i->alen, 0);
else
@@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
return dev_set_allmulti(dev, what);
break;
case PACKET_MR_UNICAST:
+ if (i->alen != dev->addr_len)
+ return -EINVAL;
if (what > 0)
return dev_unicast_add(dev, i->addr);
else
@@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
goto done;
err = -EINVAL;
- if (mreq->mr_alen != dev->addr_len)
+ if (mreq->mr_alen > dev->addr_len)
goto done;
err = -ENOBUFS;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c597cc5..5c6ae0c 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -107,8 +107,7 @@ static void phonet_device_destroy(struct net_device *dev)
if (pnd) {
u8 addr;
- for (addr = find_first_bit(pnd->addrs, 64); addr < 64;
- addr = find_next_bit(pnd->addrs, 64, 1+addr))
+ for_each_set_bit(addr, pnd->addrs, 64)
phonet_address_notify(RTM_DELADDR, dev, addr);
kfree(pnd);
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 2e6c7eb..fe2e708 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -141,8 +141,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
continue;
addr_idx = 0;
- for (addr = find_first_bit(pnd->addrs, 64); addr < 64;
- addr = find_next_bit(pnd->addrs, 64, 1+addr)) {
+ for_each_set_bit(addr, pnd->addrs, 64) {
if (addr_idx++ < addr_start_idx)
continue;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 853c52b..937ecda 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
poll_wait(file, sk->sk_sleep, wait);
- poll_wait(file, &rds_poll_waitq, wait);
+ if (rs->rs_seen_congestion)
+ poll_wait(file, &rds_poll_waitq, wait);
read_lock_irqsave(&rs->rs_recv_lock, flags);
if (!rs->rs_cong_monitor) {
@@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
mask |= (POLLOUT | POLLWRNORM);
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ /* clear state any time we wake a seen-congested socket */
+ if (mask)
+ rs->rs_seen_congestion = 0;
+
return mask;
}
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 6d06cac..dd2711d 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -218,8 +218,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
spin_lock_irqsave(&rds_cong_lock, flags);
list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
- if (conn->c_loopback)
- continue;
if (!test_and_set_bit(0, &conn->c_map_queued)) {
rds_stats_inc(s_cong_update_queued);
queue_delayed_work(rds_wq, &conn->c_send_w, 0);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 647cb8f..e1f124b 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -203,9 +203,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
break;
default:
- rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u "
+ rdsdebug("Fatal QP Event %u "
"- connection %pI4->%pI4, reconnecting\n",
event->event, &conn->c_laddr, &conn->c_faddr);
+ rds_conn_drop(conn);
break;
}
}
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 4b0da86..cfb1d90 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -234,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
{
flush_workqueue(rds_wq);
rds_ib_flush_mr_pool(pool, 1);
- BUG_ON(atomic_read(&pool->item_count));
- BUG_ON(atomic_read(&pool->free_pinned));
+ WARN_ON(atomic_read(&pool->item_count));
+ WARN_ON(atomic_read(&pool->free_pinned));
kfree(pool);
}
@@ -440,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
/* FIXME we need a way to tell a r/w MR
* from a r/o MR */
+ BUG_ON(in_interrupt());
set_page_dirty(page);
put_page(page);
}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 04dc0d3..c338881 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -468,8 +468,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
rds_ib_stats_inc(s_ib_ack_send_failure);
- /* Need to finesse this later. */
- BUG();
+
+ rds_ib_conn_error(ic->conn, "sending ack failed\n");
} else
rds_ib_stats_inc(s_ib_ack_sent);
}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index a10fab6..17fa808 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
struct rds_message *rm;
rm = rds_send_get_message(conn, send->s_op);
- if (rm)
+ if (rm) {
+ if (rm->m_rdma_op)
+ rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
rds_ib_send_rdma_complete(rm, wc.status);
+ rds_message_put(rm);
+ }
}
oldest = (oldest + 1) % ic->i_send_ring.w_nr;
@@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
BUG_ON(off % RDS_FRAG_SIZE);
BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
+ /* Do not send cong updates to IB loopback */
+ if (conn->c_loopback
+ && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+ rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+ return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ }
+
/* FIXME we may overallocate here */
if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
i = 1;
@@ -574,8 +585,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
adv_credits += posted;
BUG_ON(adv_credits > 255);
- } else if (ic->i_rm != rm)
- BUG();
+ }
send = &ic->i_sends[pos];
first = send;
@@ -714,8 +724,8 @@ add_header:
ic->i_rm = prev->s_rm;
prev->s_rm = NULL;
}
- /* Finesse this later */
- BUG();
+
+ rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
goto out;
}
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 394cf6b..6bc638f 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
case IB_EVENT_QP_REQ_ERR:
case IB_EVENT_QP_FATAL:
default:
- rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
+ rdsdebug("Fatal QP Event %u "
+ "- connection %pI4->%pI4, reconnecting\n",
event->event, &conn->c_laddr,
&conn->c_faddr);
+ rds_conn_drop(conn);
break;
}
}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 54af7d6..337e4e5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -468,8 +468,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
rds_iw_stats_inc(s_iw_ack_send_failure);
- /* Need to finesse this later. */
- BUG();
+
+ rds_iw_conn_error(ic->conn, "sending ack failed\n");
} else
rds_iw_stats_inc(s_iw_ack_sent);
}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 1379e9d..52182ff 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
adv_credits += posted;
BUG_ON(adv_credits > 255);
- } else if (ic->i_rm != rm)
- BUG();
+ }
send = &ic->i_sends[pos];
first = send;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 4a61997..93a45f1 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn,
struct rds_cong_map *map,
unsigned long offset)
{
- unsigned long i;
-
BUG_ON(offset);
BUG_ON(map != conn->c_lcong);
- for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
- memcpy((void *)conn->c_fcong->m_page_addrs[i],
- (void *)map->m_page_addrs[i], PAGE_SIZE);
- }
-
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4c64daa..61b359d 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -438,8 +438,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
- if (!ro->r_write)
+ if (!ro->r_write) {
+ BUG_ON(in_interrupt());
set_page_dirty(page);
+ }
put_page(page);
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9ece910..5ea82fc 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_DISCONNECTED:
- printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection "
+ rdsdebug("DISCONNECT event - dropping connection "
"%pI4->%pI4\n", &conn->c_laddr,
&conn->c_faddr);
rds_conn_drop(conn);
@@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
default:
/* things like device disconnect? */
- printk(KERN_ERR "unknown event %u\n", event->event);
- BUG();
+ printk(KERN_ERR "RDS: unknown event %u!\n", event->event);
break;
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 85d6f89..4bec6e2 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -388,6 +388,8 @@ struct rds_sock {
/* flag indicating we were congested or not */
int rs_congested;
+ /* seen congestion (ENOBUFS) when sending? */
+ int rs_seen_congestion;
/* rs_lock protects all these adjacent members before the newline */
spinlock_t rs_lock;
diff --git a/net/rds/send.c b/net/rds/send.c
index b2fccfc..4629a0b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
*/
void rds_send_remove_from_sock(struct list_head *messages, int status)
{
- unsigned long flags = 0; /* silence gcc :P */
+ unsigned long flags;
struct rds_sock *rs = NULL;
struct rds_message *rm;
- local_irq_save(flags);
while (!list_empty(messages)) {
+ int was_on_sock = 0;
+
rm = list_entry(messages->next, struct rds_message,
m_conn_item);
list_del_init(&rm->m_conn_item);
@@ -527,20 +528,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
* while we're messing with it. It does not prevent the
* message from being removed from the socket, though.
*/
- spin_lock(&rm->m_rs_lock);
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
goto unlock_and_drop;
if (rs != rm->m_rs) {
if (rs) {
- spin_unlock(&rs->rs_lock);
rds_wake_sk_sleep(rs);
sock_put(rds_rs_to_sk(rs));
}
rs = rm->m_rs;
- spin_lock(&rs->rs_lock);
sock_hold(rds_rs_to_sk(rs));
}
+ spin_lock(&rs->rs_lock);
if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
struct rds_rdma_op *ro = rm->m_rdma_op;
@@ -557,21 +557,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
notifier->n_status = status;
rm->m_rdma_op->r_notifier = NULL;
}
- rds_message_put(rm);
+ was_on_sock = 1;
rm->m_rs = NULL;
}
+ spin_unlock(&rs->rs_lock);
unlock_and_drop:
- spin_unlock(&rm->m_rs_lock);
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
rds_message_put(rm);
+ if (was_on_sock)
+ rds_message_put(rm);
}
if (rs) {
- spin_unlock(&rs->rs_lock);
rds_wake_sk_sleep(rs);
sock_put(rds_rs_to_sk(rs));
}
- local_irq_restore(flags);
}
/*
@@ -633,9 +634,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
list_move(&rm->m_sock_item, &list);
rds_send_sndbuf_remove(rs, rm);
clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
-
- /* If this is a RDMA operation, notify the app. */
- __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
}
/* order flag updates with the rs lock */
@@ -644,9 +642,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
spin_unlock_irqrestore(&rs->rs_lock, flags);
- if (wake)
- rds_wake_sk_sleep(rs);
-
conn = NULL;
/* now remove the messages from the conn list as needed */
@@ -654,6 +649,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
/* We do this here rather than in the loop above, so that
* we don't have to nest m_rs_lock under rs->rs_lock */
spin_lock_irqsave(&rm->m_rs_lock, flags2);
+ /* If this is a RDMA operation, notify the app. */
+ spin_lock(&rs->rs_lock);
+ __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
+ spin_unlock(&rs->rs_lock);
rm->m_rs = NULL;
spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
@@ -682,6 +681,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
if (conn)
spin_unlock_irqrestore(&conn->c_lock, flags);
+ if (wake)
+ rds_wake_sk_sleep(rs);
+
while (!list_empty(&list)) {
rm = list_entry(list.next, struct rds_message, m_sock_item);
list_del_init(&rm->m_sock_item);
@@ -815,7 +817,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
int ret = 0;
int queued = 0, allocated_mr = 0;
int nonblock = msg->msg_flags & MSG_DONTWAIT;
- long timeo = sock_rcvtimeo(sk, nonblock);
+ long timeo = sock_sndtimeo(sk, nonblock);
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
@@ -894,8 +896,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
- if (ret)
+ if (ret) {
+ rs->rs_seen_congestion = 1;
goto out;
+ }
while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
dport, &queued)) {
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index c00daff..40bfcf8 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -97,6 +97,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
goto out;
}
+ rds_stats_add(s_copy_to_user, to_copy);
size -= to_copy;
ret += to_copy;
skb_off += to_copy;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 34fdcc0..a28b895 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk)
tc->t_last_seen_una = rds_tcp_snd_una(tc);
rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+ queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+
out:
read_unlock(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 00fa10e..786c20e 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -259,7 +259,7 @@ void rds_threads_exit(void)
int __init rds_threads_init(void)
{
- rds_wq = create_singlethread_workqueue("krdsd");
+ rds_wq = create_workqueue("krdsd");
if (rds_wq == NULL)
return -ENOMEM;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index c0c973e..3d74b26 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association(
const union sctp_addr *peer,
struct sctp_transport **pt);
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
/* Calculate the SCTP checksum of an SCTP packet. */
@@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb)
}
if (sock_owned_by_user(sk)) {
+ if (sctp_add_backlog(sk, skb)) {
+ sctp_bh_unlock_sock(sk);
+ sctp_chunk_free(chunk);
+ skb = NULL; /* sctp_chunk_free already freed the skb */
+ goto discard_release;
+ }
SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
- sctp_add_backlog(sk, skb);
} else {
SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
@@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
sctp_bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
- sk_add_backlog(sk, skb);
- backloged = 1;
+ if (sk_add_backlog(sk, skb))
+ sctp_chunk_free(chunk);
+ else
+ backloged = 1;
} else
sctp_inq_push(inqueue, chunk);
@@ -362,22 +369,27 @@ done:
return 0;
}
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
struct sctp_ep_common *rcvr = chunk->rcvr;
+ int ret;
- /* Hold the assoc/ep while hanging on the backlog queue.
- * This way, we know structures we need will not disappear from us
- */
- if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
- sctp_association_hold(sctp_assoc(rcvr));
- else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
- sctp_endpoint_hold(sctp_ep(rcvr));
- else
- BUG();
+ ret = sk_add_backlog(sk, skb);
+ if (!ret) {
+ /* Hold the assoc/ep while hanging on the backlog queue.
+ * This way, we know structures we need will not disappear
+ * from us
+ */
+ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
+ sctp_association_hold(sctp_assoc(rcvr));
+ else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
+ sctp_endpoint_hold(sctp_ep(rcvr));
+ else
+ BUG();
+ }
+ return ret;
- sk_add_backlog(sk, skb);
}
/* Handle icmp frag needed error. */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 1d7ac70..240dceb 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -371,7 +371,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
}
read_lock_bh(&in6_dev->lock);
- for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) {
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
/* Add the address to the local list. */
addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
if (addr) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4e4ca65..500886b 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
* used to provide an upper bound to this doubling operation.
*
* Special Case: the first HB doesn't trigger exponential backoff.
- * The first unacknowleged HB triggers it. We do this with a flag
+ * The first unacknowledged HB triggers it. We do this with a flag
* that indicates that we have an outstanding HB.
*/
if (!is_hb || transport->hb_sent) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f6d1e59..dfc5c12 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
percpu_counter_inc(&sctp_sockets_allocated);
+ /* Set socket backlog limit. */
+ sk->sk_backlog.limit = sysctl_sctp_rmem[1];
+
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
local_bh_enable();
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 6dcdd25..f845d9d 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -71,8 +71,9 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
if (unlikely(len == 0))
return len;
- if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
- !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL))
+ if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+ return len;
+ if (sin6->sin6_scope_id == 0)
return len;
rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
@@ -165,8 +166,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen,
if (*delim != IPV6_SCOPE_DELIMITER)
return 0;
- if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
- !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL))
+ if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return 0;
len = (buf + buflen) - delim - 1;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f7a7f83..0cfccc2a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -206,8 +206,14 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
ctx->gc_win = window_size;
/* gssd signals an error by passing ctx->gc_win = 0: */
if (ctx->gc_win == 0) {
- /* in which case, p points to an error code which we ignore */
- p = ERR_PTR(-EACCES);
+ /*
+ * in which case, p points to an error code. Anything other
+ * than -EKEYEXPIRED gets converted to -EACCES.
+ */
+ p = simple_get_bytes(p, end, &ret, sizeof(ret));
+ if (!IS_ERR(p))
+ p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) :
+ ERR_PTR(-EACCES);
goto err;
}
/* copy the opaque wire context */
@@ -646,6 +652,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err = PTR_ERR(p);
switch (err) {
case -EACCES:
+ case -EKEYEXPIRED:
gss_msg->msg.errno = err;
err = mlen;
break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9ea4538..8d63f8f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -999,19 +999,14 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
inode = rpc_get_inode(sb, S_IFDIR | 0755);
if (!inode)
return -ENOMEM;
- root = d_alloc_root(inode);
+ sb->s_root = root = d_alloc_root(inode);
if (!root) {
iput(inode);
return -ENOMEM;
}
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
- goto out;
- sb->s_root = root;
+ return -ENOMEM;
return 0;
-out:
- d_genocide(root);
- dput(root);
- return -ENOMEM;
}
static int
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 538ca43..8420a42 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -133,7 +133,7 @@ svc_pool_map_choose_mode(void)
return SVC_POOL_PERNODE;
}
- node = any_online_node(node_online_map);
+ node = first_online_node;
if (nr_cpus_node(node) > 2) {
/*
* Non-trivial SMP, or CONFIG_NUMA on
@@ -506,6 +506,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{
unsigned int pages, arghi;
+ /* bc_xprt uses fore channel allocated buffers */
+ if (svc_is_backchannel(rqstp))
+ return 1;
+
pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
* We assume one is at most one page
*/
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7d1f9e9..8f0f1fb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -173,11 +173,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
struct sockaddr *sap;
size_t len;
@@ -186,10 +188,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
default:
return ERR_PTR(-EAFNOSUPPORT);
}
@@ -231,7 +235,10 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
err:
spin_unlock(&svc_xprt_class_lock);
dprintk("svc: transport %s not found\n", xprt_name);
- return -ENOENT;
+
+ /* This errno is exposed to user space. Provide a reasonable
+ * perror msg for a bad transport. */
+ return -EPROTONOSUPPORT;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -699,8 +706,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
spin_unlock_bh(&pool->sp_lock);
len = 0;
- if (test_bit(XPT_LISTENER, &xprt->xpt_flags) &&
- !test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
+ if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
+ dprintk("svc_recv: found XPT_CLOSE\n");
+ svc_delete_xprt(xprt);
+ } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
struct svc_xprt *newxpt;
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt) {
@@ -726,7 +735,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
svc_xprt_received(newxpt);
}
svc_xprt_received(xprt);
- } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
+ } else {
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, pool->sp_id, xprt,
atomic_read(&xprt->xpt_ref.refcount));
@@ -739,11 +748,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
dprintk("svc: got len=%d\n", len);
}
- if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
- dprintk("svc_recv: found XPT_CLOSE\n");
- svc_delete_xprt(xprt);
- }
-
/* No data, incomplete (TCP) read, or accept() */
if (len == 0 || len == -EAGAIN) {
rqstp->rq_res.len = 0;
@@ -889,11 +893,8 @@ void svc_delete_xprt(struct svc_xprt *xprt)
if (test_bit(XPT_TEMP, &xprt->xpt_flags))
serv->sv_tmpcnt--;
- for (dr = svc_deferred_dequeue(xprt); dr;
- dr = svc_deferred_dequeue(xprt)) {
- svc_xprt_put(xprt);
+ while ((dr = svc_deferred_dequeue(xprt)) != NULL)
kfree(dr);
- }
svc_xprt_put(xprt);
spin_unlock_bh(&serv->sv_lock);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d8c0411..afdcb04 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
+#include <linux/sunrpc/clnt.h>
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
@@ -187,10 +188,13 @@ static int ip_map_parse(struct cache_detail *cd,
* for scratch: */
char *buf = mesg;
int len;
- int b1, b2, b3, b4, b5, b6, b7, b8;
- char c;
char class[8];
- struct in6_addr addr;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in s4;
+ struct sockaddr_in6 s6;
+ } address;
+ struct sockaddr_in6 sin6;
int err;
struct ip_map *ipmp;
@@ -209,24 +213,24 @@ static int ip_map_parse(struct cache_detail *cd,
len = qword_get(&mesg, buf, mlen);
if (len <= 0) return -EINVAL;
- if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) {
- addr.s6_addr32[0] = 0;
- addr.s6_addr32[1] = 0;
- addr.s6_addr32[2] = htonl(0xffff);
- addr.s6_addr32[3] =
- htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
- } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c",
- &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) {
- addr.s6_addr16[0] = htons(b1);
- addr.s6_addr16[1] = htons(b2);
- addr.s6_addr16[2] = htons(b3);
- addr.s6_addr16[3] = htons(b4);
- addr.s6_addr16[4] = htons(b5);
- addr.s6_addr16[5] = htons(b6);
- addr.s6_addr16[6] = htons(b7);
- addr.s6_addr16[7] = htons(b8);
- } else
+ if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0)
return -EINVAL;
+ switch (address.sa.sa_family) {
+ case AF_INET:
+ /* Form a mapped IPv4 address in sin6 */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr;
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ memcpy(&sin6, &address.s6, sizeof(sin6));
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
expiry = get_expiry(&mesg);
if (expiry ==0)
@@ -243,7 +247,8 @@ static int ip_map_parse(struct cache_detail *cd,
} else
dom = NULL;
- ipmp = ip_map_lookup(class, &addr);
+ /* IPv6 scope IDs are ignored for now */
+ ipmp = ip_map_lookup(class, &sin6.sin6_addr);
if (ipmp) {
err = ip_map_update(ipmp,
container_of(dom, struct unix_domain, h),
@@ -619,7 +624,7 @@ static int unix_gid_show(struct seq_file *m,
else
glen = 0;
- seq_printf(m, "%d %d:", ug->uid, glen);
+ seq_printf(m, "%u %d:", ug->uid, glen);
for (i = 0; i < glen; i++)
seq_printf(m, " %d", GROUP_AT(ug->gi, i));
seq_printf(m, "\n");
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 870929e..a29f259 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -968,6 +968,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
return len;
err_delete:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_received(&svsk->sk_xprt);
err_again:
return -EAGAIN;
}
@@ -1357,7 +1358,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
if (!so)
return err;
- if (so->sk->sk_family != AF_INET)
+ if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
err = -EAFNOSUPPORT;
else if (so->sk->sk_protocol != IPPROTO_TCP &&
so->sk->sk_protocol != IPPROTO_UDP)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 7018eef..f96c2fe 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -160,16 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
(void)rpc_ntop(sap, buf, sizeof(buf));
xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
- (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+ snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
- (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x",
- NIPQUAD(sin->sin_addr.s_addr));
+ snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
- (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+ snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
/* netid */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d739e5..75ab08e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
switch (sap->sa_family) {
case AF_INET:
sin = xs_addr_in(xprt);
- (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x",
- NIPQUAD(sin->sin_addr.s_addr));
+ snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
break;
case AF_INET6:
sin6 = xs_addr_in6(xprt);
- (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+ snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
break;
default:
BUG();
@@ -315,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt)
struct sockaddr *sap = xs_addr(xprt);
char buf[128];
- (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+ snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
- (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+ snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
}
@@ -1912,6 +1911,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
case -EALREADY:
xprt_clear_connecting(xprt);
return;
+ case -EINVAL:
+ /* Happens, for instance, if the user specified a link
+ * local IPv6 address without a scope-id.
+ */
+ goto out;
}
out_eagain:
status = -EAGAIN;
@@ -2100,7 +2104,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
* we allocate pages instead doing a kmalloc like rpc_malloc is because we want
* to use the server side send routines.
*/
-void *bc_malloc(struct rpc_task *task, size_t size)
+static void *bc_malloc(struct rpc_task *task, size_t size)
{
struct page *page;
struct rpc_buffer *buf;
@@ -2120,7 +2124,7 @@ void *bc_malloc(struct rpc_task *task, size_t size)
/*
* Free the space allocated in the bc_alloc routine
*/
-void bc_free(void *buffer)
+static void bc_free(void *buffer)
{
struct rpc_buffer *buf;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a3bfd40..90a0519 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
struct tipc_bearer *unused1,
struct tipc_media_addr *unused2)
{
- static int send_count = 0;
-
int bp_index;
- int swap_time;
/* Prepare buffer for broadcasting (if first time trying to send it) */
@@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
msg_set_mc_netid(msg, tipc_net_id);
}
- /* Determine if bearer pairs should be swapped following this attempt */
-
- if ((swap_time = (++send_count >= 10)))
- send_count = 0;
-
/* Send buffer over bearers until all targets reached */
bcbearer->remains = tipc_cltr_bcast_nodes;
@@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
if (bcbearer->remains_new.count == bcbearer->remains.count)
continue; /* bearer pair doesn't add anything */
- if (!p->publ.blocked &&
- !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
- if (swap_time && s && !s->publ.blocked)
- goto swap;
- else
- goto update;
+ if (p->publ.blocked ||
+ p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
+ /* unable to send on primary bearer */
+ if (!s || s->publ.blocked ||
+ s->media->send_msg(buf, &s->publ,
+ &s->media->bcast_addr)) {
+ /* unable to send on either bearer */
+ continue;
+ }
+ }
+
+ if (s) {
+ bcbearer->bpairs[bp_index].primary = s;
+ bcbearer->bpairs[bp_index].secondary = p;
}
- if (!s || s->publ.blocked ||
- s->media->send_msg(buf, &s->publ, &s->media->bcast_addr))
- continue; /* unable to send using bearer pair */
-swap:
- bcbearer->bpairs[bp_index].primary = s;
- bcbearer->bpairs[bp_index].secondary = p;
-update:
if (bcbearer->remains_new.count == 0)
return 0;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 327011f..7809137 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -45,10 +45,10 @@
#define MAX_ADDR_STR 32
-static struct media *media_list = NULL;
+static struct media media_list[MAX_MEDIA];
static u32 media_count = 0;
-struct bearer *tipc_bearers = NULL;
+struct bearer tipc_bearers[MAX_BEARERS];
/**
* media_name_valid - validate media name
@@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type,
int res = -EINVAL;
write_lock_bh(&tipc_net_lock);
- if (!media_list)
- goto exit;
+ if (tipc_mode != TIPC_NET_MODE) {
+ warn("Media <%s> rejected, not in networked mode yet\n", name);
+ goto exit;
+ }
if (!media_name_valid(name)) {
warn("Media <%s> rejected, illegal name\n", name);
goto exit;
@@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name)
-int tipc_bearer_init(void)
-{
- int res;
-
- write_lock_bh(&tipc_net_lock);
- tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
- media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
- if (tipc_bearers && media_list) {
- res = 0;
- } else {
- kfree(tipc_bearers);
- kfree(media_list);
- tipc_bearers = NULL;
- media_list = NULL;
- res = -ENOMEM;
- }
- write_unlock_bh(&tipc_net_lock);
- return res;
-}
-
void tipc_bearer_stop(void)
{
u32 i;
- if (!tipc_bearers)
- return;
-
for (i = 0; i < MAX_BEARERS; i++) {
if (tipc_bearers[i].active)
tipc_bearers[i].publ.blocked = 1;
@@ -695,10 +674,6 @@ void tipc_bearer_stop(void)
if (tipc_bearers[i].active)
bearer_disable(tipc_bearers[i].publ.name);
}
- kfree(tipc_bearers);
- kfree(media_list);
- tipc_bearers = NULL;
- media_list = NULL;
media_count = 0;
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ca57348..000228e 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -114,7 +114,7 @@ struct bearer_name {
struct link;
-extern struct bearer *tipc_bearers;
+extern struct bearer tipc_bearers[];
void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
struct sk_buff *tipc_media_get_names(void);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6f50f64..49f2be8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
/* Continue retransmission now, if there is anything: */
- if (r_q_size && buf && !skb_cloned(buf)) {
+ if (r_q_size && buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
- if (!skb_cloned(buf)) {
+ if (l_ptr->retransm_queue_size == 0) {
msg_dbg(msg, ">NO_RETR->BCONG>");
dbg_print_link(l_ptr, " ");
l_ptr->retransm_queue_head = msg_seqno(msg);
l_ptr->retransm_queue_size = retransmits;
- return;
} else {
- /* Don't retransmit if driver already has the buffer */
+ err("Unexpected retransmit on link %s (qsize=%d)\n",
+ l_ptr->name, l_ptr->retransm_queue_size);
}
+ return;
} else {
/* Detect repeated retransmit failures on uncongested bearer */
@@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
}
}
- while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) {
+ while (retransmits && (buf != l_ptr->next_out) && buf) {
msg = buf_msg(buf);
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
@@ -1882,6 +1883,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
(msg_destnode(msg) != tipc_own_addr)))
goto cont;
+ /* Discard non-routeable messages destined for another node */
+
+ if (unlikely(!msg_isdata(msg) &&
+ (msg_destnode(msg) != tipc_own_addr))) {
+ if ((msg_user(msg) != CONN_MANAGER) &&
+ (msg_user(msg) != MSG_FRAGMENTER))
+ goto cont;
+ }
+
/* Locate unicast link endpoint that should handle message */
n_ptr = tipc_node_find(msg_prevnode(msg));
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7906608..f25b1cd 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -116,7 +116,8 @@
*/
DEFINE_RWLOCK(tipc_net_lock);
-struct network tipc_net = { NULL };
+struct _zone *tipc_zones[256] = { NULL, };
+struct network tipc_net = { tipc_zones };
struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
{
@@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest)
}
}
-static int net_init(void)
-{
- memset(&tipc_net, 0, sizeof(tipc_net));
- tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC);
- if (!tipc_net.zones) {
- return -ENOMEM;
- }
- return 0;
-}
-
static void net_stop(void)
{
u32 z_num;
- if (!tipc_net.zones)
- return;
-
- for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
+ for (z_num = 1; z_num <= tipc_max_zones; z_num++)
tipc_zone_delete(tipc_net.zones[z_num]);
- }
- kfree(tipc_net.zones);
- tipc_net.zones = NULL;
}
static void net_route_named_msg(struct sk_buff *buf)
@@ -282,9 +267,7 @@ int tipc_net_start(u32 addr)
tipc_named_reinit();
tipc_port_reinit();
- if ((res = tipc_bearer_init()) ||
- (res = net_init()) ||
- (res = tipc_cltr_init()) ||
+ if ((res = tipc_cltr_init()) ||
(res = tipc_bclink_init())) {
return res;
}
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 414fc34..8dea665 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -153,11 +153,11 @@ void tipc_ref_table_stop(void)
u32 tipc_ref_acquire(void *object, spinlock_t **lock)
{
- struct reference *entry;
u32 index;
u32 index_mask;
u32 next_plus_upper;
u32 ref;
+ struct reference *entry = NULL;
if (!object) {
err("Attempt to acquire reference to non-existent object\n");
@@ -175,30 +175,36 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
index = tipc_ref_table.first_free;
entry = &(tipc_ref_table.entries[index]);
index_mask = tipc_ref_table.index_mask;
- /* take lock in case a previous user of entry still holds it */
- spin_lock_bh(&entry->lock);
next_plus_upper = entry->ref;
tipc_ref_table.first_free = next_plus_upper & index_mask;
ref = (next_plus_upper & ~index_mask) + index;
- entry->ref = ref;
- entry->object = object;
- *lock = &entry->lock;
}
else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
index = tipc_ref_table.init_point++;
entry = &(tipc_ref_table.entries[index]);
spin_lock_init(&entry->lock);
- spin_lock_bh(&entry->lock);
ref = tipc_ref_table.start_mask + index;
- entry->ref = ref;
- entry->object = object;
- *lock = &entry->lock;
}
else {
ref = 0;
}
write_unlock_bh(&ref_table_lock);
+ /*
+ * Grab the lock so no one else can modify this entry
+ * While we assign its ref value & object pointer
+ */
+ if (entry) {
+ spin_lock_bh(&entry->lock);
+ entry->ref = ref;
+ entry->object = object;
+ *lock = &entry->lock;
+ /*
+ * keep it locked, the caller is responsible
+ * for unlocking this when they're done with it
+ */
+ }
+
return ref;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1ea64f0..4b235fc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
if (!sock_owned_by_user(sk)) {
res = filter_rcv(sk, buf);
} else {
- sk_add_backlog(sk, buf);
- res = TIPC_OK;
+ if (sk_add_backlog(sk, buf))
+ res = TIPC_ERR_OVERLOAD;
+ else
+ res = TIPC_OK;
}
bh_unlock_sock(sk);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ac91f0d..ff123e5 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -76,19 +76,6 @@ struct top_srv {
static struct top_srv topsrv = { 0 };
/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-
-static u32 htohl(u32 in, int swap)
-{
- return swap ? swab32(in) : in;
-}
-
-/**
* subscr_send_event - send a message containing a tipc_event to the subscriber
*
* Note: Must not hold subscriber's server port lock, since tipc_send() will
@@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub,
msg_sect.iov_base = (void *)&sub->evt;
msg_sect.iov_len = sizeof(struct tipc_event);
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
+ sub->evt.event = htonl(event);
+ sub->evt.found_lower = htonl(found_lower);
+ sub->evt.found_upper = htonl(found_upper);
+ sub->evt.port.ref = htonl(port_ref);
+ sub->evt.port.node = htonl(node);
tipc_send(sub->server_ref, 1, &msg_sect);
}
@@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s,
{
struct subscription *sub;
struct subscription *sub_temp;
+ __u32 type, lower, upper;
int found = 0;
/* Find first matching subscription, exit if not found */
+ type = ntohl(s->seq.type);
+ lower = ntohl(s->seq.lower);
+ upper = ntohl(s->seq.upper);
+
list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
subscription_list) {
- if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- found = 1;
- break;
- }
+ if ((type == sub->seq.type) &&
+ (lower == sub->seq.lower) &&
+ (upper == sub->seq.upper)) {
+ found = 1;
+ break;
+ }
}
if (!found)
return;
@@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
struct subscriber *subscriber)
{
struct subscription *sub;
- int swap;
-
- /* Determine subscriber's endianness */
-
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
/* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+ if (ntohl(s->filter) & TIPC_SUB_CANCEL) {
subscr_cancel(s, subscriber);
return NULL;
}
@@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
/* Initialize subscription object */
- sub->seq.type = htohl(s->seq.type, swap);
- sub->seq.lower = htohl(s->seq.lower, swap);
- sub->seq.upper = htohl(s->seq.upper, swap);
- sub->timeout = htohl(s->timeout, swap);
- sub->filter = htohl(s->filter, swap);
+ sub->seq.type = ntohl(s->seq.type);
+ sub->seq.lower = ntohl(s->seq.lower);
+ sub->seq.upper = ntohl(s->seq.upper);
+ sub->timeout = ntohl(s->timeout);
+ sub->filter = ntohl(s->filter);
if ((!(sub->filter & TIPC_SUB_PORTS) ==
!(sub->filter & TIPC_SUB_SERVICE)) ||
(sub->seq.lower > sub->seq.upper)) {
@@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
INIT_LIST_HEAD(&sub->nameseq_list);
list_add(&sub->subscription_list, &subscriber->subscription_list);
sub->server_ref = subscriber->port_ref;
- sub->swap = swap;
memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
atomic_inc(&topsrv.subscription_count);
if (sub->timeout != TIPC_WAIT_FOREVER) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 45d89bf..c20f496 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
* @subscription_list: adjacent subscriptions in subscriber's subscription list
* @server_ref: object reference of server port associated with subscription
- * @swap: indicates if subscriber uses opposite endianness in its messages
* @evt: template for events generated by subscription
*/
@@ -66,7 +65,6 @@ struct subscription {
struct list_head nameseq_list;
struct list_head subscription_list;
u32 server_ref;
- int swap;
struct tipc_event evt;
};
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 3e1efe5..52e3042 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
if (!sock_owned_by_user(sk)) {
queued = x25_process_rx_frame(sk, skb);
} else {
- sk_add_backlog(sk, skb);
+ queued = !sk_add_backlog(sk, skb);
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 34a5ef8..843e066 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
return err;
}
-static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ struct flowi *fl)
{
struct xfrm_policy_afinfo *afinfo =
xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
if (!afinfo)
return -EINVAL;
- err = afinfo->fill_dst(xdst, dev);
+ err = afinfo->fill_dst(xdst, dev, fl);
xfrm_policy_put_afinfo(afinfo);
@@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
- err = xfrm_fill_dst(xdst, dev);
+ err = xfrm_fill_dst(xdst, dev, fl);
if (err)
goto free_dst;