diff options
Diffstat (limited to 'net')
116 files changed, 2137 insertions, 1159 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4535122..c39a5f4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -530,6 +530,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } unregister_netdevice_many(&list); break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; } out: diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c0316e0..c584a0a 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (netpoll_rx(skb)) return NET_RX_DROP; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; @@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; diff --git a/net/9p/client.c b/net/9p/client.c index 09d4f1e..e3e5bf4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -46,6 +46,7 @@ enum { Opt_msize, Opt_trans, Opt_legacy, + Opt_version, Opt_err, }; @@ -53,9 +54,42 @@ static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, + {Opt_version, "version=%s"}, {Opt_err, NULL}, }; +inline int p9_is_proto_dotl(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2000L); +} +EXPORT_SYMBOL(p9_is_proto_dotl); + +inline int p9_is_proto_dotu(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2000u); +} +EXPORT_SYMBOL(p9_is_proto_dotu); + +/* Interpret mount option for protocol version */ +static unsigned char get_protocol_version(const substring_t *name) +{ + unsigned char version = -EINVAL; + if (!strncmp("9p2000", name->from, name->to-name->from)) { + version = p9_proto_legacy; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + version = p9_proto_2000u; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + version = p9_proto_2000L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); + } else { + P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", + name->from); + } + return version; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -75,7 +109,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->dotu = 1; + clnt->proto_version = p9_proto_2000u; clnt->msize = 8192; if (!opts) @@ -118,7 +152,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) } break; case Opt_legacy: - clnt->dotu = 0; + clnt->proto_version = p9_proto_legacy; + break; + case Opt_version: + ret = get_protocol_version(&args[0]); + if (ret == -EINVAL) + goto free_and_return; + clnt->proto_version = ret; break; default: continue; @@ -410,14 +450,15 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; char *ename; - err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } - if (c->dotu) + if (p9_is_proto_dotu(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -515,7 +556,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) /* marshall the data */ p9pdu_prepare(req->tc, tag, type); va_start(ap, fmt); - err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); va_end(ap); p9pdu_finalize(req->tc); @@ -627,14 +668,31 @@ int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", - c->msize, c->dotu); - req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, - c->dotu ? "9P2000.u" : "9P2000"); + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); + + switch (c->proto_version) { + case p9_proto_2000L: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000.L"); + break; + case p9_proto_2000u: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000.u"); + break; + case p9_proto_legacy: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000"); + break; + default: + return -EINVAL; + break; + } + if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); p9pdu_dump(1, req->rc); @@ -642,10 +700,12 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!memcmp(version, "9P2000.u", 8)) - c->dotu = 1; - else if (!memcmp(version, "9P2000", 6)) - c->dotu = 0; + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; + else if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; else { err = -EREMOTEIO; goto error; @@ -700,8 +760,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->dotu); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -784,7 +844,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -833,7 +893,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -891,7 +951,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); + err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -952,7 +1012,7 @@ int p9_client_open(struct p9_fid *fid, int mode) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -997,7 +1057,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1098,7 +1158,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1159,7 +1219,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1199,7 +1259,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -1226,7 +1286,7 @@ error: } EXPORT_SYMBOL(p9_client_stat); -static int p9_client_statsize(struct p9_wstat *wst, int optional) +static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1245,7 +1305,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) if (wst->muid) ret += strlen(wst->muid); - if (optional) { + if (proto_version == p9_proto_2000u) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); @@ -1262,7 +1322,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; - wst->size = p9_client_statsize(wst, clnt->dotu); + wst->size = p9_client_statsize(wst, clnt->proto_version); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" diff --git a/net/9p/protocol.c b/net/9p/protocol.c index fc70147..94f5a8f 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -52,7 +52,7 @@ #endif static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); #ifdef CONFIG_NET_9P_DEBUG void @@ -144,7 +144,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) */ static int -p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -194,7 +195,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t len; int size; - errcode = p9pdu_readf(pdu, optional, "w", &len); + errcode = p9pdu_readf(pdu, proto_version, + "w", &len); if (errcode) break; @@ -217,7 +219,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *qid = va_arg(ap, struct p9_qid *); - errcode = p9pdu_readf(pdu, optional, "bdq", + errcode = p9pdu_readf(pdu, proto_version, "bdq", &qid->type, &qid->version, &qid->path); } @@ -230,7 +232,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -1; errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, proto_version, "wwdQdddqssss?sddd", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, @@ -250,7 +252,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) void **data = va_arg(ap, void **); errcode = - p9pdu_readf(pdu, optional, "d", count); + p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = MIN(*count, @@ -263,8 +265,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t *nwname = va_arg(ap, int16_t *); char ***wnames = va_arg(ap, char ***); - errcode = - p9pdu_readf(pdu, optional, "w", nwname); + errcode = p9pdu_readf(pdu, proto_version, + "w", nwname); if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, @@ -278,7 +280,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwname; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "s", &(*wnames)[i]); if (errcode) @@ -306,7 +309,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) *wqids = NULL; errcode = - p9pdu_readf(pdu, optional, "w", nwqid); + p9pdu_readf(pdu, proto_version, "w", nwqid); if (!errcode) { *wqids = kmalloc(*nwqid * @@ -321,7 +324,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwqid; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "Q", &(*wqids)[i]); if (errcode) @@ -336,7 +340,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -352,7 +356,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -389,7 +394,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) if (sptr) len = MIN(strlen(sptr), USHORT_MAX); - errcode = p9pdu_writef(pdu, optional, "w", len); + errcode = p9pdu_writef(pdu, proto_version, + "w", len); if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } @@ -398,7 +404,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); errcode = - p9pdu_writef(pdu, optional, "bdq", + p9pdu_writef(pdu, proto_version, "bdq", qid->type, qid->version, qid->path); } break; @@ -406,7 +412,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_wstat *stbuf = va_arg(ap, const struct p9_wstat *); errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, proto_version, "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, @@ -421,8 +427,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const void *data = va_arg(ap, const void *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write(pdu, data, count)) errcode = -EFAULT; } @@ -431,8 +437,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const char __user *udata = va_arg(ap, const void __user *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write_u(pdu, udata, count)) errcode = -EFAULT; } @@ -441,14 +447,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); - errcode = - p9pdu_writef(pdu, optional, "w", nwname); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwname); if (!errcode) { int i; for (i = 0; i < nwname; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "s", wnames[i]); if (errcode) @@ -462,14 +469,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *wqids = va_arg(ap, struct p9_qid *); - errcode = - p9pdu_writef(pdu, optional, "w", nwqid); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwqid); if (!errcode) { int i; for (i = 0; i < nwqid; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "Q", &wqids[i]); if (errcode) @@ -479,7 +487,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -494,32 +502,32 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) return errcode; } -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vreadf(pdu, optional, fmt, ap); + ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); va_end(ap); return ret; } static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vwritef(pdu, optional, fmt, ap); + ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); va_end(ap); return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) { struct p9_fcall fake_pdu; int ret; @@ -529,7 +537,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); p9pdu_dump(1, &fake_pdu); diff --git a/net/9p/protocol.h b/net/9p/protocol.h index ccde462..2431c0f 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -25,9 +25,9 @@ * */ -int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); int p9pdu_finalize(struct p9_fcall *pdu); void p9pdu_dump(int, struct p9_fcall *); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cb50f4a..afde1a8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -49,8 +49,6 @@ /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); -/* global which tracks highest initialized channel */ -static int chan_index; /** * struct virtio_chan - per-instance transport information @@ -68,8 +66,7 @@ static int chan_index; * */ -static struct virtio_chan { - bool initialized; +struct virtio_chan { bool inuse; spinlock_t lock; @@ -80,7 +77,17 @@ static struct virtio_chan { /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; -} channels[MAX_9P_CHAN]; + + int tag_len; + /* + * tag name to identify a mount Non-null terminated + */ + char *tag; + + struct list_head chan_list; +}; + +static struct list_head virtio_chan_list; /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) @@ -213,30 +220,38 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) return 0; } +static ssize_t p9_mount_tag_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct virtio_chan *chan; + struct virtio_device *vdev; + + vdev = dev_to_virtio(dev); + chan = vdev->priv; + + return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); +} + +static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); + /** * p9_virtio_probe - probe for existence of 9P virtio channels * @vdev: virtio device to probe * - * This probes for existing virtio channels. At present only - * a single channel is in use, so in the future more work may need - * to be done here. + * This probes for existing virtio channels. * */ static int p9_virtio_probe(struct virtio_device *vdev) { + __u16 tag_len; + char *tag; int err; struct virtio_chan *chan; - int index; - mutex_lock(&virtio_9p_lock); - index = chan_index++; - chan = &channels[index]; - mutex_unlock(&virtio_9p_lock); - - if (chan_index > MAX_9P_CHAN) { - printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); - BUG(); + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); + if (!chan) { + printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -255,15 +270,37 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; - chan->initialized = true; + if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { + vdev->config->get(vdev, + offsetof(struct virtio_9p_config, tag_len), + &tag_len, sizeof(tag_len)); + } else { + err = -EINVAL; + goto out_free_vq; + } + tag = kmalloc(tag_len, GFP_KERNEL); + if (!tag) { + err = -ENOMEM; + goto out_free_vq; + } + vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); + chan->tag = tag; + chan->tag_len = tag_len; + err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + if (err) { + kfree(tag); + goto out_free_vq; + } + mutex_lock(&virtio_9p_lock); + list_add_tail(&chan->chan_list, &virtio_chan_list); + mutex_unlock(&virtio_9p_lock); return 0; out_free_vq: vdev->config->del_vqs(vdev); + kfree(chan); fail: - mutex_lock(&virtio_9p_lock); - chan_index--; - mutex_unlock(&virtio_9p_lock); return err; } @@ -280,35 +317,31 @@ fail: * We use a simple reference count mechanism to ensure that only a single * mount has a channel open at a time. * - * Bugs: doesn't allow identification of a specific channel - * to allocate, channels are allocated sequentially. This was - * a pragmatic decision to get things rolling, but ideally some - * way of identifying the channel to attach to would be nice - * if we are going to support multiple channels. - * */ static int p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct virtio_chan *chan = channels; - int index = 0; + struct virtio_chan *chan; + int ret = -ENOENT; + int found = 0; mutex_lock(&virtio_9p_lock); - while (index < MAX_9P_CHAN) { - if (chan->initialized && !chan->inuse) { - chan->inuse = true; - break; - } else { - index++; - chan = &channels[index]; + list_for_each_entry(chan, &virtio_chan_list, chan_list) { + if (!strncmp(devname, chan->tag, chan->tag_len)) { + if (!chan->inuse) { + chan->inuse = true; + found = 1; + break; + } + ret = -EBUSY; } } mutex_unlock(&virtio_9p_lock); - if (index >= MAX_9P_CHAN) { + if (!found) { printk(KERN_ERR "9p: no channels available\n"); - return -ENODEV; + return ret; } client->trans = (void *)chan; @@ -329,11 +362,15 @@ static void p9_virtio_remove(struct virtio_device *vdev) struct virtio_chan *chan = vdev->priv; BUG_ON(chan->inuse); + vdev->config->del_vqs(vdev); + + mutex_lock(&virtio_9p_lock); + list_del(&chan->chan_list); + mutex_unlock(&virtio_9p_lock); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kfree(chan->tag); + kfree(chan); - if (chan->initialized) { - vdev->config->del_vqs(vdev); - chan->initialized = false; - } } static struct virtio_device_id id_table[] = { @@ -341,13 +378,19 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static unsigned int features[] = { + VIRTIO_9P_MOUNT_TAG, +}; + /* The standard "struct lguest_driver": */ static struct virtio_driver p9_virtio_drv = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = p9_virtio_probe, - .remove = p9_virtio_remove, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, }; static struct p9_trans_module p9_virtio_trans = { @@ -364,10 +407,7 @@ static struct p9_trans_module p9_virtio_trans = { /* The standard init function */ static int __init p9_virtio_init(void) { - int count; - - for (count = 0; count < MAX_9P_CHAN; count++) - channels[count].initialized = false; + INIT_LIST_HEAD(&virtio_chan_list); v9fs_register_trans(&p9_virtio_trans); return register_virtio_driver(&p9_virtio_drv); diff --git a/net/atm/proc.c b/net/atm/proc.c index 7a96b23..f188a39 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -406,7 +406,6 @@ EXPORT_SYMBOL(atm_proc_root); int atm_proc_dev_register(struct atm_dev *dev) { - int digits, num; int error; /* No proc info */ @@ -414,16 +413,9 @@ int atm_proc_dev_register(struct atm_dev *dev) return 0; error = -ENOMEM; - digits = 0; - for (num = dev->number; num; num /= 10) - digits++; - if (!digits) - digits++; - - dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL); + dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number); if (!dev->proc_name) goto err_out; - sprintf(dev->proc_name, "%s:%d", dev->type, dev->number); dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root, &proc_atm_dev_ops, dev); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index b6234b7..326ab45 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN); r->len = htons(ETH_ALEN * 2); } else { - struct dev_mc_list *dmi = dev->mc_list; + struct dev_mc_list *dmi; int i, len = skb->len; if (dev->flags & IFF_BROADCAST) { @@ -97,12 +97,12 @@ static void bnep_net_set_mc_list(struct net_device *dev) /* FIXME: We should group addresses here. */ - for (i = 0; - i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS; - i++) { + i = 0; + netdev_for_each_mc_addr(dmi, dev) { + if (i == BNEP_MAX_MULTICAST_FILTERS) + break; memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); - dmi = dmi->next; } r->len = htons(skb->len - len); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 1a79a6c..cafb55b 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/debugfs.h> +#include <linux/seq_file.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -405,20 +406,11 @@ static struct device_type bt_host = { .release = bt_host_release, }; -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static int inquiry_cache_show(struct seq_file *f, void *p) { - struct hci_dev *hdev = file->private_data; + struct hci_dev *hdev = f->private; struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; - char buf[4096]; - int n = 0; hci_dev_lock_bh(hdev); @@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); + seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); - return simple_read_from_buffer(userbuf, count, ppos, buf, n); + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); } static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = inquiry_cache_read, + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; int hci_register_sysfs(struct hci_dev *hdev) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 400efa2..4db7ae2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3937,7 +3937,9 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) +static ssize_t l2cap_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 89f4a59..db8a68e 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct rfcomm_session *s; struct list_head *pp, *p; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4b5968d..ca87d6a 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1061,7 +1061,9 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_sock_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index dd8f6ec..f93b939 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -953,7 +953,9 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, char *buf) +static ssize_t sco_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 19a6b96..d115d5c 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -35,6 +35,7 @@ config BRIDGE config BRIDGE_IGMP_SNOOPING bool "IGMP snooping" depends on BRIDGE + depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eb7062d..5b8a6e7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - BR_INPUT_SKB_CB(skb)->brdev = dev; + brstats->tx_packets++; + brstats->tx_bytes += skb->len; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -40,7 +41,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb); @@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev) return 0; } +static struct net_device_stats *br_get_stats(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct br_cpu_netstats sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + const struct br_cpu_netstats *bstats + = per_cpu_ptr(br->stats, cpu); + + sum.tx_bytes += bstats->tx_bytes; + sum.tx_packets += bstats->tx_packets; + sum.rx_bytes += bstats->rx_bytes; + sum.rx_packets += bstats->rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + stats->rx_packets = sum.rx_packets; + + return stats; +} + static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); @@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats = br_get_stats, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, }; +static void br_dev_free(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + free_percpu(br->stats); + free_netdev(dev); +} + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d61e6f7..8dbec83 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -19,6 +19,11 @@ #include <linux/netfilter_bridge.h> #include "br_private.h" +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)); + /* Don't forward packets to originating port or forwarding diasabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) @@ -94,17 +99,22 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) } /* called with rcu_read_lock */ -void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { if (should_deliver(to, skb)) { - __br_forward(to, skb); + if (skb0) + deliver_clone(to, skb, __br_forward); + else + __br_forward(to, skb); return; } - kfree_skb(skb); + if (!skb0) + kfree_skb(skb); } -static int deliver_clone(struct net_bridge_port *prev, struct sk_buff *skb, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b6a3872..b7cdd2e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br = netdev_priv(dev); br->dev = dev; + br->stats = alloc_percpu(struct br_cpu_netstats); + if (!br->stats) { + free_netdev(dev); + return NULL; + } + spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); spin_lock_init(&br->hash_lock); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 53b3985..333dfb7 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - brdev->stats.rx_packets++; - brdev->stats.rx_bytes += skb->len; + brstats->rx_packets++; + brstats->rx_bytes += skb->len; indev = skb->dev; skb->dev = brdev; @@ -70,7 +72,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); - if (mdst || BR_INPUT_SKB_CB(skb)->mrouters_only) { + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && !hlist_unhashed(&mdst->mglist)) || br_multicast_is_router(br)) skb2 = skb; @@ -90,7 +92,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (skb) { if (dst) - br_forward(dst->dst, skb); + br_forward(dst->dst, skb, skb2); else br_flood_forward(br, skb, skb2); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2559fb5..9f0c4f0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_entry *mp; struct hlist_node *p; - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { if (dst == mp->addr) return mp; } @@ -49,22 +49,23 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( static struct net_bridge_mdb_entry *br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { + if (!mdb) + return NULL; + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - struct net_bridge_mdb_htable *mdb = br->mdb; - - if (!mdb || br->multicast_disabled) + if (br->multicast_disabled) return NULL; switch (skb->protocol) { case htons(ETH_P_IP): if (BR_INPUT_SKB_CB(skb)->igmp) break; - return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr); + return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); } return NULL; @@ -627,8 +628,8 @@ static void br_multicast_port_query_expired(unsigned long data) struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); - if (port && (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING)) + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) goto out; if (port->multicast_startup_queries_sent < @@ -823,6 +824,7 @@ static int br_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; __be32 group; + int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -841,15 +843,17 @@ static int br_multicast_query(struct net_bridge *br, group = 0; } } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return -EINVAL; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) - return 0; + goto out; - max_delay = ih3->code ? 1 : - IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } if (!group) @@ -876,7 +880,7 @@ static int br_multicast_query(struct net_bridge *br, out: spin_unlock(&br->multicast_lock); - return 0; + return err; } static void br_multicast_leave_group(struct net_bridge *br, @@ -987,7 +991,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) - return err; + goto err_out; } len -= ip_hdrlen(skb2); @@ -999,8 +1003,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (!pskb_may_pull(skb2, sizeof(*ih))) goto out; - iph = ip_hdr(skb2); - switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: if (!csum_fold(skb2->csum)) @@ -1009,7 +1011,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case CHECKSUM_NONE: skb2->csum = 0; if (skb_checksum_complete(skb2)) - return -EINVAL; + goto out; } err = 0; @@ -1036,6 +1038,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, out: __skb_push(skb2, offset); +err_out: if (skb2 != skb) kfree_skb(skb2); return err; @@ -1135,7 +1138,7 @@ void br_multicast_stop(struct net_bridge *br) if (mdb->old) { spin_unlock_bh(&br->multicast_lock); - synchronize_rcu_bh(); + rcu_barrier_bh(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 763a3ec..1413b72 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -82,6 +82,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_UNREGISTER: br_del_if(br, dev); break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; } /* Events that may cause spanning tree to refresh */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cf2cef..791d4ab 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,14 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; + + struct br_cpu_netstats __percpu { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; + } *stats; + spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; @@ -206,12 +214,20 @@ struct net_bridge struct br_input_skb_cb { struct net_device *brdev; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING int igmp; int mrouters_only; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; @@ -252,7 +268,7 @@ extern void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, - struct sk_buff *skb); + struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, @@ -423,7 +439,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern struct sysfs_ops brport_sysfs_ops; +extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); /* br_sysfs_br.c */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 696596c..0b99164 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; diff --git a/net/core/dev.c b/net/core/dev.c index bcc490c..a03aab4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -772,14 +772,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { - struct net_device *dev; + struct net_device *dev, *ret = NULL; - rtnl_lock(); - dev = __dev_getfirstbyhwtype(net, type); - if (dev) - dev_hold(dev); - rtnl_unlock(); - return dev; + rcu_read_lock(); + for_each_netdev_rcu(net, dev) + if (dev->type == type) { + dev_hold(dev); + ret = dev; + break; + } + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(dev_getfirstbyhwtype); @@ -1084,9 +1087,9 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -void netdev_bonding_change(struct net_device *dev, unsigned long event) +int netdev_bonding_change(struct net_device *dev, unsigned long event) { - call_netdevice_notifiers(event, dev); + return call_netdevice_notifiers(event, dev); } EXPORT_SYMBOL(netdev_bonding_change); @@ -1931,7 +1934,7 @@ out_kfree_skb: return rc; } -static u32 skb_tx_hashrnd; +static u32 hashrnd __read_mostly; u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { @@ -1949,7 +1952,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) else hash = skb->protocol; - hash = jhash_1word(hash, skb_tx_hashrnd); + hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1959,10 +1962,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { if (net_ratelimit()) { - WARN(1, "%s selects TX queue %d, but " + netdev_warn(dev, "selects TX queue %d, but " "real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); + queue_index, dev->real_num_tx_queues); } return 0; } @@ -2175,6 +2177,178 @@ int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; +#ifdef CONFIG_SMP +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. + */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb) +{ + struct ipv6hdr *ip6; + struct iphdr *ip; + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + int cpu = -1; + u8 ip_proto; + u32 addr1, addr2, ports, ihl; + + rcu_read_lock(); + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + if (net_ratelimit()) { + netdev_warn(dev, "received packet on queue " + "%u, but number of RX queues is %u\n", + index, dev->num_rx_queues); + } + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map) + goto done; + + if (skb->rxhash) + goto got_hash; /* Skip hash computation on packet header */ + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(*ip))) + goto done; + + ip = (struct iphdr *) skb->data; + ip_proto = ip->protocol; + addr1 = ip->saddr; + addr2 = ip->daddr; + ihl = ip->ihl; + break; + case __constant_htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(*ip6))) + goto done; + + ip6 = (struct ipv6hdr *) skb->data; + ip_proto = ip6->nexthdr; + addr1 = ip6->saddr.s6_addr32[3]; + addr2 = ip6->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + goto done; + } + ports = 0; + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + if (pskb_may_pull(skb, (ihl * 4) + 4)) + ports = *((u32 *) (skb->data + (ihl * 4))); + break; + + default: + break; + } + + skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd); + if (!skb->rxhash) + skb->rxhash = 1; + +got_hash: + map = rcu_dereference(rxqueue->rps_map); + if (map) { + u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + + if (cpu_online(tcpu)) { + cpu = tcpu; + goto done; + } + } + +done: + rcu_read_unlock(); + return cpu; +} + +/* + * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled + * to be sent to kick remote softirq processing. There are two masks since + * the sending of IPIs must be done with interrupts enabled. The select field + * indicates the current mask that enqueue_backlog uses to schedule IPIs. + * select is flipped before net_rps_action is called while still under lock, + * net_rps_action then uses the non-selected mask to send the IPIs and clears + * it without conflicting with enqueue_backlog operation. + */ +struct rps_remote_softirq_cpus { + cpumask_t mask[2]; + int select; +}; +static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus); + +/* Called from hardirq (IPI) context */ +static void trigger_softirq(void *data) +{ + struct softnet_data *queue = data; + __napi_schedule(&queue->backlog); + __get_cpu_var(netdev_rx_stat).received_rps++; +} +#endif /* CONFIG_SMP */ + +/* + * enqueue_to_backlog is called to queue an skb to a per CPU backlog + * queue (may be a remote CPU queue). + */ +static int enqueue_to_backlog(struct sk_buff *skb, int cpu) +{ + struct softnet_data *queue; + unsigned long flags; + + queue = &per_cpu(softnet_data, cpu); + + local_irq_save(flags); + __get_cpu_var(netdev_rx_stat).total++; + + spin_lock(&queue->input_pkt_queue.lock); + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { +enqueue: + __skb_queue_tail(&queue->input_pkt_queue, skb); + spin_unlock_irqrestore(&queue->input_pkt_queue.lock, + flags); + return NET_RX_SUCCESS; + } + + /* Schedule NAPI for backlog device */ + if (napi_schedule_prep(&queue->backlog)) { +#ifdef CONFIG_SMP + if (cpu != smp_processor_id()) { + struct rps_remote_softirq_cpus *rcpus = + &__get_cpu_var(rps_remote_softirq_cpus); + + cpu_set(cpu, rcpus->mask[rcpus->select]); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + } else + __napi_schedule(&queue->backlog); +#else + __napi_schedule(&queue->backlog); +#endif + } + goto enqueue; + } + + spin_unlock(&queue->input_pkt_queue.lock); + + __get_cpu_var(netdev_rx_stat).dropped++; + local_irq_restore(flags); + + kfree_skb(skb); + return NET_RX_DROP; +} /** * netif_rx - post buffer to the network code @@ -2193,8 +2367,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; int netif_rx(struct sk_buff *skb) { - struct softnet_data *queue; - unsigned long flags; + int cpu; /* if netpoll wants it, pretend we never saw it */ if (netpoll_rx(skb)) @@ -2203,31 +2376,15 @@ int netif_rx(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - /* - * The code is rearranged so that the path is the most - * short when CPU is congested, but is still operating. - */ - local_irq_save(flags); - queue = &__get_cpu_var(softnet_data); - - __get_cpu_var(netdev_rx_stat).total++; - if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { - if (queue->input_pkt_queue.qlen) { -enqueue: - __skb_queue_tail(&queue->input_pkt_queue, skb); - local_irq_restore(flags); - return NET_RX_SUCCESS; - } - - napi_schedule(&queue->backlog); - goto enqueue; - } - - __get_cpu_var(netdev_rx_stat).dropped++; - local_irq_restore(flags); +#ifdef CONFIG_SMP + cpu = get_rps_cpu(skb->dev, skb); + if (cpu < 0) + cpu = smp_processor_id(); +#else + cpu = smp_processor_id(); +#endif - kfree_skb(skb); - return NET_RX_DROP; + return enqueue_to_backlog(skb, cpu); } EXPORT_SYMBOL(netif_rx); @@ -2464,25 +2621,11 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *master; struct net_device *null_or_orig; struct net_device *null_or_bond; int ret = NET_RX_DROP; @@ -2503,11 +2646,12 @@ int netif_receive_skb(struct sk_buff *skb) null_or_orig = NULL; orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) + master = ACCESS_ONCE(orig_dev->master); + if (master) { + if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else - skb->dev = orig_dev->master; + skb->dev = master; } __get_cpu_var(netdev_rx_stat).total++; @@ -2588,6 +2732,37 @@ out: rcu_read_unlock(); return ret; } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ +#ifdef CONFIG_SMP + int cpu; + + cpu = get_rps_cpu(skb->dev, skb); + + if (cpu < 0) + return __netif_receive_skb(skb); + else + return enqueue_to_backlog(skb, cpu); +#else + return __netif_receive_skb(skb); +#endif +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending */ @@ -2914,16 +3089,16 @@ static int process_backlog(struct napi_struct *napi, int quota) do { struct sk_buff *skb; - local_irq_disable(); + spin_lock_irq(&queue->input_pkt_queue.lock); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { __napi_complete(napi); - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); break; } - local_irq_enable(); + spin_unlock_irq(&queue->input_pkt_queue.lock); - netif_receive_skb(skb); + __netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); return work; @@ -3012,6 +3187,24 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +#ifdef CONFIG_SMP +/* + * net_rps_action sends any pending IPI's for rps. This is only called from + * softirq and interrupts must be enabled. + */ +static void net_rps_action(cpumask_t *mask) +{ + int cpu; + + /* Send pending IPI's to kick RPS processing on remote cpus. */ + for_each_cpu_mask_nr(cpu, *mask) { + struct softnet_data *queue = &per_cpu(softnet_data, cpu); + if (cpu_online(cpu)) + __smp_call_function_single(cpu, &queue->csd, 0); + } + cpus_clear(*mask); +} +#endif static void net_rx_action(struct softirq_action *h) { @@ -3019,6 +3212,10 @@ static void net_rx_action(struct softirq_action *h) unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; +#ifdef CONFIG_SMP + int select; + struct rps_remote_softirq_cpus *rcpus; +#endif local_irq_disable(); @@ -3081,7 +3278,17 @@ static void net_rx_action(struct softirq_action *h) netpoll_poll_unlock(have); } out: +#ifdef CONFIG_SMP + rcpus = &__get_cpu_var(rps_remote_softirq_cpus); + select = rcpus->select; + rcpus->select ^= 1; + + local_irq_enable(); + + net_rps_action(&rcpus->mask[select]); +#else local_irq_enable(); +#endif #ifdef CONFIG_NET_DMA /* @@ -3327,10 +3534,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v) { struct netif_rx_stats *s = v; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", s->total, s->dropped, s->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - s->cpu_collision); + s->cpu_collision, s->received_rps); return 0; } @@ -3553,11 +3760,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) slave->master = master; - synchronize_net(); - - if (old) + if (old) { + synchronize_net(); dev_put(old); - + } if (master) slave->flags |= IFF_SLAVE; else @@ -4253,12 +4459,13 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_unicast_unsync); -static void dev_unicast_flush(struct net_device *dev) +void dev_unicast_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } +EXPORT_SYMBOL(dev_unicast_flush); static void dev_unicast_init(struct net_device *dev) { @@ -4280,7 +4487,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) } } -static void dev_addr_discard(struct net_device *dev) +void dev_addr_discard(struct net_device *dev) { netif_addr_lock_bh(dev); @@ -4289,6 +4496,7 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +EXPORT_SYMBOL(dev_addr_discard); /** * dev_get_flags - get flags reported to userspace @@ -5067,6 +5275,23 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + if (!dev->num_rx_queues) { + /* + * Allocate a single RX queue if driver never called + * alloc_netdev_mq + */ + + dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!dev->_rx) { + ret = -ENOMEM; + goto out; + } + + dev->_rx->first = dev->_rx; + atomic_set(&dev->_rx->count, 1); + dev->num_rx_queues = 1; + } + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5424,9 +5649,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { struct netdev_queue *tx; + struct netdev_rx_queue *rx; struct net_device *dev; size_t alloc_size; struct net_device *p; + int i; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -5452,11 +5679,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, goto free_p; } + rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); + if (!rx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "rx queues.\n"); + goto free_tx; + } + + atomic_set(&rx->count, queue_count); + + /* + * Set a pointer to first element in the array which holds the + * reference count. + */ + for (i = 0; i < queue_count; i++) + rx[i].first = rx; + dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; if (dev_addr_init(dev)) - goto free_tx; + goto free_rx; dev_unicast_init(dev); @@ -5466,6 +5709,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + dev->_rx = rx; + dev->num_rx_queues = queue_count; + dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); @@ -5480,9 +5726,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, strcpy(dev->name, name); return dev; +free_rx: + kfree(rx); free_tx: kfree(tx); - free_p: kfree(p); return NULL; @@ -5985,6 +6232,12 @@ static int __init net_dev_init(void) queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); +#ifdef CONFIG_SMP + queue->csd.func = trigger_softirq; + queue->csd.info = queue; + queue->csd.flags = 0; +#endif + queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; @@ -6023,7 +6276,7 @@ subsys_initcall(net_dev_init); static int __init initialize_hashrnd(void) { - get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + get_random_bytes(&hashrnd, sizeof(hashrnd)); return 0; } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index fd91569..3dc295b 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) netif_addr_lock_bh(dev); if (alen != dev->addr_len) - return -EINVAL; - err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + err = -EINVAL; + else + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f821..f4cb6b6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/ethtool.h> #include <linux/netdevice.h> +#include <linux/bitops.h> #include <asm/uaccess.h> /* @@ -199,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -214,6 +212,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * Use ETHTOOL_GSSET_INFO instead. + */ if (ops->get_sset_count) { int rc; @@ -237,10 +239,67 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + n_bits = hweight64(sset_mask); + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + } + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -253,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -328,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -799,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -816,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -1229,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) { struct ethtool_flash efl; @@ -1471,6 +1515,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a24377..2ff3489 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -108,7 +108,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) struct fib_rules_ops *ops; int err; - ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); @@ -123,7 +123,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) return ops; } - EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -157,7 +156,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) call_rcu(&ops->rcu, fib_rules_put_rcu); } - EXPORT_SYMBOL_GPL(fib_rules_unregister); static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, @@ -220,7 +218,6 @@ out: return err; } - EXPORT_SYMBOL_GPL(fib_rules_lookup); static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, @@ -613,7 +610,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) break; cb->args[1] = 0; - skip: +skip: idx++; } rcu_read_unlock(); @@ -685,7 +682,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct fib_rules_ops *ops; ASSERT_RTNL(); - rcu_read_lock(); switch (event) { case NETDEV_REGISTER: @@ -699,8 +695,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, break; } - rcu_read_unlock(); - return NOTIFY_DONE; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d102f6d..6cee643 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n) } static void neigh_invalidate(struct neighbour *neigh) + __releases(neigh->lock) + __acquires(neigh->lock) { struct sk_buff *skb; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 099c753..f6b6bfe 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -466,6 +466,216 @@ static struct attribute_group wireless_group = { }; #endif +/* + * RX queue sysfs structures and functions. + */ +struct rx_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_rx_queue_attr(_attr) container_of(_attr, \ + struct rx_queue_attribute, attr) + +#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) + +static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + struct netdev_rx_queue *queue = to_rx_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static struct sysfs_ops rx_queue_sysfs_ops = { + .show = rx_queue_attr_show, + .store = rx_queue_attr_store, +}; + +static ssize_t show_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, char *buf) +{ + struct rps_map *map; + cpumask_var_t mask; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + rcu_read_lock(); + map = rcu_dereference(queue->rps_map); + if (map) + for (i = 0; i < map->len; i++) + cpumask_set_cpu(map->cpus[i], mask); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + rcu_read_unlock(); + free_cpumask_var(mask); + return -EINVAL; + } + rcu_read_unlock(); + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void rps_map_release(struct rcu_head *rcu) +{ + struct rps_map *map = container_of(rcu, struct rps_map, rcu); + + kfree(map); +} + +ssize_t store_rps_map(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct rps_map *old_map, *map; + cpumask_var_t mask; + int err, cpu, i; + static DEFINE_SPINLOCK(rps_map_lock); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + map = kzalloc(max_t(unsigned, + RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), + GFP_KERNEL); + if (!map) { + free_cpumask_var(mask); + return -ENOMEM; + } + + i = 0; + for_each_cpu_and(cpu, mask, cpu_online_mask) + map->cpus[i++] = cpu; + + if (i) + map->len = i; + else { + kfree(map); + map = NULL; + } + + spin_lock(&rps_map_lock); + old_map = queue->rps_map; + rcu_assign_pointer(queue->rps_map, map); + spin_unlock(&rps_map_lock); + + if (old_map) + call_rcu(&old_map->rcu, rps_map_release); + + free_cpumask_var(mask); + return len; +} + +static struct rx_queue_attribute rps_cpus_attribute = + __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); + +static struct attribute *rx_queue_default_attrs[] = { + &rps_cpus_attribute.attr, + NULL +}; + +static void rx_queue_release(struct kobject *kobj) +{ + struct netdev_rx_queue *queue = to_rx_queue(kobj); + struct rps_map *map = queue->rps_map; + struct netdev_rx_queue *first = queue->first; + + if (map) + call_rcu(&map->rcu, rps_map_release); + + if (atomic_dec_and_test(&first->count)) + kfree(first); +} + +static struct kobj_type rx_queue_ktype = { + .sysfs_ops = &rx_queue_sysfs_ops, + .release = rx_queue_release, + .default_attrs = rx_queue_default_attrs, +}; + +static int rx_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_rx_queue *queue = net->_rx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, + "rx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + + return error; +} + +static int rx_queue_register_kobjects(struct net_device *net) +{ + int i; + int error = 0; + + net->queues_kset = kset_create_and_add("queues", + NULL, &net->dev.kobj); + if (!net->queues_kset) + return -ENOMEM; + for (i = 0; i < net->num_rx_queues; i++) { + error = rx_queue_add_kobject(net, i); + if (error) + break; + } + + if (error) + while (--i >= 0) + kobject_put(&net->_rx[i].kobj); + + return error; +} + +static void rx_queue_remove_kobjects(struct net_device *net) +{ + int i; + + for (i = 0; i < net->num_rx_queues; i++) + kobject_put(&net->_rx[i].kobj); + kset_unregister(net->queues_kset); +} + #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -529,6 +739,10 @@ void netdev_unregister_kobject(struct net_device * net) if (!net_eq(dev_net(net), &init_net)) return; +#ifdef CONFIG_SYSFS + rx_queue_remove_kobjects(net); +#endif + device_del(dev); } @@ -537,6 +751,7 @@ int netdev_register_kobject(struct net_device *net) { struct device *dev = &(net->dev); const struct attribute_group **groups = net->sysfs_groups; + int error = 0; dev->class = &net_class; dev->platform_data = net; @@ -563,7 +778,19 @@ int netdev_register_kobject(struct net_device *net) if (!net_eq(dev_net(net), &init_net)) return 0; - return device_add(dev); + error = device_add(dev); + if (error) + return error; + +#ifdef CONFIG_SYSFS + error = rx_queue_register_kobjects(net); + if (error) { + device_del(dev); + return error; + } +#endif + + return error; } int netdev_class_create_file(struct class_attribute *class_attr) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 7aa6972..d4ec38f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -735,7 +735,7 @@ int netpoll_setup(struct netpoll *np) npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; - goto release; + goto put; } npinfo->rx_flags = 0; @@ -845,7 +845,7 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); } - +put: dev_put(ndev); return err; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4392381..2ad68da 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -169,7 +169,7 @@ #include <asm/dma.h> #include <asm/div64.h> /* do_div */ -#define VERSION "2.72" +#define VERSION "2.73" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -190,6 +190,7 @@ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ +#define F_NODE (1<<15) /* Node memory alloc*/ /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -372,6 +373,7 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; + int node; /* Memory node */ #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ @@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->traffic_class) seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); + if (pkt_dev->node >= 0) + seq_printf(seq, " node: %d\n", pkt_dev->node); + seq_printf(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) @@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_SVID_RND) seq_printf(seq, "SVID_RND "); + if (pkt_dev->flags & F_NODE) + seq_printf(seq, "NODE_ALLOC "); + seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ @@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->dst_mac_count); return count; } + if (!strcmp(name, "node")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + + if (node_possible(value)) { + pkt_dev->node = value; + sprintf(pg_result, "OK: node=%d", pkt_dev->node); + } + else + sprintf(pg_result, "ERROR: node not possible"); + return count; + } if (!strcmp(name, "flag")) { char f[32]; memset(f, 0, 32); @@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; + else if (strcmp(f, "NODE_ALLOC") == 0) + pkt_dev->flags |= F_NODE; + + else if (strcmp(f, "!NODE_ALLOC") == 0) + pkt_dev->flags &= ~F_NODE; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); + + if (pkt_dev->flags & F_NODE) { + int node; + + if (pkt_dev->node >= 0) + node = pkt_dev->node; + else + node = numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 + + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = odev; + } + } + else + skb = __netdev_alloc_skb(odev, + pkt_dev->cur_pkt_size + 64 + + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); + if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_p = 0; pkt_dev->svlan_cfi = 0; pkt_dev->svlan_id = 0xffff; + pkt_dev->node = -1; err = pktgen_setup_dev(pkt_dev, ifname); if (err) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4568120..ffc6cf3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; -}; +} + +static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a, + const struct net_device_stats *b) +{ + a->rx_packets = b->rx_packets; + a->tx_packets = b->tx_packets; + a->rx_bytes = b->rx_bytes; + a->tx_bytes = b->tx_bytes; + a->rx_errors = b->rx_errors; + a->tx_errors = b->tx_errors; + a->rx_dropped = b->rx_dropped; + a->tx_dropped = b->tx_dropped; + + a->multicast = b->multicast; + a->collisions = b->collisions; + + a->rx_length_errors = b->rx_length_errors; + a->rx_over_errors = b->rx_over_errors; + a->rx_crc_errors = b->rx_crc_errors; + a->rx_frame_errors = b->rx_frame_errors; + a->rx_fifo_errors = b->rx_fifo_errors; + a->rx_missed_errors = b->rx_missed_errors; + + a->tx_aborted_errors = b->tx_aborted_errors; + a->tx_carrier_errors = b->tx_carrier_errors; + a->tx_fifo_errors = b->tx_fifo_errors; + a->tx_heartbeat_errors = b->tx_heartbeat_errors; + a->tx_window_errors = b->tx_window_errors; + + a->rx_compressed = b->rx_compressed; + a->tx_compressed = b->tx_compressed; +} static inline int rtnl_vfinfo_size(const struct net_device *dev) { @@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); + attr = nla_reserve(skb, IFLA_STATS64, + sizeof(struct rtnl_link_stats64)); + if (attr == NULL) + goto nla_put_failure; + + stats = dev_get_stats(dev); + copy_rtnl_link_stats64(nla_data(attr), stats); + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; struct ifla_vf_info ivi; @@ -1473,6 +1513,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_POST_INIT: case NETDEV_REGISTER: case NETDEV_CHANGE: + case NETDEV_PRE_TYPE_CHANGE: case NETDEV_GOING_DOWN: case NETDEV_UNREGISTER: case NETDEV_UNREGISTER_BATCH: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e06..bdea0ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; skb_dst_set(new, dst_clone(skb_dst(old))); + new->rxhash = old->rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); + C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a..c5812bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); @@ -2276,7 +2288,8 @@ out_free_request_sock_slab: prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(prot->rsk_prot->slab_name); + if (prot->rsk_prot) + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5ef32c2..53f8e12 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -189,7 +189,7 @@ enum { #define DCCP_MIB_MAX __DCCP_MIB_MAX struct dccp_mib { unsigned long mibs[DCCP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; +}; DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b195c4f..4071eaf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -998,11 +998,11 @@ static struct inet_protosw dccp_v4_protosw = { static int __net_init dccp_v4_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v4_exit_net(struct net *net) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1aec634..af3394d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1191,11 +1191,11 @@ static struct inet_protosw dccp_v6_protosw = { static int __net_init dccp_v6_init_net(struct net *net) { - int err; + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; - err = inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, - SOCK_DCCP, IPPROTO_DCCP, net); - return err; + return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6, + SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a0..0d508c3 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0ef7061..aa4cef3 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1036,7 +1036,7 @@ static int __init dccp_init(void) FIELD_SIZEOF(struct sk_buff, cb)); rc = percpu_counter_init(&dccp_orphan_count, 0); if (rc) - goto out; + goto out_fail; rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = @@ -1125,8 +1125,9 @@ static int __init dccp_init(void) goto out_sysctl_exit; dccp_timestamping_init(); -out: - return rc; + + return 0; + out_sysctl_exit: dccp_sysctl_exit(); out_ackvec_exit: @@ -1135,18 +1136,19 @@ out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); - dccp_hashinfo.bhash = NULL; out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); - dccp_hashinfo.ehash = NULL; out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); - dccp_hashinfo.bind_bucket_cachep = NULL; out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); - goto out; +out_fail: + dccp_hashinfo.bhash = NULL; + dccp_hashinfo.ehash = NULL; + dccp_hashinfo.bind_bucket_cachep = NULL; + return rc; } static void __exit dccp_fini(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0c94a1a..c9a1c68 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -587,9 +587,15 @@ choice config DEFAULT_HTCP bool "Htcp" if TCP_CONG_HTCP=y + config DEFAULT_HYBLA + bool "Hybla" if TCP_CONG_HYBLA=y + config DEFAULT_VEGAS bool "Vegas" if TCP_CONG_VEGAS=y + config DEFAULT_VENO + bool "Veno" if TCP_CONG_VENO=y + config DEFAULT_WESTWOOD bool "Westwood" if TCP_CONG_WESTWOOD=y @@ -610,8 +616,10 @@ config DEFAULT_TCP_CONG default "bic" if DEFAULT_BIC default "cubic" if DEFAULT_CUBIC default "htcp" if DEFAULT_HTCP + default "hybla" if DEFAULT_HYBLA default "vegas" if DEFAULT_VEGAS default "westwood" if DEFAULT_WESTWOOD + default "veno" if DEFAULT_VENO default "reno" if DEFAULT_RENO default "cubic" diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 33b7dff..55e1190 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1401,10 +1401,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); if (!ptr[1]) goto err1; return 0; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 51ca946..c75320e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1095,10 +1095,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, case NETDEV_DOWN: ip_mc_down(in_dev); break; - case NETDEV_BONDING_OLDTYPE: + case NETDEV_PRE_TYPE_CHANGE: ip_mc_unmap(in_dev); break; - case NETDEV_BONDING_NEWTYPE: + case NETDEV_POST_TYPE_CHANGE: ip_mc_remap(in_dev); break; case NETDEV_CHANGEMTU: diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c0c5274..f47c9f7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (saddr) memcpy(&iph->saddr, saddr, 4); - - if (daddr) { + if (daddr) memcpy(&iph->daddr, daddr, 4); - return t->hlen; - } - if (iph->daddr && !ipv4_is_multicast(iph->daddr)) + if (iph->daddr) return t->hlen; return -t->hlen; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 10a6a60..6789092 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -187,6 +187,16 @@ struct ic_device { static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ static struct net_device *ic_dev __initdata = NULL; /* Selected device */ +static bool __init ic_device_match(struct net_device *dev) +{ + if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && + (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && + strncmp(dev->name, "dummy", 5))) + return true; + return false; +} + static int __init ic_open_devs(void) { struct ic_device *d, **last; @@ -207,10 +217,7 @@ static int __init ic_open_devs(void) for_each_netdev(&init_net, dev) { if (dev->flags & IFF_LOOPBACK) continue; - if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : - (!(dev->flags & IFF_LOOPBACK) && - (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && - strncmp(dev->name, "dummy", 5))) { + if (ic_device_match(dev)) { int able = 0; if (dev->mtu >= 364) able |= IC_BOOTP; @@ -228,7 +235,7 @@ static int __init ic_open_devs(void) } if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { rtnl_unlock(); - return -1; + return -ENOMEM; } d->dev = dev; *last = d; @@ -253,7 +260,7 @@ static int __init ic_open_devs(void) printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); else printk(KERN_ERR "IP-Config: No network devices available.\n"); - return -1; + return -ENODEV; } return 0; } @@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name) return addr; } +#define DEVICE_WAIT_MAX 12 /* 12 seconds */ + +static int __init wait_for_devices(void) +{ + int i; + + msleep(CONF_PRE_OPEN); + for (i = 0; i < DEVICE_WAIT_MAX; i++) { + struct net_device *dev; + int found = 0; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + if (ic_device_match(dev)) { + found = 1; + break; + } + } + rtnl_unlock(); + if (found) + return 0; + ssleep(1); + } + return -ENODEV; +} + /* * IP Autoconfig dispatcher. */ @@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC int retries = CONF_OPEN_RETRIES; #endif + int err; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC try_try_again: #endif - /* Give hardware a chance to settle */ - msleep(CONF_PRE_OPEN); + /* Wait for devices to appear */ + err = wait_for_devices(); + if (err) + return err; /* Setup all network devices */ - if (ic_open_devs() < 0) - return -1; + err = ic_open_devs(); + if (err) + return err; /* Give drivers a chance to settle */ ssleep(CONF_POST_OPEN); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12..0b9d03c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -802,6 +802,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; + if (mfc->mfcc_parent >= MAXVIFS) + return -ENFILE; + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp = &net->ipv4.mfc_cache_array[line]; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed23..3dc9914 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), + SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b2ba558..32d3961 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); -static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -780,11 +779,30 @@ static void rt_do_flush(int process_context) #define FRACT_BITS 3 #define ONE (1UL << FRACT_BITS) +/* + * Given a hash chain and an item in this hash chain, + * find if a previous entry has the same hash_inputs + * (but differs on tos, mark or oif) + * Returns 0 if an alias is found. + * Returns ONE if rth has no alias before itself. + */ +static int has_noalias(const struct rtable *head, const struct rtable *rth) +{ + const struct rtable *aux = head; + + while (aux != rth) { + if (compare_hash_inputs(&aux->fl, &rth->fl)) + return 0; + aux = aux->u.dst.rt_next; + } + return ONE; +} + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, *aux, **rthp; + struct rtable *rth, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -835,15 +853,7 @@ nofree: * attributes don't unfairly skew * the length computation */ - for (aux = rt_hash_table[i].chain;;) { - if (aux == rth) { - length += ONE; - break; - } - if (compare_hash_inputs(&aux->fl, &rth->fl)) - break; - aux = aux->u.dst.rt_next; - } + length += has_noalias(rt_hash_table[i].chain, rth); continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) @@ -922,10 +932,8 @@ static void rt_secret_rebuild_oneshot(struct net *net) { del_timer_sync(&net->ipv4.rt_secret_timer); rt_cache_invalidate(net); - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + if (ip_rt_secret_interval) + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } static void rt_emergency_hash_rebuild(struct net *net) @@ -1073,6 +1081,21 @@ work_done: out: return 0; } +/* + * Returns number of entries in a hash chain that have different hash_inputs + */ +static int slow_chain_length(const struct rtable *head) +{ + int length = 0; + const struct rtable *rth = head; + + while (rth) { + length += has_noalias(head, rth); + rth = rth->u.dst.rt_next; + } + return length >> FRACT_BITS; +} + static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb) { @@ -1185,7 +1208,8 @@ restart: rt_free(cand); } } else { - if (chain_length > rt_chain_length_max) { + if (chain_length > rt_chain_length_max && + slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(dev_net(rt->u.dst.dev))) { @@ -1417,7 +1441,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, dev_hold(rt->u.dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = 0; + rt->u.dst.obsolete = -1; rt->u.dst.lastuse = jiffies; rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; @@ -1482,7 +1506,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) struct dst_entry *ret = dst; if (rt) { - if (dst->obsolete) { + if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || @@ -1702,7 +1726,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - return NULL; + if (rt_is_expired((struct rtable *)dst)) + return NULL; + return dst; } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1864,7 +1890,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; + rth->u.dst.output = ip_rt_bug; + rth->u.dst.obsolete = -1; atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2030,6 +2057,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; + rth->u.dst.obsolete = -1; rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); @@ -2194,6 +2222,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); @@ -2420,6 +2449,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -3077,22 +3107,20 @@ static void rt_secret_reschedule(int old) rtnl_lock(); for_each_net(net) { int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + long time; if (!new) continue; if (deleted) { - long time = net->ipv4.rt_secret_timer.expires - jiffies; + time = net->ipv4.rt_secret_timer.expires - jiffies; if (time <= 0 || (time += diff) <= 0) time = 0; - - net->ipv4.rt_secret_timer.expires = time; } else - net->ipv4.rt_secret_timer.expires = new; + time = new; - net->ipv4.rt_secret_timer.expires += jiffies; - add_timer(&net->ipv4.rt_secret_timer); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); } rtnl_unlock(); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5901010..6afb6d8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_seq == tp->copied_seq && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) - target--; + target++; /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken @@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk) tp->ucopy.memory = 0; } +#ifdef CONFIG_NET_DMA +static void tcp_service_net_dma(struct sock *sk, bool wait) +{ + dma_cookie_t done, used; + dma_cookie_t last_issued; + struct tcp_sock *tp = tcp_sk(sk); + + if (!tp->ucopy.dma_chan) + return; + + last_issued = tp->ucopy.dma_cookie; + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + + do { + if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + last_issued, &done, + &used) == DMA_SUCCESS) { + /* Safe to free early-copied skbs now */ + __skb_queue_purge(&sk->sk_async_wait_queue); + break; + } else { + struct sk_buff *skb; + while ((skb = skb_peek(&sk->sk_async_wait_queue)) && + (dma_async_is_complete(skb->dma_cookie, done, + used) == DMA_SUCCESS)) { + __skb_dequeue(&sk->sk_async_wait_queue); + kfree_skb(skb); + } + } + } while (wait); +} +#endif + static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Set realtime policy in scheduler __ */ } +#ifdef CONFIG_NET_DMA + if (tp->ucopy.dma_chan) + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); +#endif if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk_wait_data(sk, &timeo); #ifdef CONFIG_NET_DMA + tcp_service_net_dma(sk, false); /* Don't block */ tp->ucopy.wakeup = 0; #endif @@ -1633,6 +1671,9 @@ do_prequeue: copied = -EFAULT; break; } + + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if ((offset + used) == skb->len) copied_early = 1; @@ -1702,27 +1743,9 @@ skip_copy: } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) { - dma_cookie_t done, used; - - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); - - while (dma_async_memcpy_complete(tp->ucopy.dma_chan, - tp->ucopy.dma_cookie, &done, - &used) == DMA_IN_PROGRESS) { - /* do partial cleanup of sk_async_wait_queue */ - while ((skb = skb_peek(&sk->sk_async_wait_queue)) && - (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_SUCCESS)) { - __skb_dequeue(&sk->sk_async_wait_queue); - kfree_skb(skb); - } - } + tcp_service_net_dma(sk, true); /* Wait for queue to drain */ + tp->ucopy.dma_chan = NULL; - /* Safe to free early-copied skbs now */ - __skb_queue_purge(&sk->sk_async_wait_queue); - tp->ucopy.dma_chan = NULL; - } if (tp->ucopy.pinned_list) { dma_unpin_iovec_pages(tp->ucopy.pinned_list); tp->ucopy.pinned_list = NULL; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 788851c..c096a42 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2511,6 +2511,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; + if (packets == 0) + return; + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c3588b4..f4df5f9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -370,6 +370,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); @@ -1651,13 +1656,15 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) - goto discard_and_relse; - process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); @@ -1682,8 +1689,11 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (unlikely(sk_add_backlog(sk, skb))) { + bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5..32f9627 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -671,6 +671,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } @@ -728,7 +729,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4a1605d..f181b78 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; + int s_data_desired = 0; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) + s_data_desired = cvp->s_data_desired; + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); if (skb == NULL) return NULL; @@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { - const struct tcp_cookie_values *cvp = tp->cookie_values; - - if (cvp != NULL && - cvp->s_data_constant && - cvp->s_data_desired > 0) { - u8 *buf = skb_put(skb, cvp->s_data_desired); + if (s_data_desired) { + u8 *buf = skb_put(skb, s_data_desired); /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); - TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + memcpy(buf, cvp->s_data_payload, s_data_desired); + TCP_SKB_CB(skb)->end_seq += s_data_desired; } if (opts.hash_size > 0) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a17629b..b2e6bbc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -134,7 +134,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 608a544..7af756d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog(sk, skb)) { + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); return rc; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d6..e4a1483 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88fd8c5..68e5809 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,7 +81,7 @@ #include <linux/random.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <linux/proc_fs.h> @@ -97,7 +97,11 @@ #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) +#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) + +#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) +#define ADDRCONF_TIMER_FUZZ (HZ / 4) +#define ADDRCONF_TIMER_FUZZ_MAX (HZ) #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); @@ -126,8 +130,8 @@ static int ipv6_count_addresses(struct inet6_dev *idev); /* * Configured unicast address hash table */ -static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_RWLOCK(addrconf_hash_lock); +static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(addrconf_hash_lock); static void addrconf_verify(unsigned long); @@ -137,8 +141,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); -static void addrconf_bonding_change(struct net_device *dev, - unsigned long event); +static void addrconf_type_change(struct net_device *dev, + unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); @@ -151,8 +155,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev); +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); @@ -249,8 +253,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp) __in6_ifa_put(ifp); } -enum addrconf_timer_t -{ +enum addrconf_timer_t { AC_NONE, AC_DAD, AC_RS, @@ -270,7 +273,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, case AC_RS: ifp->timer.function = addrconf_rs_timer; break; - default:; + default: + break; } ifp->timer.expires = jiffies + when; add_timer(&ifp->timer); @@ -317,7 +321,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - WARN_ON(idev->addr_list != NULL); + WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); #ifdef NET_REFCNT_DEBUG @@ -325,7 +329,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #endif dev_put(dev); if (!idev->dead) { - printk("Freeing alive inet6 device %p\n", idev); + pr_warning("Freeing alive inet6 device %p\n", idev); return; } snmp6_free_dev(idev); @@ -350,6 +354,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; + INIT_LIST_HEAD(&ndev->addr_list); + memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -401,6 +407,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) #endif #ifdef CONFIG_IPV6_PRIVACY + INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || @@ -438,8 +445,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = __in6_dev_get(dev)) == NULL) { - if ((idev = ipv6_add_dev(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) { + idev = ipv6_add_dev(dev); + if (!idev) return NULL; } @@ -465,7 +474,8 @@ static void dev_forward_change(struct inet6_dev *idev) else ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); } - for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + + list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; if (idev->cnf.forwarding) @@ -522,12 +532,16 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) } #endif -/* Nobody refers to this ifaddr, destroy it */ +static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu); + kfree(ifp); +} +/* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - WARN_ON(ifp->if_next != NULL); - WARN_ON(ifp->lst_next != NULL); + WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); @@ -536,54 +550,45 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); if (del_timer(&ifp->timer)) - printk("Timer is still running, when freeing ifa=%p\n", ifp); + pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (!ifp->dead) { - printk("Freeing alive inet6 address %p\n", ifp); + pr_warning("Freeing alive inet6 address %p\n", ifp); return; } dst_release(&ifp->rt->u.dst); - kfree(ifp); + call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } static void ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct list_head *p; int ifp_scope = ipv6_addr_src_scope(&ifp->addr); /* * Each device address list is sorted in order of scope - * global before linklocal. */ - for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; - ifap = &ifa->if_next) { + list_for_each(p, &idev->addr_list) { + struct inet6_ifaddr *ifa + = list_entry(p, struct inet6_ifaddr, if_list); if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } - ifp->if_next = *ifap; - *ifap = ifp; + list_add(&ifp->if_list, p); } -/* - * Hash function taken from net_alias.c - */ -static u8 ipv6_addr_hash(const struct in6_addr *addr) +static u32 ipv6_addr_hash(const struct in6_addr *addr) { - __u32 word; - /* * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. */ - - word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]); - word ^= (word >> 16); - word ^= (word >> 8); - - return ((word ^ (word >> 4)) & 0x0f); + return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0) + & (IN6_ADDR_HSIZE - 1); } /* On success it returns ifp with increased reference count */ @@ -594,7 +599,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; - int hash; + unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -615,7 +620,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out2; } - write_lock(&addrconf_hash_lock); + spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { @@ -642,6 +647,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, spin_lock_init(&ifa->lock); init_timer(&ifa->timer); + INIT_HLIST_NODE(&ifa->addr_lst); ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -668,10 +674,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, /* Add to big hash table */ hash = ipv6_addr_hash(addr); - ifa->lst_next = inet6_addr_lst[hash]; - inet6_addr_lst[hash] = ifa; + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); in6_ifa_hold(ifa); - write_unlock(&addrconf_hash_lock); + spin_unlock(&addrconf_hash_lock); write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ @@ -679,8 +684,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, #ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { - ifa->tmp_next = idev->tempaddr_list; - idev->tempaddr_list = ifa; + list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } #endif @@ -699,7 +703,7 @@ out2: return ifa; out: - write_unlock(&addrconf_hash_lock); + spin_unlock(&addrconf_hash_lock); goto out2; } @@ -707,7 +711,7 @@ out: static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; int hash; int deleted = 0, onlink = 0; @@ -717,42 +721,28 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ifp->dead = 1; - write_lock_bh(&addrconf_hash_lock); - for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; - ifap = &ifa->lst_next) { - if (ifa == ifp) { - *ifap = ifa->lst_next; - __in6_ifa_put(ifp); - ifa->lst_next = NULL; - break; - } - } - write_unlock_bh(&addrconf_hash_lock); + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifp->addr_lst); + __in6_ifa_put(ifp); + spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); #ifdef CONFIG_IPV6_PRIVACY if (ifp->flags&IFA_F_TEMPORARY) { - for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; - ifap = &ifa->tmp_next) { - if (ifa == ifp) { - *ifap = ifa->tmp_next; - if (ifp->ifpub) { - in6_ifa_put(ifp->ifpub); - ifp->ifpub = NULL; - } - __in6_ifa_put(ifp); - ifa->tmp_next = NULL; - break; - } + list_del(&ifp->tmp_list); + if (ifp->ifpub) { + in6_ifa_put(ifp->ifpub); + ifp->ifpub = NULL; } + __in6_ifa_put(ifp); } #endif - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { + list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { - *ifap = ifa->if_next; + list_del_init(&ifp->if_list); __in6_ifa_put(ifp); - ifa->if_next = NULL; + if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) break; deleted = 1; @@ -785,7 +775,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } } } - ifap = &ifa->if_next; } write_unlock_bh(&idev->lock); @@ -1164,7 +1153,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, continue; read_lock_bh(&idev->lock); - for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { + list_for_each_entry(score->ifa, &idev->addr_list, if_list) { int i; /* @@ -1242,7 +1231,6 @@ try_nextdev: in6_ifa_put(hiscore->ifa); return 0; } - EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, @@ -1252,12 +1240,14 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, int err = -EADDRNOTAVAIL; rcu_read_lock(); - if ((idev = __in6_dev_get(dev)) != NULL) { + idev = __in6_dev_get(dev); + if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { ipv6_addr_copy(addr, &ifp->addr); err = 0; break; @@ -1275,7 +1265,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) + list_for_each_entry(ifp, &idev->addr_list, if_list) cnt++; read_unlock_bh(&idev->lock); return cnt; @@ -1284,11 +1274,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *node; + unsigned int hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1298,27 +1289,28 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); + return ifp != NULL; } EXPORT_SYMBOL(ipv6_chk_addr); -static -int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp; + struct hlist_node *node; - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) - break; + return true; } } - return ifp != NULL; + return false; } int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) @@ -1332,7 +1324,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) @@ -1349,11 +1341,12 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *node; + unsigned int hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1364,7 +1357,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add } } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); return ifp; } @@ -1380,6 +1373,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); #ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { @@ -1567,7 +1562,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; @@ -1735,7 +1730,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) + idev = ipv6_find_idev(dev); + if (!idev) return NULL; /* Add default multicast route */ @@ -1968,7 +1964,7 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { + list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { /* * When adjusting the lifetimes of an existing * temporary address, only lower the lifetimes. @@ -2171,7 +2167,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, return -ENXIO; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->prefix_len == plen && ipv6_addr_equal(pfx, &ifp->addr)) { in6_ifa_hold(ifp); @@ -2182,7 +2178,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, /* If the last address is deleted administratively, disable IPv6 on this interface. */ - if (idev->addr_list == NULL) + if (list_empty(&idev->addr_list)) addrconf_ifdown(idev->dev, 1); return 0; } @@ -2443,7 +2439,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = addrconf_add_dev(dev)) == NULL) { + idev = addrconf_add_dev(dev); + if (!idev) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } @@ -2458,7 +2455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; - switch(event) { + switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); @@ -2466,6 +2463,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(-ENOMEM); } break; + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -2495,10 +2493,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) { + if (idev->if_flags & IF_READY) /* device is already configured. */ break; - } idev->if_flags |= IF_READY; } @@ -2510,7 +2507,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - switch(dev->type) { + switch (dev->type) { #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) case ARPHRD_SIT: addrconf_sit_config(dev); @@ -2527,25 +2524,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_dev_config(dev); break; } + if (idev) { if (run_pending) addrconf_dad_run(idev); - /* If the MTU changed during the interface down, when the - interface up, the changed MTU must be reflected in the - idev as well as routers. + /* + * If the MTU changed during the interface down, + * when the interface up, the changed MTU must be + * reflected in the idev as well as routers. */ - if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { + if (idev->cnf.mtu6 != dev->mtu && + dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; } idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); - /* If the changed mtu during down is lower than IPV6_MIN_MTU - stop IPv6 on this interface. + + /* + * If the changed mtu during down is lower than + * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, event != NETDEV_DOWN); + addrconf_ifdown(dev, 1); } break; @@ -2562,7 +2564,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ + /* + * MTU falled under IPV6_MIN_MTU. + * Stop IPv6 on this interface. + */ case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -2582,9 +2587,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(err); } break; - case NETDEV_BONDING_OLDTYPE: - case NETDEV_BONDING_NEWTYPE: - addrconf_bonding_change(dev, event); + + case NETDEV_PRE_TYPE_CHANGE: + case NETDEV_POST_TYPE_CHANGE: + addrconf_type_change(dev, event); break; } @@ -2596,28 +2602,27 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, - .priority = 0 }; -static void addrconf_bonding_change(struct net_device *dev, unsigned long event) +static void addrconf_type_change(struct net_device *dev, unsigned long event) { struct inet6_dev *idev; ASSERT_RTNL(); idev = __in6_dev_get(dev); - if (event == NETDEV_BONDING_NEWTYPE) + if (event == NETDEV_POST_TYPE_CHANGE) ipv6_mc_remap(idev); - else if (event == NETDEV_BONDING_OLDTYPE) + else if (event == NETDEV_PRE_TYPE_CHANGE) ipv6_mc_unmap(idev); } static int addrconf_ifdown(struct net_device *dev, int how) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa, **bifa; struct net *net = dev_net(dev); - int i; + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + LIST_HEAD(keep_list); ASSERT_RTNL(); @@ -2628,8 +2633,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (idev == NULL) return -ENODEV; - /* Step 1: remove reference to ipv6 device from parent device. - Do not dev_put! + /* + * Step 1: remove reference to ipv6 device from parent device. + * Do not dev_put! */ if (how) { idev->dead = 1; @@ -2642,40 +2648,21 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* Step 2: clear hash table */ - for (i=0; i<IN6_ADDR_HSIZE; i++) { - bifa = &inet6_addr_lst[i]; - - write_lock_bh(&addrconf_hash_lock); - while ((ifa = *bifa) != NULL) { - if (ifa->idev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT))) { - *bifa = ifa->lst_next; - ifa->lst_next = NULL; - addrconf_del_timer(ifa); - in6_ifa_put(ifa); - continue; - } - bifa = &ifa->lst_next; - } - write_unlock_bh(&addrconf_hash_lock); - } - write_lock_bh(&idev->lock); - /* Step 3: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); - /* clear tempaddr list */ - while ((ifa = idev->tempaddr_list) != NULL) { - idev->tempaddr_list = ifa->tmp_next; - ifa->tmp_next = NULL; + /* Step 3: clear tempaddr list */ + while (!list_empty(&idev->tempaddr_list)) { + ifa = list_first_entry(&idev->tempaddr_list, + struct inet6_ifaddr, tmp_list); + list_del(&ifa->tmp_list); ifa->dead = 1; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -2689,35 +2676,56 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - bifa = &idev->addr_list; - while ((ifa = *bifa) != NULL) { - if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) { - /* Retain permanent address on admin down */ - bifa = &ifa->if_next; - - /* Restart DAD if needed when link comes back up */ - if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD))) - ifa->flags |= IFA_F_TENTATIVE; - } else { - *bifa = ifa->if_next; - ifa->if_next = NULL; - ifa->dead = 1; - write_unlock_bh(&idev->lock); + while (!list_empty(&idev->addr_list)) { + ifa = list_first_entry(&idev->addr_list, + struct inet6_ifaddr, if_list); + addrconf_del_timer(ifa); + + /* If just doing link down, and address is permanent + and not link-local, then retain it. */ + if (!how && + (ifa->flags&IFA_F_PERMANENT) && + !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { + list_move_tail(&ifa->if_list, &keep_list); + + /* If not doing DAD on this address, just keep it. */ + if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || + idev->cnf.accept_dad <= 0 || + (ifa->flags & IFA_F_NODAD)) + continue; - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); + /* If it was tentative already, no need to notify */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; - write_lock_bh(&idev->lock); + /* Flag it for later restoration when link comes up */ + ifa->flags |= IFA_F_TENTATIVE; + in6_ifa_hold(ifa); + } else { + list_del(&ifa->if_list); + ifa->dead = 1; } + write_unlock_bh(&idev->lock); + + /* clear hash table */ + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifa->addr_lst); + __in6_ifa_put(ifa); + spin_unlock_bh(&addrconf_hash_lock); + + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + in6_ifa_put(ifa); + + write_lock_bh(&idev->lock); } + + list_splice(&keep_list, &idev->addr_list); + write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ - if (how) ipv6_mc_destroy_dev(idev); else @@ -2725,8 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->tstamp = jiffies; - /* Shot the device (if unregistered) */ - + /* Last: Shot the device (if unregistered) */ if (how) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); @@ -2739,28 +2746,29 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_dev *idev = ifp->idev; - if (ifp->idev->cnf.forwarding) + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) goto out; - if (ifp->idev->if_flags & IF_RA_RCVD) { - /* - * Announcement received after solicitation - * was sent - */ + if (idev->cnf.forwarding) + goto out; + + /* Announcement received after solicitation was sent */ + if (idev->if_flags & IF_RA_RCVD) goto out; - } spin_lock(&ifp->lock); - if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { + if (ifp->probes++ < idev->cnf.rtr_solicits) { /* The wait after the last probe can be shorter */ addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == ifp->idev->cnf.rtr_solicits) ? - ifp->idev->cnf.rtr_solicit_delay : - ifp->idev->cnf.rtr_solicit_interval); + (ifp->probes == idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); spin_unlock(&ifp->lock); - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); } else { spin_unlock(&ifp->lock); /* @@ -2768,10 +2776,11 @@ static void addrconf_rs_timer(unsigned long data) * assumption any longer. */ printk(KERN_DEBUG "%s: no IPv6 routers present\n", - ifp->idev->dev->name); + idev->dev->name); } out: + read_unlock(&idev->lock); in6_ifa_put(ifp); } @@ -2835,7 +2844,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) * Optimistic nodes can start receiving * Frames right away */ - if(ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) ip6_ins_rt(ifp->rt); addrconf_dad_kick(ifp); @@ -2850,9 +2859,9 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) { + read_unlock(&idev->lock); goto out; } @@ -2864,7 +2873,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); addrconf_dad_completed(ifp); @@ -2874,7 +2883,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); @@ -2885,7 +2894,7 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { - struct net_device * dev = ifp->idev->dev; + struct net_device *dev = ifp->idev->dev; /* * Configure the address for reception. Now it is valid. @@ -2916,11 +2925,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) } } -static void addrconf_dad_run(struct inet6_dev *idev) { +static void addrconf_dad_run(struct inet6_dev *idev) +{ struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); if (!(ifp->flags & IFA_F_TENTATIVE)) { spin_unlock(&ifp->lock); @@ -2945,36 +2955,35 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - ifa = inet6_addr_lst[state->bucket]; - - while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) - ifa = ifa->lst_next; - if (ifa) - break; + struct hlist_node *n; + hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket], + addr_lst) + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; } - return ifa; + return NULL; } -static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) +static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, + struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct hlist_node *n = &ifa->addr_lst; - ifa = ifa->lst_next; -try_again: - if (ifa) { - if (!net_eq(dev_net(ifa->idev->dev), net)) { - ifa = ifa->lst_next; - goto try_again; - } - } + hlist_for_each_entry_continue_rcu(ifa, n, addr_lst) + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; - if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { - ifa = inet6_addr_lst[state->bucket]; - goto try_again; + while (++state->bucket < IN6_ADDR_HSIZE) { + hlist_for_each_entry(ifa, n, + &inet6_addr_lst[state->bucket], addr_lst) { + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; + } } - return ifa; + return NULL; } static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) @@ -2982,15 +2991,15 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) struct inet6_ifaddr *ifa = if6_get_first(seq); if (ifa) - while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) + while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) --pos; return pos ? NULL : ifa; } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(addrconf_hash_lock) + __acquires(rcu) { - read_lock_bh(&addrconf_hash_lock); + rcu_read_lock_bh(); return if6_get_idx(seq, *pos); } @@ -3004,9 +3013,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(addrconf_hash_lock) + __releases(rcu) { - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); } static int if6_seq_show(struct seq_file *seq, void *v) @@ -3076,10 +3085,12 @@ void if6_proc_exit(void) int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) { int ret = 0; - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *n; + unsigned int hash = ipv6_addr_hash(addr); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3088,7 +3099,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); return ret; } #endif @@ -3099,43 +3110,35 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) static void addrconf_verify(unsigned long foo) { + unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - unsigned long now, next; + struct hlist_node *node; int i; - spin_lock_bh(&addrconf_verify_lock); + rcu_read_lock_bh(); + spin_lock(&addrconf_verify_lock); now = jiffies; - next = now + ADDR_CHECK_FREQUENCY; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); del_timer(&addr_chk_timer); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - + for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - read_lock(&addrconf_hash_lock); - for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry_rcu(ifp, node, + &inet6_addr_lst[i], addr_lst) { unsigned long age; -#ifdef CONFIG_IPV6_PRIVACY - unsigned long regen_advance; -#endif if (ifp->flags & IFA_F_PERMANENT) continue; spin_lock(&ifp->lock); - age = (now - ifp->tstamp) / HZ; - -#ifdef CONFIG_IPV6_PRIVACY - regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; -#endif + /* We try to batch several events at once. */ + age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifp->valid_lft != INFINITY_LIFE_TIME && age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { @@ -3157,7 +3160,6 @@ restart: if (deprecate) { in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); @@ -3166,6 +3168,10 @@ restart: #ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { + unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * + ifp->idev->cnf.dad_transmits * + ifp->idev->nd_parms->retrans_time / HZ; + if (age >= ifp->prefered_lft - regen_advance) { struct inet6_ifaddr *ifpub = ifp->ifpub; if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -3175,7 +3181,7 @@ restart: in6_ifa_hold(ifp); in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); - read_unlock(&addrconf_hash_lock); + spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); @@ -3195,12 +3201,26 @@ restart: spin_unlock(&ifp->lock); } } - read_unlock(&addrconf_hash_lock); } - addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; + + ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", + now, next, next_sec, next_sched)); + + addr_chk_timer.expires = next_sched; add_timer(&addr_chk_timer); - spin_unlock_bh(&addrconf_verify_lock); + spin_unlock(&addrconf_verify_lock); + rcu_read_unlock_bh(); } static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) @@ -3490,8 +3510,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return nlmsg_end(skb, nlh); } -enum addr_type_t -{ +enum addr_type_t { UNICAST_ADDR, MULTICAST_ADDR, ANYCAST_ADDR, @@ -3502,7 +3521,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type, int s_ip_idx, int *p_ip_idx) { - struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; int err = 1; @@ -3510,11 +3528,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, read_lock_bh(&idev->lock); switch (type) { - case UNICAST_ADDR: + case UNICAST_ADDR: { + struct inet6_ifaddr *ifa; + /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, @@ -3525,6 +3544,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, break; } break; + } case MULTICAST_ADDR: /* multicast address */ for (ifmca = idev->mc_list; ifmca; @@ -3589,7 +3609,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, if (idx > s_idx) s_ip_idx = 0; ip_idx = 0; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) goto cont; if (in6_dump_addrs(idev, skb, cb, type, @@ -3656,12 +3677,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (ifm->ifa_index) dev = __dev_get_by_index(net, ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { + ifa = ipv6_get_ifaddr(net, addr, dev, 1); + if (!ifa) { err = -EADDRNOTAVAIL; goto errout; } - if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL); + if (!skb) { err = -ENOBUFS; goto errout_ifa; } @@ -3786,7 +3809,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { - switch(attrtype) { + switch (attrtype) { case IFLA_INET6_STATS: __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); break; @@ -4138,211 +4161,211 @@ static struct addrconf_sysctl_table .sysctl_header = NULL, .addrconf_vars = { { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, }, { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #endif { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #ifdef CONFIG_IPV6_MROUTE { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, #endif { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_disable, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, }, { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { .procname = "force_tllao", @@ -4378,8 +4401,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (t == NULL) goto out; - for (i=0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; + for (i = 0; t->addrconf_vars[i].data; i++) { + t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ t->addrconf_vars[i].extra2 = net; } @@ -4516,14 +4539,12 @@ int register_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&inet6addr_chain, nb); } - EXPORT_SYMBOL(register_inet6addr_notifier); int unregister_inet6addr_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&inet6addr_chain,nb); + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); } - EXPORT_SYMBOL(unregister_inet6addr_notifier); /* @@ -4532,11 +4553,12 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier); int __init addrconf_init(void) { - int err; + int i, err; - if ((err = ipv6_addr_label_init()) < 0) { - printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", - err); + err = ipv6_addr_label_init(); + if (err < 0) { + printk(KERN_CRIT "IPv6 Addrconf:" + " cannot initialize default policy table: %d.\n", err); return err; } @@ -4567,6 +4589,9 @@ int __init addrconf_init(void) if (err) goto errlo; + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_addr_lst[i]); + register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); @@ -4595,7 +4620,6 @@ errlo: void addrconf_cleanup(void) { - struct inet6_ifaddr *ifa; struct net_device *dev; int i; @@ -4615,20 +4639,10 @@ void addrconf_cleanup(void) /* * Check hash table. */ - write_lock_bh(&addrconf_hash_lock); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifa=inet6_addr_lst[i]; ifa; ) { - struct inet6_ifaddr *bifa; - - bifa = ifa; - ifa = ifa->lst_next; - printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); - /* Do not free it; something is wrong. - Now we can investigate it with debugger. - */ - } - } - write_unlock_bh(&addrconf_hash_lock); + spin_lock_bh(&addrconf_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_addr_lst[i])); + spin_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); rtnl_unlock(); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b..5e463c43 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52e0f74..23e4ac0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1113,6 +1113,9 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) unsigned char ttls[MAXMIFS]; int i; + if (mfc->mf6cc_parent >= MAXMIFS) + return -ENFILE; + memset(ttls, 255, MAXMIFS); for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e..7fcb0e5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } @@ -886,7 +879,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; return NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6963a6b..9b6dbba 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,11 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (unlikely(sk_add_backlog(sk, skb))) { + bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52b8347..3c0c9c7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); + else if (sk_add_backlog(sk, skb1)) { + kfree_skb(skb1); + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); - } else { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + continue; } +drop: + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); } } /* @@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog(sk, skb)) { + atomic_inc(&sk->sk_drops); + bh_unlock_sock(sk); + sock_put(sk); + goto discard; + } bh_unlock_sock(sk); sock_put(sk); return 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696..ae18165 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780..86d6985 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a8dde9b..a12144d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - sk_add_backlog(sk, skb); + if (sk_add_backlog(sk, skb)) + goto drop_unlock; } out: bh_unlock_sock(sk); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9affe2c..b4ddb2f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write( ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) { u8 *buf; - ssize_t ret = -ENODEV; + ssize_t ret; - buf = kzalloc(count, GFP_KERNEL); + buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; + ret = -EFAULT; if (copy_from_user(buf, userbuf, count)) - return -EFAULT; + goto freebuf; + ret = -ENODEV; rtnl_lock(); if (sdata->dev->reg_state == NETREG_REGISTERED) ret = (*write)(sdata, buf, count); rtnl_unlock(); +freebuf: + kfree(buf); return ret; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index bc4e20e..1a29c4a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -744,7 +744,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; default: /* should not get here, PLINK_BLOCKED is dealt with at the - * beggining of the function + * beginning of the function */ spin_unlock_bh(&sta->lock); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ab284c..be5f723 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -436,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + return; + + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } } @@ -558,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 211c475..56422d8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); rcu_read_lock(); err = -EEXIST; goto out_free; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b2af63..569410a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -582,7 +582,9 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(net, 0, group, -ENOBUFS); + if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) + return -ENOBUFS; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 8dd75d9..c6cd1b8 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * tabs, spaces and continuation lines, which are treated as a single whitespace * character. * - * Some headers may appear multiple times. A comma seperated list of values is + * Some headers may appear multiple times. A comma separated list of values is * equivalent to multiple headers. */ static const struct sip_header ct_sip_hdrs[] = { @@ -421,7 +421,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); -/* Get next header field in a list of comma seperated values */ +/* Get next header field in a list of comma separated values */ static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc2..6afa3d5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d952806..9e9c489 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,6 +1,6 @@ /* * xt_hashlimit - Netfilter module to limit the number of packets per time - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) + * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 320d042..acbbae1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); + int ret = 0; if (sk == p->exclude_sk) goto out; @@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + ret = 1; + goto out; + } + sk->sk_err = p->code; sk->sk_error_report(sk); out: - return 0; + return ret; } /** @@ -1116,12 +1122,16 @@ out: * @pid: the PID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) + * + * This function returns the number of broadcast listeners that have set the + * NETLINK_RECV_NO_ENOBUFS socket option. */ -void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; struct sock *sk; + int ret = 0; info.exclude_sk = ssk; info.pid = pid; @@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_lock(&nl_table_lock); sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_set_err(sk, &info); + ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); + return ret; } EXPORT_SYMBOL(netlink_set_err); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 031a5e6..1612d41 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, { switch (i->type) { case PACKET_MR_MULTICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr, i->alen, 0); else @@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return dev_set_allmulti(dev, what); break; case PACKET_MR_UNICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_unicast_add(dev, i->addr); else @@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen != dev->addr_len) + if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c597cc5..5c6ae0c 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -107,8 +107,7 @@ static void phonet_device_destroy(struct net_device *dev) if (pnd) { u8 addr; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) + for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(RTM_DELADDR, dev, addr); kfree(pnd); } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 2e6c7eb..fe2e708 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -141,8 +141,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; addr_idx = 0; - for (addr = find_first_bit(pnd->addrs, 64); addr < 64; - addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + for_each_set_bit(addr, pnd->addrs, 64) { if (addr_idx++ < addr_start_idx) continue; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 853c52b..937ecda 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, poll_wait(file, sk->sk_sleep, wait); - poll_wait(file, &rds_poll_waitq, wait); + if (rs->rs_seen_congestion) + poll_wait(file, &rds_poll_waitq, wait); read_lock_irqsave(&rs->rs_recv_lock, flags); if (!rs->rs_cong_monitor) { @@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, mask |= (POLLOUT | POLLWRNORM); read_unlock_irqrestore(&rs->rs_recv_lock, flags); + /* clear state any time we wake a seen-congested socket */ + if (mask) + rs->rs_seen_congestion = 0; + return mask; } diff --git a/net/rds/cong.c b/net/rds/cong.c index 6d06cac..dd2711d 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -218,8 +218,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { - if (conn->c_loopback) - continue; if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); queue_delayed_work(rds_wq, &conn->c_send_w, 0); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 647cb8f..e1f124b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -203,9 +203,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u " + rdsdebug("Fatal QP Event %u " "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 4b0da86..cfb1d90 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -234,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { flush_workqueue(rds_wq); rds_ib_flush_mr_pool(pool, 1); - BUG_ON(atomic_read(&pool->item_count)); - BUG_ON(atomic_read(&pool->free_pinned)); + WARN_ON(atomic_read(&pool->item_count)); + WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } @@ -440,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) /* FIXME we need a way to tell a r/w MR * from a r/o MR */ + BUG_ON(in_interrupt()); set_page_dirty(page); put_page(page); } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 04dc0d3..c338881 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -468,8 +468,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_ib_stats_inc(s_ib_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_ib_conn_error(ic->conn, "sending ack failed\n"); } else rds_ib_stats_inc(s_ib_ack_sent); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index a10fab6..17fa808 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) struct rds_message *rm; rm = rds_send_get_message(conn, send->s_op); - if (rm) + if (rm) { + if (rm->m_rdma_op) + rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); rds_ib_send_rdma_complete(rm, wc.status); + rds_message_put(rm); + } } oldest = (oldest + 1) % ic->i_send_ring.w_nr; @@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, BUG_ON(off % RDS_FRAG_SIZE); BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + /* Do not send cong updates to IB loopback */ + if (conn->c_loopback + && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + } + /* FIXME we may overallocate here */ if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; @@ -574,8 +585,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; @@ -714,8 +724,8 @@ add_header: ic->i_rm = prev->s_rm; prev->s_rm = NULL; } - /* Finesse this later */ - BUG(); + + rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 394cf6b..6bc638f 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data) case IB_EVENT_QP_REQ_ERR: case IB_EVENT_QP_FATAL: default: - rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", + rdsdebug("Fatal QP Event %u " + "- connection %pI4->%pI4, reconnecting\n", event->event, &conn->c_laddr, &conn->c_faddr); + rds_conn_drop(conn); break; } } diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 54af7d6..337e4e5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -468,8 +468,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); rds_iw_stats_inc(s_iw_ack_send_failure); - /* Need to finesse this later. */ - BUG(); + + rds_iw_conn_error(ic->conn, "sending ack failed\n"); } else rds_iw_stats_inc(s_iw_ack_sent); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 1379e9d..52182ff 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); - } else if (ic->i_rm != rm) - BUG(); + } send = &ic->i_sends[pos]; first = send; diff --git a/net/rds/loop.c b/net/rds/loop.c index 4a61997..93a45f1 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset) { - unsigned long i; - BUG_ON(offset); BUG_ON(map != conn->c_lcong); - for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { - memcpy((void *)conn->c_fcong->m_page_addrs[i], - (void *)map->m_page_addrs[i], PAGE_SIZE); - } - rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4c64daa..61b359d 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -438,8 +438,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->r_write) + if (!ro->r_write) { + BUG_ON(in_interrupt()); set_page_dirty(page); + } put_page(page); } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9ece910..5ea82fc 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_DISCONNECTED: - printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection " + rdsdebug("DISCONNECT event - dropping connection " "%pI4->%pI4\n", &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); @@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, default: /* things like device disconnect? */ - printk(KERN_ERR "unknown event %u\n", event->event); - BUG(); + printk(KERN_ERR "RDS: unknown event %u!\n", event->event); break; } diff --git a/net/rds/rds.h b/net/rds/rds.h index 85d6f89..4bec6e2 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -388,6 +388,8 @@ struct rds_sock { /* flag indicating we were congested or not */ int rs_congested; + /* seen congestion (ENOBUFS) when sending? */ + int rs_seen_congestion; /* rs_lock protects all these adjacent members before the newline */ spinlock_t rs_lock; diff --git a/net/rds/send.c b/net/rds/send.c index b2fccfc..4629a0b 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message); */ void rds_send_remove_from_sock(struct list_head *messages, int status) { - unsigned long flags = 0; /* silence gcc :P */ + unsigned long flags; struct rds_sock *rs = NULL; struct rds_message *rm; - local_irq_save(flags); while (!list_empty(messages)) { + int was_on_sock = 0; + rm = list_entry(messages->next, struct rds_message, m_conn_item); list_del_init(&rm->m_conn_item); @@ -527,20 +528,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) * while we're messing with it. It does not prevent the * message from being removed from the socket, though. */ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags); if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) goto unlock_and_drop; if (rs != rm->m_rs) { if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } rs = rm->m_rs; - spin_lock(&rs->rs_lock); sock_hold(rds_rs_to_sk(rs)); } + spin_lock(&rs->rs_lock); if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { struct rds_rdma_op *ro = rm->m_rdma_op; @@ -557,21 +557,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) notifier->n_status = status; rm->m_rdma_op->r_notifier = NULL; } - rds_message_put(rm); + was_on_sock = 1; rm->m_rs = NULL; } + spin_unlock(&rs->rs_lock); unlock_and_drop: - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); + if (was_on_sock) + rds_message_put(rm); } if (rs) { - spin_unlock(&rs->rs_lock); rds_wake_sk_sleep(rs); sock_put(rds_rs_to_sk(rs)); } - local_irq_restore(flags); } /* @@ -633,9 +634,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_move(&rm->m_sock_item, &list); rds_send_sndbuf_remove(rs, rm); clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); - - /* If this is a RDMA operation, notify the app. */ - __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); } /* order flag updates with the rs lock */ @@ -644,9 +642,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) spin_unlock_irqrestore(&rs->rs_lock, flags); - if (wake) - rds_wake_sk_sleep(rs); - conn = NULL; /* now remove the messages from the conn list as needed */ @@ -654,6 +649,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ spin_lock_irqsave(&rm->m_rs_lock, flags2); + /* If this is a RDMA operation, notify the app. */ + spin_lock(&rs->rs_lock); + __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); + spin_unlock(&rs->rs_lock); rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags2); @@ -682,6 +681,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) if (conn) spin_unlock_irqrestore(&conn->c_lock, flags); + if (wake) + rds_wake_sk_sleep(rs); + while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); @@ -815,7 +817,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int ret = 0; int queued = 0, allocated_mr = 0; int nonblock = msg->msg_flags & MSG_DONTWAIT; - long timeo = sock_rcvtimeo(sk, nonblock); + long timeo = sock_sndtimeo(sk, nonblock); /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ @@ -894,8 +896,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, queue_delayed_work(rds_wq, &conn->c_conn_w, 0); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); - if (ret) + if (ret) { + rs->rs_seen_congestion = 1; goto out; + } while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, &queued)) { diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00daff..40bfcf8 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -97,6 +97,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, goto out; } + rds_stats_add(s_copy_to_user, to_copy); size -= to_copy; ret += to_copy; skb_off += to_copy; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 34fdcc0..a28b895 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + out: read_unlock(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index 00fa10e..786c20e 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -259,7 +259,7 @@ void rds_threads_exit(void) int __init rds_threads_init(void) { - rds_wq = create_singlethread_workqueue("krdsd"); + rds_wq = create_workqueue("krdsd"); if (rds_wq == NULL) return -ENOMEM; diff --git a/net/sctp/input.c b/net/sctp/input.c index c0c973e..3d74b26 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt); -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ @@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb) } if (sock_owned_by_user(sk)) { + if (sctp_add_backlog(sk, skb)) { + sctp_bh_unlock_sock(sk); + sctp_chunk_free(chunk); + skb = NULL; /* sctp_chunk_free already freed the skb */ + goto discard_release; + } SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); - sctp_add_backlog(sk, skb); } else { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); @@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_add_backlog(sk, skb); - backloged = 1; + if (sk_add_backlog(sk, skb)) + sctp_chunk_free(chunk); + else + backloged = 1; } else sctp_inq_push(inqueue, chunk); @@ -362,22 +369,27 @@ done: return 0; } -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_ep_common *rcvr = chunk->rcvr; + int ret; - /* Hold the assoc/ep while hanging on the backlog queue. - * This way, we know structures we need will not disappear from us - */ - if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); - else if (SCTP_EP_TYPE_SOCKET == rcvr->type) - sctp_endpoint_hold(sctp_ep(rcvr)); - else - BUG(); + ret = sk_add_backlog(sk, skb); + if (!ret) { + /* Hold the assoc/ep while hanging on the backlog queue. + * This way, we know structures we need will not disappear + * from us + */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_hold(sctp_assoc(rcvr)); + else if (SCTP_EP_TYPE_SOCKET == rcvr->type) + sctp_endpoint_hold(sctp_ep(rcvr)); + else + BUG(); + } + return ret; - sk_add_backlog(sk, skb); } /* Handle icmp frag needed error. */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 1d7ac70..240dceb 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -371,7 +371,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_lock_bh(&in6_dev->lock); - for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); if (addr) { diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4e4ca65..500886b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. - * The first unacknowleged HB triggers it. We do this with a flag + * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59..dfc5c12 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); + /* Set socket backlog limit. */ + sk->sk_backlog.limit = sysctl_sctp_rmem[1]; + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 6dcdd25..f845d9d 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -71,8 +71,9 @@ static size_t rpc_ntop6(const struct sockaddr *sap, if (unlikely(len == 0)) return len; - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return len; + if (sin6->sin6_scope_id == 0) return len; rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", @@ -165,8 +166,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen, if (*delim != IPV6_SCOPE_DELIMITER) return 0; - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) return 0; len = (buf + buflen) - delim - 1; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f7a7f83..0cfccc2a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -206,8 +206,14 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct ctx->gc_win = window_size; /* gssd signals an error by passing ctx->gc_win = 0: */ if (ctx->gc_win == 0) { - /* in which case, p points to an error code which we ignore */ - p = ERR_PTR(-EACCES); + /* + * in which case, p points to an error code. Anything other + * than -EKEYEXPIRED gets converted to -EACCES. + */ + p = simple_get_bytes(p, end, &ret, sizeof(ret)); + if (!IS_ERR(p)) + p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) : + ERR_PTR(-EACCES); goto err; } /* copy the opaque wire context */ @@ -646,6 +652,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = PTR_ERR(p); switch (err) { case -EACCES: + case -EKEYEXPIRED: gss_msg->msg.errno = err; err = mlen; break; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ea4538..8d63f8f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -999,19 +999,14 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) inode = rpc_get_inode(sb, S_IFDIR | 0755); if (!inode) return -ENOMEM; - root = d_alloc_root(inode); + sb->s_root = root = d_alloc_root(inode); if (!root) { iput(inode); return -ENOMEM; } if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) - goto out; - sb->s_root = root; + return -ENOMEM; return 0; -out: - d_genocide(root); - dput(root); - return -ENOMEM; } static int diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 538ca43..8420a42 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -133,7 +133,7 @@ svc_pool_map_choose_mode(void) return SVC_POOL_PERNODE; } - node = any_online_node(node_online_map); + node = first_online_node; if (nr_cpus_node(node) > 2) { /* * Non-trivial SMP, or CONFIG_NUMA on @@ -506,6 +506,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { unsigned int pages, arghi; + /* bc_xprt uses fore channel allocated buffers */ + if (svc_is_backchannel(rqstp)) + return 1; + pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7d1f9e9..8f0f1fb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -173,11 +173,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ struct sockaddr *sap; size_t len; @@ -186,10 +188,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, sap = (struct sockaddr *)&sin; len = sizeof(sin); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ default: return ERR_PTR(-EAFNOSUPPORT); } @@ -231,7 +235,10 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, err: spin_unlock(&svc_xprt_class_lock); dprintk("svc: transport %s not found\n", xprt_name); - return -ENOENT; + + /* This errno is exposed to user space. Provide a reasonable + * perror msg for a bad transport. */ + return -EPROTONOSUPPORT; } EXPORT_SYMBOL_GPL(svc_create_xprt); @@ -699,8 +706,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags) && - !test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + dprintk("svc_recv: found XPT_CLOSE\n"); + svc_delete_xprt(xprt); + } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -726,7 +735,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_received(newxpt); } svc_xprt_received(xprt); - } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); @@ -739,11 +748,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) dprintk("svc: got len=%d\n", len); } - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); - svc_delete_xprt(xprt); - } - /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { rqstp->rq_res.len = 0; @@ -889,11 +893,8 @@ void svc_delete_xprt(struct svc_xprt *xprt) if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; - for (dr = svc_deferred_dequeue(xprt); dr; - dr = svc_deferred_dequeue(xprt)) { - svc_xprt_put(xprt); + while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); - } svc_xprt_put(xprt); spin_unlock_bh(&serv->sv_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d8c0411..afdcb04 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #define RPCDBG_FACILITY RPCDBG_AUTH +#include <linux/sunrpc/clnt.h> /* * AUTHUNIX and AUTHNULL credentials are both handled here. @@ -187,10 +188,13 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1, b2, b3, b4, b5, b6, b7, b8; - char c; char class[8]; - struct in6_addr addr; + union { + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; + } address; + struct sockaddr_in6 sin6; int err; struct ip_map *ipmp; @@ -209,24 +213,24 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) { - addr.s6_addr32[0] = 0; - addr.s6_addr32[1] = 0; - addr.s6_addr32[2] = htonl(0xffff); - addr.s6_addr32[3] = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c", - &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { - addr.s6_addr16[0] = htons(b1); - addr.s6_addr16[1] = htons(b2); - addr.s6_addr16[2] = htons(b3); - addr.s6_addr16[3] = htons(b4); - addr.s6_addr16[4] = htons(b5); - addr.s6_addr16[5] = htons(b6); - addr.s6_addr16[6] = htons(b7); - addr.s6_addr16[7] = htons(b8); - } else + if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; + switch (address.sa.sa_family) { + case AF_INET: + /* Form a mapped IPv4 address in sin6 */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + memcpy(&sin6, &address.s6, sizeof(sin6)); + break; +#endif + default: + return -EINVAL; + } expiry = get_expiry(&mesg); if (expiry ==0) @@ -243,7 +247,8 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - ipmp = ip_map_lookup(class, &addr); + /* IPv6 scope IDs are ignored for now */ + ipmp = ip_map_lookup(class, &sin6.sin6_addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), @@ -619,7 +624,7 @@ static int unix_gid_show(struct seq_file *m, else glen = 0; - seq_printf(m, "%d %d:", ug->uid, glen); + seq_printf(m, "%u %d:", ug->uid, glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", GROUP_AT(ug->gi, i)); seq_printf(m, "\n"); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 870929e..a29f259 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -968,6 +968,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return len; err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_received(&svsk->sk_xprt); err_again: return -EAGAIN; } @@ -1357,7 +1358,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, if (!so) return err; - if (so->sk->sk_family != AF_INET) + if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) err = -EAFNOSUPPORT; else if (so->sk->sk_protocol != IPPROTO_TCP && so->sk->sk_protocol != IPPROTO_UDP) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7018eef..f96c2fe 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -160,16 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) (void)rpc_ntop(sap, buf, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d739e5..75ab08e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_INET: sin = xs_addr_in(xprt); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: sin6 = xs_addr_in6(xprt); - (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); @@ -315,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); char buf[128]; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } @@ -1912,6 +1911,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, case -EALREADY: xprt_clear_connecting(xprt); return; + case -EINVAL: + /* Happens, for instance, if the user specified a link + * local IPv6 address without a scope-id. + */ + goto out; } out_eagain: status = -EAGAIN; @@ -2100,7 +2104,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) * we allocate pages instead doing a kmalloc like rpc_malloc is because we want * to use the server side send routines. */ -void *bc_malloc(struct rpc_task *task, size_t size) +static void *bc_malloc(struct rpc_task *task, size_t size) { struct page *page; struct rpc_buffer *buf; @@ -2120,7 +2124,7 @@ void *bc_malloc(struct rpc_task *task, size_t size) /* * Free the space allocated in the bc_alloc routine */ -void bc_free(void *buffer) +static void bc_free(void *buffer) { struct rpc_buffer *buf; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a3bfd40..90a0519 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { - static int send_count = 0; - int bp_index; - int swap_time; /* Prepare buffer for broadcasting (if first time trying to send it) */ @@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, msg_set_mc_netid(msg, tipc_net_id); } - /* Determine if bearer pairs should be swapped following this attempt */ - - if ((swap_time = (++send_count >= 10))) - send_count = 0; - /* Send buffer over bearers until all targets reached */ bcbearer->remains = tipc_cltr_bcast_nodes; @@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ - if (!p->publ.blocked && - !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { - if (swap_time && s && !s->publ.blocked) - goto swap; - else - goto update; + if (p->publ.blocked || + p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { + /* unable to send on primary bearer */ + if (!s || s->publ.blocked || + s->media->send_msg(buf, &s->publ, + &s->media->bcast_addr)) { + /* unable to send on either bearer */ + continue; + } + } + + if (s) { + bcbearer->bpairs[bp_index].primary = s; + bcbearer->bpairs[bp_index].secondary = p; } - if (!s || s->publ.blocked || - s->media->send_msg(buf, &s->publ, &s->media->bcast_addr)) - continue; /* unable to send using bearer pair */ -swap: - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; -update: if (bcbearer->remains_new.count == 0) return 0; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 327011f..7809137 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -45,10 +45,10 @@ #define MAX_ADDR_STR 32 -static struct media *media_list = NULL; +static struct media media_list[MAX_MEDIA]; static u32 media_count = 0; -struct bearer *tipc_bearers = NULL; +struct bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type, int res = -EINVAL; write_lock_bh(&tipc_net_lock); - if (!media_list) - goto exit; + if (tipc_mode != TIPC_NET_MODE) { + warn("Media <%s> rejected, not in networked mode yet\n", name); + goto exit; + } if (!media_name_valid(name)) { warn("Media <%s> rejected, illegal name\n", name); goto exit; @@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name) -int tipc_bearer_init(void) -{ - int res; - - write_lock_bh(&tipc_net_lock); - tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); - media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); - if (tipc_bearers && media_list) { - res = 0; - } else { - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; - res = -ENOMEM; - } - write_unlock_bh(&tipc_net_lock); - return res; -} - void tipc_bearer_stop(void) { u32 i; - if (!tipc_bearers) - return; - for (i = 0; i < MAX_BEARERS; i++) { if (tipc_bearers[i].active) tipc_bearers[i].publ.blocked = 1; @@ -695,10 +674,6 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(tipc_bearers[i].publ.name); } - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ca57348..000228e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -114,7 +114,7 @@ struct bearer_name { struct link; -extern struct bearer *tipc_bearers; +extern struct bearer tipc_bearers[]; void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); diff --git a/net/tipc/link.c b/net/tipc/link.c index 6f50f64..49f2be8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) /* Continue retransmission now, if there is anything: */ - if (r_q_size && buf && !skb_cloned(buf)) { + if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { @@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { - if (!skb_cloned(buf)) { + if (l_ptr->retransm_queue_size == 0) { msg_dbg(msg, ">NO_RETR->BCONG>"); dbg_print_link(l_ptr, " "); l_ptr->retransm_queue_head = msg_seqno(msg); l_ptr->retransm_queue_size = retransmits; - return; } else { - /* Don't retransmit if driver already has the buffer */ + err("Unexpected retransmit on link %s (qsize=%d)\n", + l_ptr->name, l_ptr->retransm_queue_size); } + return; } else { /* Detect repeated retransmit failures on uncongested bearer */ @@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, } } - while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { + while (retransmits && (buf != l_ptr->next_out) && buf) { msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); @@ -1882,6 +1883,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) (msg_destnode(msg) != tipc_own_addr))) goto cont; + /* Discard non-routeable messages destined for another node */ + + if (unlikely(!msg_isdata(msg) && + (msg_destnode(msg) != tipc_own_addr))) { + if ((msg_user(msg) != CONN_MANAGER) && + (msg_user(msg) != MSG_FRAGMENTER)) + goto cont; + } + /* Locate unicast link endpoint that should handle message */ n_ptr = tipc_node_find(msg_prevnode(msg)); diff --git a/net/tipc/net.c b/net/tipc/net.c index 7906608..f25b1cd 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,8 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net = { NULL }; +struct _zone *tipc_zones[256] = { NULL, }; +struct network tipc_net = { tipc_zones }; struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) { @@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest) } } -static int net_init(void) -{ - memset(&tipc_net, 0, sizeof(tipc_net)); - tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); - if (!tipc_net.zones) { - return -ENOMEM; - } - return 0; -} - static void net_stop(void) { u32 z_num; - if (!tipc_net.zones) - return; - - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { + for (z_num = 1; z_num <= tipc_max_zones; z_num++) tipc_zone_delete(tipc_net.zones[z_num]); - } - kfree(tipc_net.zones); - tipc_net.zones = NULL; } static void net_route_named_msg(struct sk_buff *buf) @@ -282,9 +267,7 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - if ((res = tipc_bearer_init()) || - (res = net_init()) || - (res = tipc_cltr_init()) || + if ((res = tipc_cltr_init()) || (res = tipc_bclink_init())) { return res; } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 414fc34..8dea665 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -153,11 +153,11 @@ void tipc_ref_table_stop(void) u32 tipc_ref_acquire(void *object, spinlock_t **lock) { - struct reference *entry; u32 index; u32 index_mask; u32 next_plus_upper; u32 ref; + struct reference *entry = NULL; if (!object) { err("Attempt to acquire reference to non-existent object\n"); @@ -175,30 +175,36 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) index = tipc_ref_table.first_free; entry = &(tipc_ref_table.entries[index]); index_mask = tipc_ref_table.index_mask; - /* take lock in case a previous user of entry still holds it */ - spin_lock_bh(&entry->lock); next_plus_upper = entry->ref; tipc_ref_table.first_free = next_plus_upper & index_mask; ref = (next_plus_upper & ~index_mask) + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); - spin_lock_bh(&entry->lock); ref = tipc_ref_table.start_mask + index; - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; } else { ref = 0; } write_unlock_bh(&ref_table_lock); + /* + * Grab the lock so no one else can modify this entry + * While we assign its ref value & object pointer + */ + if (entry) { + spin_lock_bh(&entry->lock); + entry->ref = ref; + entry->object = object; + *lock = &entry->lock; + /* + * keep it locked, the caller is responsible + * for unlocking this when they're done with it + */ + } + return ref; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1ea64f0..4b235fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - sk_add_backlog(sk, buf); - res = TIPC_OK; + if (sk_add_backlog(sk, buf)) + res = TIPC_ERR_OVERLOAD; + else + res = TIPC_OK; } bh_unlock_sock(sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ac91f0d..ff123e5 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -76,19 +76,6 @@ struct top_srv { static struct top_srv topsrv = { 0 }; /** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ - -static u32 htohl(u32 in, int swap) -{ - return swap ? swab32(in) : in; -} - -/** * subscr_send_event - send a message containing a tipc_event to the subscriber * * Note: Must not hold subscriber's server port lock, since tipc_send() will @@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); + sub->evt.event = htonl(event); + sub->evt.found_lower = htonl(found_lower); + sub->evt.found_upper = htonl(found_upper); + sub->evt.port.ref = htonl(port_ref); + sub->evt.port.node = htonl(node); tipc_send(sub->server_ref, 1, &msg_sect); } @@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s, { struct subscription *sub; struct subscription *sub_temp; + __u32 type, lower, upper; int found = 0; /* Find first matching subscription, exit if not found */ + type = ntohl(s->seq.type); + lower = ntohl(s->seq.lower); + upper = ntohl(s->seq.upper); + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; - break; - } + if ((type == sub->seq.type) && + (lower == sub->seq.lower) && + (upper == sub->seq.upper)) { + found = 1; + break; + } } if (!found) return; @@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, struct subscriber *subscriber) { struct subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + if (ntohl(s->filter) & TIPC_SUB_CANCEL) { subscr_cancel(s, subscriber); return NULL; } @@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); - sub->filter = htohl(s->filter, swap); + sub->seq.type = ntohl(s->seq.type); + sub->seq.lower = ntohl(s->seq.lower); + sub->seq.upper = ntohl(s->seq.upper); + sub->timeout = ntohl(s->timeout); + sub->filter = ntohl(s->filter); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { @@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; - sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf..c20f496 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub, * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription - * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ @@ -66,7 +65,6 @@ struct subscription { struct list_head nameseq_list; struct list_head subscription_list; u32 server_ref; - int swap; struct tipc_event evt; }; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3e1efe5..52e3042 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - sk_add_backlog(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8..843e066 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; |