From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:30:14 +0000 Subject: rtnetlink: Link address family API Each net_device contains address family specific data such as per device settings and statistics. We already expose this data via procfs/sysfs and partially netlink. The netlink method requires the requester to send one RTM_GETLINK request for each address family it wishes to receive data of and then merge this data itself. This patch implements a new API which combines all address family specific link data in a new netlink attribute IFLA_AF_SPEC. IFLA_AF_SPEC contains a sequence of nested attributes, one for each address family which in turn defines the structure of its own attribute. Example: [IFLA_AF_SPEC] = { [AF_INET] = { [IFLA_INET_CONF] = ..., }, [AF_INET6] = { [IFLA_INET6_FLAGS] = ..., [IFLA_INET6_CONF] = ..., } } The API also allows for address families to implement a function which parses the IFLA_AF_SPEC attribute sent by userspace to implement address family specific link options. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 2 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 841c287..bf69e58 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev) return size; } +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } + + return size; +} + static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, *af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + continue; + + if (!af_ops->parse_link_af) + continue; + + err = af_ops->parse_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: -- cgit v1.1 From cf7afbfeb8ceb0187348d0a1a0db61305e25f05f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 22 Nov 2010 01:31:54 +0000 Subject: rtnl: make link af-specific updates atomic As David pointed out correctly, updates to af-specific attributes are currently not atomic. If multiple changes are requested and one of them fails, previous updates may have been applied already leaving the link behind in a undefined state. This patch splits the function parse_link_af() into two functions validate_link_af() and set_link_at(). validate_link_af() is placed to validate_linkmsg() check for errors as early as possible before any changes to the link have been made. set_link_af() is called to commit the changes later. This method is not fail proof, while it is currently sufficient to make set_link_af() inerrable and thus 100% atomic, the validation function method will not be able to detect all error scenarios in the future, there will likely always be errors depending on states which are f.e. not protected by rtnl_mutex and thus may change between validation and setting. Also, instead of silently ignoring unknown address families and config blocks for address families which did not register a set function the errors EAFNOSUPPORT respectively EOPNOSUPPORT are returned to avoid comitting 4 out of 5 update requests without notifying the user. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bf69e58..750db57 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1107,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EINVAL; } + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem, err; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + return -EAFNOSUPPORT; + + if (!af_ops->set_link_af) + return -EOPNOTSUPP; + + if (af_ops->validate_link_af) { + err = af_ops->validate_link_af(dev, + tb[IFLA_AF_SPEC]); + if (err < 0) + return err; + } + } + } + return 0; } @@ -1356,12 +1378,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, const struct rtnl_af_ops *af_ops; if (!(af_ops = rtnl_af_lookup(nla_type(af)))) - continue; - - if (!af_ops->parse_link_af) - continue; + BUG(); - err = af_ops->parse_link_af(dev, af); + err = af_ops->set_link_af(dev, af); if (err < 0) goto errout; -- cgit v1.1 From 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 7 Jan 2011 03:15:05 +0000 Subject: netlink: test for all flags of the NLM_F_DUMP composite Due to NLM_F_DUMP is composed of two bits, NLM_F_ROOT | NLM_F_MATCH, when doing "if (x & NLM_F_DUMP)", it tests for _either_ of the bits being set. Because NLM_F_MATCH's value overlaps with NLM_F_EXCL, non-dump requests with NLM_F_EXCL set are mistaken as dump requests. Substitute the condition to test for _all_ bits being set. Signed-off-by: Jan Engelhardt Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57..a5f7535 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; -- cgit v1.1 From b8f3ab4290f1e720166e888ea2a1d1d44c4d15dd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 18 Jan 2011 12:40:38 -0800 Subject: Revert "netlink: test for all flags of the NLM_F_DUMP composite" This reverts commit 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf. It breaks several things including the avahi daemon. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a5f7535..750db57 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; -- cgit v1.1 From 6d3a9a685465986d7653c5abbc0b24681e7c44d7 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Wed, 26 Jan 2011 04:55:24 +0000 Subject: net: fix validate_link_af in rtnetlink core I'm testing an API that uses IFLA_AF_SPEC attribute. In the rtnetlink core , the set_link_af() member of the rtnl_af_ops struct receives the nested attribute (as I expected), but the validate_link_af() member receives the parent attribute. IMO, this patch fixes this. Signed-off-by: Kurt Van Dijck Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57..31459ef 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1121,8 +1121,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, - tb[IFLA_AF_SPEC]); + err = af_ops->validate_link_af(dev, af); if (err < 0) return err; } -- cgit v1.1 From 13ad17745c2cbd437d9e24b2d97393e0be11c439 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Jan 2011 14:57:22 +0000 Subject: net: Fix ip link add netns oops Ed Swierk writes: > On 2.6.35.7 > ip link add link eth0 netns 9999 type macvlan > where 9999 is a nonexistent PID triggers an oops and causes all network functions to hang: > [10663.821898] BUG: unable to handle kernel NULL pointer dereference at 000000000000006d > [10663.821917] IP: [] __dev_alloc_name+0x9a/0x170 > [10663.821933] PGD 1d3927067 PUD 22f5c5067 PMD 0 > [10663.821944] Oops: 0000 [#1] SMP > [10663.821953] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq > [10663.821959] CPU 3 > [10663.821963] Modules linked in: macvlan ip6table_filter ip6_tables rfcomm ipt_MASQUERADE binfmt_misc iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack sco ipt_REJECT bnep l2cap xt_tcpudp iptable_filter ip_tables x_tables bridge stp vboxnetadp vboxnetflt vboxdrv kvm_intel kvm parport_pc ppdev snd_hda_codec_intelhdmi snd_hda_codec_conexant arc4 iwlagn iwlcore mac80211 snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi i915 snd_seq_midi_event snd_seq thinkpad_acpi drm_kms_helper btusb tpm_tis nvram uvcvideo snd_timer snd_seq_device bluetooth videodev v4l1_compat v4l2_compat_ioctl32 tpm drm tpm_bios snd cfg80211 psmouse serio_raw intel_ips soundcore snd_page_alloc intel_agp i2c_algo_bit video output netconsole configfs lp parport usbhid hid e1000e sdhci_pci ahci libahci sdhci led_class > [10663.822155] > [10663.822161] Pid: 6000, comm: ip Not tainted 2.6.35-23-generic #41-Ubuntu 2901CTO/2901CTO > [10663.822167] RIP: 0010:[] [] __dev_alloc_name+0x9a/0x170 > [10663.822177] RSP: 0018:ffff88014aebf7b8 EFLAGS: 00010286 > [10663.822182] RAX: 00000000fffffff4 RBX: ffff8801ad900800 RCX: 0000000000000000 > [10663.822187] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88014ad63000 > [10663.822191] RBP: ffff88014aebf808 R08: 0000000000000041 R09: 0000000000000041 > [10663.822196] R10: 0000000000000000 R11: dead000000200200 R12: ffff88014aebf818 > [10663.822201] R13: fffffffffffffffd R14: ffff88014aebf918 R15: ffff88014ad62000 > [10663.822207] FS: 00007f00c487f700(0000) GS:ffff880001f80000(0000) knlGS:0000000000000000 > [10663.822212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [10663.822216] CR2: 000000000000006d CR3: 0000000231f19000 CR4: 00000000000026e0 > [10663.822221] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [10663.822226] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > [10663.822231] Process ip (pid: 6000, threadinfo ffff88014aebe000, task ffff88014afb16e0) > [10663.822236] Stack: > [10663.822240] ffff88014aebf808 ffffffff814a2bb5 ffff88014aebf7e8 00000000a00ee8d6 > [10663.822251] <0> 0000000000000000 ffffffffa00ef940 ffff8801ad900800 ffff88014aebf818 > [10663.822265] <0> ffff88014aebf918 ffff8801ad900800 ffff88014aebf858 ffffffff8149c413 > [10663.822281] Call Trace: > [10663.822290] [] ? dev_addr_init+0x75/0xb0 > [10663.822298] [] dev_alloc_name+0x43/0x90 > [10663.822307] [] rtnl_create_link+0xbe/0x1b0 > [10663.822314] [] rtnl_newlink+0x48a/0x570 > [10663.822321] [] ? rtnl_newlink+0x1ac/0x570 > [10663.822332] [] ? native_x2apic_icr_read+0x4/0x20 > [10663.822339] [] rtnetlink_rcv_msg+0x177/0x290 > [10663.822346] [] ? rtnetlink_rcv_msg+0x0/0x290 > [10663.822354] [] netlink_rcv_skb+0xa9/0xd0 > [10663.822360] [] rtnetlink_rcv+0x25/0x40 > [10663.822367] [] netlink_unicast+0x2de/0x2f0 > [10663.822374] [] netlink_sendmsg+0x1fe/0x2e0 > [10663.822383] [] sock_sendmsg+0xf3/0x120 > [10663.822391] [] ? _raw_spin_lock+0xe/0x20 > [10663.822400] [] ? __d_lookup+0x136/0x150 > [10663.822406] [] ? _raw_spin_lock+0xe/0x20 > [10663.822414] [] ? _atomic_dec_and_lock+0x4d/0x80 > [10663.822422] [] ? mntput_no_expire+0x30/0x110 > [10663.822429] [] ? move_addr_to_kernel+0x65/0x70 > [10663.822435] [] ? verify_iovec+0x88/0xe0 > [10663.822442] [] sys_sendmsg+0x240/0x3a0 > [10663.822450] [] ? __do_fault+0x479/0x560 > [10663.822457] [] ? _raw_spin_lock+0xe/0x20 > [10663.822465] [] ? alloc_fd+0x10a/0x150 > [10663.822473] [] ? do_page_fault+0x15e/0x350 > [10663.822482] [] system_call_fastpath+0x16/0x1b > [10663.822487] Code: 90 48 8d 78 02 be 25 00 00 00 e8 92 1d e2 ff 48 85 c0 75 cf bf 20 00 00 00 e8 c3 b1 c6 ff 49 89 c7 b8 f4 ff ff ff 4d 85 ff 74 bd <4d> 8b 75 70 49 8d 45 70 48 89 45 b8 49 83 ee 58 eb 28 48 8d 55 > [10663.822618] RIP [] __dev_alloc_name+0x9a/0x170 > [10663.822627] RSP > [10663.822631] CR2: 000000000000006d > [10663.822636] ---[ end trace 3dfd6c3ad5327ca7 ]--- This bug was introduced in: commit 81adee47dfb608df3ad0b91d230fb3cef75f0060 Author: Eric W. Biederman Date: Sun Nov 8 00:53:51 2009 -0800 net: Support specifying the network namespace upon device creation. There is no good reason to not support userspace specifying the network namespace during device creation, and it makes it easier to create a network device and pass it to a child network namespace with a well known name. We have to be careful to ensure that the target network namespace for the new device exists through the life of the call. To keep that logic clear I have factored out the network namespace grabbing logic into rtnl_link_get_net. In addtion we need to continue to pass the source network namespace to the rtnl_link_ops.newlink method so that we can find the base device source network namespace. Signed-off-by: Eric W. Biederman Acked-by: Eric Dumazet Where apparently I forgot to add error handling to the path where we create a new network device in a new network namespace, and pass in an invalid pid. Cc: stable@kernel.org Reported-by: Ed Swierk Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31459ef..2d65c6b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1671,6 +1671,9 @@ replay: snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); dest_net = rtnl_link_get_net(net, tb); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) -- cgit v1.1