From 269c4845d5b3627b95b1934107251bacbe99bb68 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 15 Jun 2012 02:30:20 -0300 Subject: Bluetooth: Fix possible deadlock in SCO code sco_chan_del() only has conn != NULL when called from sco_conn_del() so just move the code from it that deal with conn to sco_conn_del(). [ 120.765529] [ 120.765529] ====================================================== [ 120.766529] [ INFO: possible circular locking dependency detected ] [ 120.766529] 3.5.0-rc1-10292-g3701f94-dirty #70 Tainted: G W [ 120.766529] ------------------------------------------------------- [ 120.766529] kworker/u:3/1497 is trying to acquire lock: [ 120.766529] (&(&conn->lock)->rlock#2){+.+...}, at: [] sco_chan_del+0x4c/0x170 [bluetooth] [ 120.766529] [ 120.766529] but task is already holding lock: [ 120.766529] (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}, at: [] sco_conn_del+0x61/0xe0 [bluetooth] [ 120.766529] [ 120.766529] which lock already depends on the new lock. [ 120.766529] [ 120.766529] [ 120.766529] the existing dependency chain (in reverse order) is: [ 120.766529] [ 120.766529] -> #1 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}: [ 120.766529] [] lock_acquire+0x8e/0xb0 [ 120.766529] [] _raw_spin_lock+0x40/0x80 [ 120.766529] [] sco_connect_cfm+0x79/0x300 [bluetooth] [ 120.766529] [] hci_sync_conn_complete_evt.isra.90+0x343/0x400 [bluetooth] [ 120.766529] [] hci_event_packet+0x317/0xfb0 [bluetooth] [ 120.766529] [] hci_rx_work+0x2c8/0x890 [bluetooth] [ 120.766529] [] process_one_work+0x197/0x460 [ 120.766529] [] worker_thread+0x126/0x2d0 [ 120.766529] [] kthread+0x9d/0xb0 [ 120.766529] [] kernel_thread_helper+0x4/0x10 [ 120.766529] [ 120.766529] -> #0 (&(&conn->lock)->rlock#2){+.+...}: [ 120.766529] [] __lock_acquire+0x154a/0x1d30 [ 120.766529] [] lock_acquire+0x8e/0xb0 [ 120.766529] [] _raw_spin_lock+0x40/0x80 [ 120.766529] [] sco_chan_del+0x4c/0x170 [bluetooth] [ 120.766529] [] sco_conn_del+0x74/0xe0 [bluetooth] [ 120.766529] [] sco_disconn_cfm+0x32/0x60 [bluetooth] [ 120.766529] [] hci_disconn_complete_evt.isra.53+0x242/0x390 [bluetooth] [ 120.766529] [] hci_event_packet+0x617/0xfb0 [bluetooth] [ 120.766529] [] hci_rx_work+0x2c8/0x890 [bluetooth] [ 120.766529] [] process_one_work+0x197/0x460 [ 120.766529] [] worker_thread+0x126/0x2d0 [ 120.766529] [] kthread+0x9d/0xb0 [ 120.766529] [] kernel_thread_helper+0x4/0x10 [ 120.766529] [ 120.766529] other info that might help us debug this: [ 120.766529] [ 120.766529] Possible unsafe locking scenario: [ 120.766529] [ 120.766529] CPU0 CPU1 [ 120.766529] ---- ---- [ 120.766529] lock(slock-AF_BLUETOOTH-BTPROTO_SCO); [ 120.766529] lock(&(&conn->lock)->rlock#2); [ 120.766529] lock(slock-AF_BLUETOOTH-BTPROTO_SCO); [ 120.766529] lock(&(&conn->lock)->rlock#2); [ 120.766529] [ 120.766529] *** DEADLOCK *** Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25..3589e21 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err) sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); + + sco_conn_lock(conn); + conn->sk = NULL; + sco_pi(sk)->conn = NULL; + sco_conn_unlock(conn); + + if (conn->hcon) + hci_conn_put(conn->hcon); + sco_sock_kill(sk); } @@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err) BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - if (conn) { - sco_conn_lock(conn); - conn->sk = NULL; - sco_pi(sk)->conn = NULL; - sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_put(conn->hcon); - } - sk->sk_state = BT_CLOSED; sk->sk_err = err; sk->sk_state_change(sk); -- cgit v1.1 From a9ea3ed9b71cc3271dd59e76f65748adcaa76422 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 19 Jul 2012 14:46:08 +0200 Subject: Bluetooth: Fix legacy pairing with some devices Some devices e.g. some Android based phones don't do SDP search before pairing and cancel legacy pairing when ACL is disconnected. PIN Code Request event which changes ACL timeout to HCI_PAIRING_TIMEOUT is only received after remote user entered PIN. In that case no L2CAP is connected so default HCI_DISCONN_TIMEOUT (2 seconds) is being used to timeout ACL connection. This results in problems with legacy pairing as remote user has only few seconds to enter PIN before ACL is disconnected. Increase disconnect timeout for incomming connection to HCI_PAIRING_TIMEOUT if SSP is disabled and no linkey exists. To avoid keeping ACL alive for too long after SDP search set ACL timeout back to HCI_DISCONN_TIMEOUT when L2CAP is connected. 2012-07-19 13:24:43.413521 < HCI Command: Create Connection (0x01|0x0005) plen 13 bdaddr 00:02:72:D6:6A:3F ptype 0xcc18 rswitch 0x01 clkoffset 0x0000 Packet type: DM1 DM3 DM5 DH1 DH3 DH5 2012-07-19 13:24:43.425224 > HCI Event: Command Status (0x0f) plen 4 Create Connection (0x01|0x0005) status 0x00 ncmd 1 2012-07-19 13:24:43.885222 > HCI Event: Role Change (0x12) plen 8 status 0x00 bdaddr 00:02:72:D6:6A:3F role 0x01 Role: Slave 2012-07-19 13:24:44.054221 > HCI Event: Connect Complete (0x03) plen 11 status 0x00 handle 42 bdaddr 00:02:72:D6:6A:3F type ACL encrypt 0x00 2012-07-19 13:24:44.054313 < HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2 handle 42 2012-07-19 13:24:44.055176 > HCI Event: Page Scan Repetition Mode Change (0x20) plen 7 bdaddr 00:02:72:D6:6A:3F mode 0 2012-07-19 13:24:44.056217 > HCI Event: Max Slots Change (0x1b) plen 3 handle 42 slots 5 2012-07-19 13:24:44.059218 > HCI Event: Command Status (0x0f) plen 4 Read Remote Supported Features (0x01|0x001b) status 0x00 ncmd 0 2012-07-19 13:24:44.062192 > HCI Event: Command Status (0x0f) plen 4 Unknown (0x00|0x0000) status 0x00 ncmd 1 2012-07-19 13:24:44.067219 > HCI Event: Read Remote Supported Features (0x0b) plen 11 status 0x00 handle 42 Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87 2012-07-19 13:24:44.067248 < HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3 handle 42 page 1 2012-07-19 13:24:44.071217 > HCI Event: Command Status (0x0f) plen 4 Read Remote Extended Features (0x01|0x001c) status 0x00 ncmd 1 2012-07-19 13:24:44.076218 > HCI Event: Read Remote Extended Features (0x23) plen 13 status 0x00 handle 42 page 1 max 1 Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 2012-07-19 13:24:44.076249 < HCI Command: Remote Name Request (0x01|0x0019) plen 10 bdaddr 00:02:72:D6:6A:3F mode 2 clkoffset 0x0000 2012-07-19 13:24:44.081218 > HCI Event: Command Status (0x0f) plen 4 Remote Name Request (0x01|0x0019) status 0x00 ncmd 1 2012-07-19 13:24:44.105214 > HCI Event: Remote Name Req Complete (0x07) plen 255 status 0x00 bdaddr 00:02:72:D6:6A:3F name 'uw000951-0' 2012-07-19 13:24:44.105284 < HCI Command: Authentication Requested (0x01|0x0011) plen 2 handle 42 2012-07-19 13:24:44.111207 > HCI Event: Command Status (0x0f) plen 4 Authentication Requested (0x01|0x0011) status 0x00 ncmd 1 2012-07-19 13:24:44.112220 > HCI Event: Link Key Request (0x17) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.112249 < HCI Command: Link Key Request Negative Reply (0x01|0x000c) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.115215 > HCI Event: Command Complete (0x0e) plen 10 Link Key Request Negative Reply (0x01|0x000c) ncmd 1 status 0x00 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:44.116215 > HCI Event: PIN Code Request (0x16) plen 6 bdaddr 00:02:72:D6:6A:3F 2012-07-19 13:24:48.099184 > HCI Event: Auth Complete (0x06) plen 3 status 0x13 handle 42 Error: Remote User Terminated Connection 2012-07-19 13:24:48.179182 > HCI Event: Disconn Complete (0x05) plen 4 status 0x00 handle 42 reason 0x13 Reason: Remote User Terminated Connection Cc: stable@vger.kernel.org Signed-off-by: Szymon Janc Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 7 ++++++- net/bluetooth/l2cap_core.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978..248632c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1762,7 +1762,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); - conn->disc_timeout = HCI_DISCONN_TIMEOUT; + + if (!conn->out && !hci_conn_ssp_enabled(conn) && + !hci_find_link_key(hdev, &ev->bdaddr)) + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + else + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db..daa149b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1181,6 +1181,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) sk = chan->sk; hci_conn_hold(conn->hcon); + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); -- cgit v1.1 From c810089c27e48b816181b454fcc493d19fdbc2ba Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:09 +0300 Subject: Bluetooth: Fix using NULL inquiry entry If entry wasn't found in the hci_inquiry_cache_lookup_resolve do not resolve the name.This will fix a kernel crash when trying to use NULL pointer. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 248632c..b64cfa2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) return false; e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); + if (!e) + return false; + if (hci_resolve_name(hdev, e) == 0) { e->name_state = NAME_PENDING; return true; -- cgit v1.1 From 7cc8380eb10347016d95bf6f9d842c2ae6d12932 Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:10 +0300 Subject: Bluetooth: Fix using a NULL inquiry cache entry If the device was not found in a list of found devices names of which are pending.This may happen in a case when HCI Remote Name Request was sent as a part of incoming connection establishment procedure. Hence there is no need to continue resolving a next name as it will be done upon receiving another Remote Name Request Complete Event. This will fix a kernel crash when trying to use this entry to resolve the next name. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b64cfa2..fe9a3d6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1396,12 +1396,18 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, return; e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); - if (e) { + /* If the device was not found in a list of found devices names of which + * are pending. there is no need to continue resolving a next name as it + * will be done upon receiving another Remote Name Request Complete + * Event */ + if (!e) + return; + + list_del(&e->list); + if (name) { e->name_state = NAME_KNOWN; - list_del(&e->list); - if (name) - mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, - e->data.rssi, name, name_len); + mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, + e->data.rssi, name, name_len); } if (hci_resolve_next_name(hdev)) -- cgit v1.1 From c3e7c0d90b14a3e7ac091d24cef09efb516d587b Mon Sep 17 00:00:00 2001 From: Ram Malovany Date: Thu, 19 Jul 2012 10:26:11 +0300 Subject: Bluetooth: Set name_state to unknown when entry name is empty When the name of the given entry is empty , the state needs to be updated accordingly. Cc: stable@vger.kernel.org Signed-off-by: Ram Malovany Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fe9a3d6..715d7e3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1408,6 +1408,8 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, e->name_state = NAME_KNOWN; mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, e->data.rssi, name, name_len); + } else { + e->name_state = NAME_NOT_KNOWN; } if (hci_resolve_next_name(hdev)) -- cgit v1.1 From d08fd0e712a834d4abb869c0215a702e290bc51e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 19 Jul 2012 17:03:43 +0300 Subject: Bluetooth: smp: Fix possible NULL dereference smp_chan_create might return NULL so we need to check before dereferencing smp. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- net/bluetooth/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 16ef0dc..901a616 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp = smp_chan_create(conn); + else + smp = conn->smp_chan; - smp = conn->smp_chan; + if (!smp) + return SMP_UNSPECIFIED; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); -- cgit v1.1 From 49dfbb9129c4edb318578de35cc45c555df37884 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Thu, 19 Jul 2012 12:54:04 +0530 Subject: Bluetooth: Fix socket not getting freed if l2cap channel create fails If l2cap_chan_create() fails then it will return from l2cap_sock_kill since zapped flag of sk is reset. Signed-off-by: Jaganath Kanakkassery Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e..b94abd3 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1174,7 +1174,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p chan = l2cap_chan_create(); if (!chan) { - l2cap_sock_kill(sk); + sk_free(sk); return NULL; } -- cgit v1.1 From e9324b2ce656e1910d2385b9b47a2f926456dbe3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:45 +0000 Subject: netfilter: nf_ct_sip: fix helper name Commit 3a8fc53a (netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names) introduced a bug in the SIP helper, the helper name is sprinted to the sip_names array instead of instead of into the helper structure. This breaks the helper match and the /proc/net/nf_conntrack_expect output. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 758a1ba..2fb6669 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1515,7 +1515,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; -static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1585,9 +1584,9 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].me = THIS_MODULE; if (ports[i] == SIP_PORT) - sprintf(sip_names[i][j], "sip"); + sprintf(sip[i][j].name, "sip"); else - sprintf(sip_names[i][j], "sip-%u", i); + sprintf(sip[i][j].name, "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); -- cgit v1.1 From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:46 +0000 Subject: netfilter: nf_ct_sip: fix IPv6 address parsing Within SIP messages IPv6 addresses are enclosed in square brackets in most cases, with the exception of the "received=" header parameter. Currently the helper fails to parse enclosed addresses. This patch: - changes the SIP address parsing function to enforce square brackets when required, and accept them when not required but present, as recommended by RFC 5118. - adds a new SDP address parsing function that never accepts square brackets since SDP doesn't use them. With these changes, the SIP helper correctly parses all test messages from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages for Internet Protocol Version 6 (IPv6)). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 4 +- net/netfilter/nf_conntrack_sip.c | 87 ++++++++++++++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ea4a2381..eef8f29 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -173,7 +173,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * the reply. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "maddr=", &poff, &plen, - &addr) > 0 && + &addr, true) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", @@ -187,7 +187,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * from which the server received the request. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "received=", &poff, &plen, - &addr) > 0 && + &addr, false) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 2fb6669..5c0a112 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr, return len + digits_len(ct, dptr, limit, shift); } -static int parse_addr(const struct nf_conn *ct, const char *cp, - const char **endp, union nf_inet_addr *addr, - const char *limit) +static int sip_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit, bool delim) { const char *end; - int ret = 0; + int ret; if (!ct) return 0; @@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp, switch (nf_ct_l3num(ct)) { case AF_INET: ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + if (ret == 0) + return 0; break; case AF_INET6: + if (cp < limit && *cp == '[') + cp++; + else if (delim) + return 0; + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + if (ret == 0) + return 0; + + if (end < limit && *end == ']') + end++; + else if (delim) + return 0; break; default: BUG(); } - if (ret == 0 || end == cp) - return 0; if (endp) *endp = end; return 1; @@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, union nf_inet_addr addr; const char *aux = dptr; - if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { + if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) { pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; dptr += shift; - if (!parse_addr(ct, dptr, &end, addr, limit)) + if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; if (end < limit && *end == ':') { end++; @@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (ret == 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) + if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; if (*c == ':') { c++; @@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, const char *name, unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr) + union nf_inet_addr *addr, bool delim) { const char *limit = dptr + datalen; const char *start, *end; @@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - if (!parse_addr(ct, start, &end, addr, limit)) + if (!sip_parse_addr(ct, start, &end, addr, limit, delim)) return 0; *matchoff = start - dptr; *matchlen = end - start; @@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, return 1; } +static int sdp_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit) +{ + const char *end; + int ret; + + memset(addr, 0, sizeof(*addr)); + switch (nf_ct_l3num(ct)) { + case AF_INET: + ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + break; + case AF_INET6: + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + break; + default: + BUG(); + } + + if (ret == 0) + return 0; + if (endp) + *endp = end; + return 1; +} + +/* skip ip address. returns its length. */ +static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + union nf_inet_addr addr; + const char *aux = dptr; + + if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) { + pr_debug("ip: %s parse failed.!\n", dptr); + return 0; + } + + return dptr - aux; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, */ static const struct sip_header ct_sdp_hdrs[] = { [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), - [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), - [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), - [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), - [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), + [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; @@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, if (ret <= 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, NULL, addr, - dptr + *matchoff + *matchlen)) + if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr, + dptr + *matchoff + *matchlen)) return -1; return 1; } -- cgit v1.1 From f22eb25cf5b1157b29ef88c793b71972efc47143 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Aug 2012 10:08:47 +0000 Subject: netfilter: nf_nat_sip: fix via header translation with multiple parameters Via-headers are parsed beginning at the first character after the Via-address. When the address is translated first and its length decreases, the offset to start parsing at is incorrect and header parameters might be missed. Update the offset after translating the Via-address to fix this. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index eef8f29..4ad9cf1 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; + unsigned int olen, matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; /* We're only interested in headers related to this @@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, goto next; } + olen = *datalen; if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ -- cgit v1.1 From 68e035c950dbceaf660144bf74054dfdfb6aad15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Aug 2012 12:47:37 +0200 Subject: netfilter: ctnetlink: fix missing locking while changing conntrack from nfqueue Since 9cb017665 netfilter: add glue code to integrate nfnetlink_queue and ctnetlink, we can modify the conntrack entry via nfnl_queue. However, the change of the conntrack entry via nfnetlink_queue requires appropriate locking to avoid concurrent updates. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 14f67a2..da4fc37 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1896,10 +1896,15 @@ static int ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; + int ret; nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); - return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_lock_bh(&nf_conntrack_lock); + ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_unlock_bh(&nf_conntrack_lock); + + return ret; } static struct nfq_ct_hook ctnetlink_nfqueue_hook = { -- cgit v1.1 From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:37 +0000 Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and __netpoll_setup() slave_enable_netpoll() and __netpoll_setup() may be called with read_lock() held, so should use GFP_ATOMIC to allocate memory. Eric suggested to pass gfp flags to __netpoll_setup(). Cc: Eric Dumazet Cc: "David S. Miller" Reported-by: Dan Carpenter Signed-off-by: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 ++++--- net/bridge/br_device.c | 12 ++++++------ net/bridge/br_if.c | 2 +- net/bridge/br_private.h | 4 ++-- net/core/netpoll.c | 8 ++++---- 5 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 73a2a83..ee4ae09 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -669,19 +669,20 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, + gfp_t gfp) { struct vlan_dev_priv *info = vlan_dev_priv(dev); struct net_device *real_dev = info->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + netpoll = kzalloc(sizeof(*netpoll), gfp); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev); + err = __netpoll_setup(netpoll, real_dev, gfp); if (err) { kfree(netpoll); goto out; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 3334845..ed0e0f9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -213,7 +213,8 @@ static void br_netpoll_cleanup(struct net_device *dev) } } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, + gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; @@ -222,8 +223,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) list_for_each_entry_safe(p, n, &br->port_list, list) { if (!p->dev) continue; - - err = br_netpoll_enable(p); + err = br_netpoll_enable(p, gfp); if (err) goto fail; } @@ -236,17 +236,17 @@ fail: goto out; } -int br_netpoll_enable(struct net_bridge_port *p) +int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { struct netpoll *np; int err = 0; - np = kzalloc(sizeof(*p->np), GFP_KERNEL); + np = kzalloc(sizeof(*p->np), gfp); err = -ENOMEM; if (!np) goto out; - err = __netpoll_setup(np, p->dev); + err = __netpoll_setup(np, p->dev, gfp); if (err) { kfree(np); goto out; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e1144e1..171fd6b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; err = netdev_set_master(dev, br->dev); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a768b24..f507d2a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -extern int br_netpoll_enable(struct net_bridge_port *p); +extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); extern void br_netpoll_disable(struct net_bridge_port *p); #else static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) @@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p) +static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b4c90e4..37cc854 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -715,7 +715,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) { struct netpoll_info *npinfo; const struct net_device_ops *ops; @@ -734,7 +734,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + npinfo = kmalloc(sizeof(*npinfo), gfp); if (!npinfo) { err = -ENOMEM; goto out; @@ -752,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); if (err) goto free_npinfo; } @@ -857,7 +857,7 @@ int netpoll_setup(struct netpoll *np) refill_skbs(); rtnl_lock(); - err = __netpoll_setup(np, ndev); + err = __netpoll_setup(np, ndev, GFP_KERNEL); rtnl_unlock(); if (err) -- cgit v1.1 From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:38 +0000 Subject: netpoll: make __netpoll_cleanup non-block Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup() may be called with read_lock() held too, so we should make them non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 6 +----- net/bridge/br_device.c | 6 +----- net/core/netpoll.c | 42 ++++++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ee4ae09..b65623f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -704,11 +704,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) info->netpoll = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - - __netpoll_cleanup(netpoll); - kfree(netpoll); + __netpoll_free_rcu(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ed0e0f9..f41ba40 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - - __netpoll_cleanup(np); - kfree(np); + __netpoll_free_rcu(np); } #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 37cc854..dc17f1d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -878,6 +878,24 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); +static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) +{ + struct netpoll_info *npinfo = + container_of(rcu_head, struct netpoll_info, rcu); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + + /* we can't call cancel_delayed_work_sync here, as we are in softirq */ + cancel_delayed_work(&npinfo->tx_work); + + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + /* now cancel it again */ + cancel_delayed_work(&npinfo->tx_work); + kfree(npinfo); +} + void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -903,20 +921,24 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); + call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu_bh(); +static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +{ + struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_delayed_work_sync(&npinfo->tx_work); + __netpoll_cleanup(np); + kfree(np); +} - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - } +void __netpoll_free_rcu(struct netpoll *np) +{ + call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); } -EXPORT_SYMBOL_GPL(__netpoll_cleanup); +EXPORT_SYMBOL_GPL(__netpoll_free_rcu); void netpoll_cleanup(struct netpoll *np) { -- cgit v1.1 From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:40 +0000 Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx() In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(), this patch fixes it by using the 'npinfo' passed from netpoll_rx() where it is already dereferenced with rcu_dereference_bh(). Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index dc17f1d..d055bb0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -543,13 +543,12 @@ static void arp_reply(struct sk_buff *skb) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -int __netpoll_rx(struct sk_buff *skb) +int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { int proto, len, ulen; int hits = 0; const struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npinfo = skb->dev->npinfo; struct netpoll *np, *tmp; if (list_empty(&npinfo->rx_np)) -- cgit v1.1 From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:42 +0000 Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev() This patch fixes several problems in the call path of netpoll_send_skb_on_dev(): 1. Disable IRQ's before calling netpoll_send_skb_on_dev(). 2. All the callees of netpoll_send_skb_on_dev() should use rcu_dereference_bh() to dereference ->npinfo. 3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d055bb0..174346a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -54,7 +54,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void arp_reply(struct sk_buff *skb); +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -170,7 +170,8 @@ static void poll_napi(struct net_device *dev) list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(dev->npinfo, napi, budget); + budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), + napi, budget); spin_unlock(&napi->poll_lock); if (!budget) @@ -185,13 +186,14 @@ static void service_arp_queue(struct netpoll_info *npi) struct sk_buff *skb; while ((skb = skb_dequeue(&npi->arp_tx))) - arp_reply(skb); + netpoll_arp_reply(skb, npi); } } static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; + struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); if (!dev || !netif_running(dev)) return; @@ -206,17 +208,18 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); if (dev->flags & IFF_SLAVE) { - if (dev->npinfo) { + if (ni) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; - while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->arp_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + skb_queue_tail(&bond_ni->arp_tx, skb); } } } - service_arp_queue(dev->npinfo); + service_arp_queue(ni); zap_completion_queue(); } @@ -302,6 +305,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } +/* call with IRQ disabled */ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev) { @@ -309,8 +313,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, unsigned long tries; const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ - struct netpoll_info *npinfo = np->dev->npinfo; + struct netpoll_info *npinfo; + + WARN_ON_ONCE(!irqs_disabled()); + npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { __kfree_skb(skb); return; @@ -319,11 +326,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - unsigned long flags; txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { @@ -347,10 +352,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } WARN_ONCE(!irqs_disabled(), - "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", dev->name, ops->ndo_start_xmit); - local_irq_restore(flags); } if (status != NETDEV_TX_OK) { @@ -423,9 +427,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void arp_reply(struct sk_buff *skb) +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; -- cgit v1.1 From d30362c0712eb567334b3b66de7c40d4372f2c6f Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:43 +0000 Subject: bridge: add some comments for NETDEV_RELEASE Add comments on why we don't notify NETDEV_RELEASE. Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_if.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 171fd6b..1c8fdc3 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ del_nbp(p); spin_lock_bh(&br->lock); -- cgit v1.1 From 4e3828c4bfd90b00a951cad7c8da27d1966beefe Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:44 +0000 Subject: bridge: use list_for_each_entry() in netpoll functions We don't delete 'p' from the list in the loop, so we can just use list_for_each_entry(). Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_device.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f41ba40..32211fa 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -206,21 +206,20 @@ static void br_poll_controller(struct net_device *br_dev) static void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) br_netpoll_disable(p); - } } static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; int err = 0; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; err = br_netpoll_enable(p, gfp); -- cgit v1.1 From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:45 +0000 Subject: netpoll: check netpoll tx status on the right device Although this doesn't matter actually, because netpoll_tx_running() doesn't use the parameter, the code will be more readable. For team_dev_queue_xmit() we have to move it down to avoid compile errors. Cc: David Miller Signed-off-by: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e9466d4..02015a5 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - if (unlikely(netpoll_tx_running(to->dev))) { + if (unlikely(netpoll_tx_running(to->br->dev))) { if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { -- cgit v1.1 From f3da38932b46d1bf171634cc7aae3da405eaf7a6 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:47 +0000 Subject: vlan: clean up some variable names To be consistent, s/info/vlan/. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b65623f..0347d48 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -672,8 +672,8 @@ static void vlan_dev_poll_controller(struct net_device *dev) static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, gfp_t gfp) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct net_device *real_dev = info->real_dev; + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; @@ -688,7 +688,7 @@ static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *n goto out; } - info->netpoll = netpoll; + vlan->netpoll = netpoll; out: return err; @@ -696,13 +696,13 @@ out: static void vlan_dev_netpoll_cleanup(struct net_device *dev) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct netpoll *netpoll = info->netpoll; + struct vlan_dev_priv *vlan= vlan_dev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; if (!netpoll) return; - info->netpoll = NULL; + vlan->netpoll = NULL; __netpoll_free_rcu(netpoll); } -- cgit v1.1 From 6eacf8ad8d01c49b95b994b0bf379db2b5b29460 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:48 +0000 Subject: vlan: clean up vlan_dev_hard_start_xmit() Clean up vlan_dev_hard_start_xmit() function. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 0347d48..4024424 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return rc; } +static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { + vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; - vlan_tci = vlan_dev_priv(dev)->vlan_id; + vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb->dev = vlan_dev_priv(dev)->real_dev; + skb->dev = vlan->real_dev; len = skb->len; - if (netpoll_tx_running(dev)) - return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); + if (unlikely(netpoll_tx_running(dev))) + return vlan_netpoll_send_skb(vlan, skb); + ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; - stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); + stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { - this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); + this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); } return ret; -- cgit v1.1 From 689971b44613883ee74ae9c1b31a864aaa3a8e17 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:49 +0000 Subject: netpoll: handle vlan tags in netpoll tx and rx path Without this patch, I can't get netconsole logs remotely over vlan. The reason is probably we don't handle vlan tags in either netpoll tx or rx path. I am not sure if I use these vlan functions correctly, at least this patch works. Cc: Benjamin LaHaise Cc: Patrick McHardy Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 174346a..e4ba3e7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -334,6 +335,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { + if (vlan_tx_tag_present(skb) && + !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { + skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + break; + skb->vlan_tci = 0; + } + status = ops->ndo_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); @@ -567,6 +576,12 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) return 1; } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + proto = ntohs(eth_hdr(skb)->h_proto); if (proto != ETH_P_IP) goto out; -- cgit v1.1 From 6bdb7fe31046ac50b47e83c35cd6c6b6160a475d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Aug 2012 01:24:50 +0000 Subject: netpoll: re-enable irq in poll_napi() napi->poll() needs IRQ enabled, so we have to re-enable IRQ before calling it. Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e4ba3e7..346b1eb 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -168,16 +168,24 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int budget = 16; + WARN_ON_ONCE(!irqs_disabled()); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + local_irq_enable(); if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { + rcu_read_lock_bh(); budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), napi, budget); + rcu_read_unlock_bh(); spin_unlock(&napi->poll_lock); - if (!budget) + if (!budget) { + local_irq_disable(); break; + } } + local_irq_disable(); } } -- cgit v1.1 From 7bd86cc282a458b66c41e3f6676de6656c99b8db Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 12 Aug 2012 20:09:59 +0000 Subject: ipv4: Cache local output routes Commit caacf05e5ad1abf causes big drop of UDP loop back performance. The cause of the regression is that we do not cache the local output routes. Each time we send a datagram from unconnected UDP socket, the kernel allocates a dst_entry and adds it to the rt_uncached_list. It creates lock contention on the rt_uncached_lock. Reported-by: Alex Shi Signed-off-by: Yan, Zheng Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ba974..fd9ecb5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2028,7 +2028,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } dev_out = net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; - res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } -- cgit v1.1 From 4855d6f3116e891b66198838b683dce3dcf6e874 Mon Sep 17 00:00:00 2001 From: Igor Maravic Date: Sun, 12 Aug 2012 22:31:58 +0000 Subject: net: ipv6: proc: Fix error handling Fix error handling in case making of dir dev_snmp6 failes Signed-off-by: Igor Maravic Signed-off-by: David S. Miller --- net/ipv6/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d..745a320 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) goto proc_dev_snmp6_fail; return 0; +proc_dev_snmp6_fail: + proc_net_remove(net, "snmp6"); proc_snmp6_fail: proc_net_remove(net, "sockstat6"); -proc_dev_snmp6_fail: - proc_net_remove(net, "dev_snmp6"); return -ENOMEM; } -- cgit v1.1 From 6024935f5ff5f1646bce8404416318e5fd4a0c4a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:49:59 +0000 Subject: llc2: Fix silent failure of llc_station_init() llc_station_init() creates and processes an event skb with no effect other than to change the state from DOWN to UP. Allocation failure is reported, but then ignored by its caller, llc2_init(). Remove this possibility by simply initialising the state as UP. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_station.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 6828e39..45ddbb9 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb) llc_station_state_process(skb); } -int __init llc_station_init(void) +void __init llc_station_init(void) { - int rc = -ENOBUFS; - struct sk_buff *skb; - struct llc_station_state_ev *ev; - skb_queue_head_init(&llc_main_station.mac_pdu_q); skb_queue_head_init(&llc_main_station.ev_q.list); spin_lock_init(&llc_main_station.ev_q.lock); @@ -700,20 +696,9 @@ int __init llc_station_init(void) (unsigned long)&llc_main_station); llc_main_station.ack_timer.expires = jiffies + sysctl_llc_station_ack_timeout; - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) - goto out; - rc = 0; llc_set_station_handler(llc_station_rcv); - ev = llc_station_ev(skb); - memset(ev, 0, sizeof(*ev)); llc_main_station.maximum_retry = 1; - llc_main_station.state = LLC_STATION_STATE_DOWN; - ev->type = LLC_STATION_EV_TYPE_SIMPLE; - ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK; - rc = llc_station_next_state(skb); -out: - return rc; + llc_main_station.state = LLC_STATION_STATE_UP; } void __exit llc_station_exit(void) -- cgit v1.1 From f4f8720febf0d785a054fc09bde5e3ad09728a58 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:50:43 +0000 Subject: llc2: Call llc_station_exit() on llc2_init() failure path Otherwise the station packet handler will remain registered even though the module is unloaded. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/af_llc.c | 5 +++-- net/llc/llc_station.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f6fe4d4..8c2919c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1206,7 +1206,7 @@ static int __init llc2_init(void) rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); - goto out_unregister_llc_proto; + goto out_station; } rc = llc_sysctl_init(); if (rc) { @@ -1226,7 +1226,8 @@ out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); -out_unregister_llc_proto: +out_station: + llc_station_exit(); proto_unregister(&llc_proto); goto out; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 45ddbb9..bba5184 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -701,7 +701,7 @@ void __init llc_station_init(void) llc_main_station.state = LLC_STATION_STATE_UP; } -void __exit llc_station_exit(void) +void llc_station_exit(void) { llc_set_station_handler(NULL); } -- cgit v1.1 From aadf31de16a7b2878af00a02e6557df84efa784b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Aug 2012 02:50:55 +0000 Subject: llc: Fix races between llc2 handler use and (un)registration When registering the handlers, any state they rely on must be completely initialised first. When unregistering, we must wait until they are definitely no longer running. llc_rcv() must also avoid reading the handler pointers again after checking for NULL. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/llc/llc_input.c | 21 +++++++++++++++++---- net/llc/llc_station.c | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index e32cab4..dd3e833 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap, void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, struct sk_buff *skb)) { + smp_wmb(); /* ensure initialisation is complete before it's called */ if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = handler; } @@ -50,11 +51,19 @@ void llc_remove_pack(int type) { if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = NULL; + synchronize_net(); } void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) { + /* Ensure initialisation is complete before it's called */ + if (handler) + smp_wmb(); + llc_station_handler = handler; + + if (!handler) + synchronize_net(); } /** @@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int dest; int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); if (!net_eq(dev_net(dev), &init_net)) goto drop; @@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); else @@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, if (cskb) rcv(cskb, dev, pt, orig_dev); } - llc_type_handlers[dest - 1](sap, skb); + sap_handler(sap, skb); } llc_sap_put(sap); out: @@ -202,9 +214,10 @@ drop: kfree_skb(skb); goto out; handle_station: - if (!llc_station_handler) + sta_handler = ACCESS_ONCE(llc_station_handler); + if (!sta_handler) goto drop; - llc_station_handler(skb); + sta_handler(skb); goto out; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index bba5184..b2f2bac 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -696,9 +696,9 @@ void __init llc_station_init(void) (unsigned long)&llc_main_station); llc_main_station.ack_timer.expires = jiffies + sysctl_llc_station_ack_timeout; - llc_set_station_handler(llc_station_rcv); llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_UP; + llc_set_station_handler(llc_station_rcv); } void llc_station_exit(void) -- cgit v1.1 From 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Aug 2012 08:54:51 +0000 Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock Cong Wang reports that lockdep detected suspicious RCU usage while enabling IPV6 forwarding: [ 1123.310275] =============================== [ 1123.442202] [ INFO: suspicious RCU usage. ] [ 1123.558207] 3.6.0-rc1+ #109 Not tainted [ 1123.665204] ------------------------------- [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section! [ 1123.992320] [ 1123.992320] other info that might help us debug this: [ 1123.992320] [ 1124.307382] [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0 [ 1124.522220] 2 locks held by sysctl/5710: [ 1124.648364] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_trylock+0x15/0x17 [ 1124.882211] #1: (rcu_read_lock){.+.+.+}, at: [] rcu_lock_acquire+0x0/0x29 [ 1125.085209] [ 1125.085209] stack backtrace: [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109 [ 1125.441291] Call Trace: [ 1125.545281] [] lockdep_rcu_suspicious+0x109/0x112 [ 1125.667212] [] rcu_preempt_sleep_check+0x45/0x47 [ 1125.781838] [] __might_sleep+0x1e/0x19b [...] [ 1127.445223] [] call_netdevice_notifiers+0x4a/0x4f [...] [ 1127.772188] [] dev_disable_lro+0x32/0x6b [ 1127.885174] [] dev_forward_change+0x30/0xcb [ 1128.013214] [] addrconf_forward_change+0x85/0xc5 [...] addrconf_forward_change() uses RCU iteration over the netdev list, which is unnecessary since it already holds the RTNL lock. We also cannot reasonably require netdevice notifier functions not to sleep. Reported-by: Cong Wang Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7918181..6bc85f7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) -- cgit v1.1 From c03307eab68d583ea6db917681afa14ed1fb3b84 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Aug 2012 08:19:33 -0700 Subject: bridge: fix rcu dereference outside of rcu_read_lock Alternative solution for problem found by Linux Driver Verification project (linuxtesting.org). As it noted in the comment before the br_handle_frame_finish function, this function should be called under rcu_read_lock. The problem callgraph: br_dev_xmit -> br_nf_pre_routing_finish_bridge_slow -> -> br_handle_frame_finish -> br_port_get_rcu -> rcu_dereference And in this case there is no read-lock section. Reported-by: Denis Efremov Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32211fa..070e8a6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { br_nf_pre_routing_finish_bridge_slow(skb); + rcu_read_unlock(); return NETDEV_TX_OK; } #endif @@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - rcu_read_lock(); if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb); else if (is_multicast_ether_addr(dest)) { -- cgit v1.1 From e862f1a9b7df4e8196ebec45ac62295138aa3fc2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:44 +0000 Subject: atm: fix info leak in getsockopt(SO_ATMPVC) The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/atm/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index b4b44db..0c0ad93 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; -- cgit v1.1 From 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:45 +0000 Subject: atm: fix info leak via getsockname() The ATM code fails to initialize the two padding bytes of struct sockaddr_atmpvc inserted for alignment. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/atm/pvc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 3a73491..ae03240 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; -- cgit v1.1 From e15ca9a0ef9a86f0477530b0f44a725d67f889ee Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:46 +0000 Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER) The HCI code fails to initialize the two padding bytes of struct hci_ufilter before copying it to userland -- that for leaking two bytes kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de..a27bbc3 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1009,6 +1009,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); -- cgit v1.1 From 3f68ba07b1da811bf383b4b701b129bfcb2e4988 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:47 +0000 Subject: Bluetooth: HCI - Fix info leak via getsockname() The HCI code fails to initialize the hci_channel member of struct sockaddr_hci and that for leaks two bytes kernel stack via the getsockname() syscall. Initialize hci_channel with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/hci_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a27bbc3..19fdac7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; + haddr->hci_channel= 0; release_sock(sk); return 0; -- cgit v1.1 From 9ad2de43f1aee7e7274a4e0d41465489299e344b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:48 +0000 Subject: Bluetooth: RFCOMM - Fix info leak in getsockopt(BT_SECURITY) The RFCOMM code fails to initialize the key_size member of struct bt_security before copying it to userland -- that for leaking one byte kernel stack. Initialize key_size with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7e1e596..64f55ca 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -822,6 +822,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c } sec.level = rfcomm_pi(sk)->sec_level; + sec.key_size = 0; len = min_t(unsigned int, len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) -- cgit v1.1 From f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:49 +0000 Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST) The RFCOMM code fails to initialize the two padding bytes of struct rfcomm_dev_list_req inserted for alignment before copying it to userland. Additionally there are two padding bytes in each instance of struct rfcomm_dev_info. The ioctl() that for disclosures two bytes plus dev_num times two bytes uninitialized kernel heap memory. Allocate the memory using kzalloc() to fix this issue. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cb96077..56f1823 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - dl = kmalloc(size, GFP_KERNEL); + dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; -- cgit v1.1 From 9344a972961d1a6d2c04d9008b13617bcb6ec2ef Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:50 +0000 Subject: Bluetooth: RFCOMM - Fix info leak via getsockname() The RFCOMM code fails to initialize the trailing padding byte of struct sockaddr_rc added for alignment. It that for leaks one byte kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 64f55ca..1a17850 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) -- cgit v1.1 From 792039c73cf176c8e39a6e8beef2c94ff46522ed Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:51 +0000 Subject: Bluetooth: L2CAP - Fix info leak via getsockname() The L2CAP code fails to initialize the l2_bdaddr_type member of struct sockaddr_l2 and the padding byte added for alignment. It that for leaks two bytes kernel stack via the getsockname() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: David S. Miller --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b94abd3..1497edd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -245,6 +245,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); -- cgit v1.1 From 04d4fbca1017c11381e7d82acea21dd741e748bc Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:52 +0000 Subject: l2tp: fix info leak via getsockname() The L2TP code for IPv6 fails to initialize the l2tp_unused member of struct sockaddr_l2tpip6 and that for leaks two bytes kernel stack via the getsockname() syscall. Initialize l2tp_unused with 0 to avoid the info leak. Signed-off-by: Mathias Krause Cc: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 35e1e4b..9275471 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_family = AF_INET6; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_unused = 0; if (peer) { if (!lsk->peer_conn_id) return -ENOTCONN; -- cgit v1.1 From 3592aaeb80290bda0f2cf0b5456c97bfc638b192 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:53 +0000 Subject: llc: fix info leak via getsockname() The LLC code wrongly returns 0, i.e. "success", when the socket is zapped. Together with the uninitialized uaddrlen pointer argument from sys_getsockname this leads to an arbitrary memory leak of up to 128 bytes kernel stack via the getsockname() syscall. Return an error instead when the socket is zapped to prevent the info leak. Also remove the unnecessary memset(0). We don't directly write to the memory pointed by uaddr but memcpy() a local structure at the end of the function that is properly initialized. Signed-off-by: Mathias Krause Cc: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8c2919c..c219000 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = 0; + int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; *uaddrlen = sizeof(sllc); - memset(uaddr, 0, *uaddrlen); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) -- cgit v1.1 From 276bdb82dedb290511467a5a4fdbe9f0b52dce6f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:54 +0000 Subject: dccp: check ccid before dereferencing ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt() might be called with a NULL ccid pointer leading to a NULL pointer dereference. This could lead to a privilege escalation if the attacker is able to map page 0 and prepare it with a fake ccid_ops pointer. Signed-off-by: Mathias Krause Cc: Gerrit Renker Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582..fb85d37 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; -- cgit v1.1 From 7b07f8eb75aa3097cdfd4f6eac3da49db787381d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:55 +0000 Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO) The CCID3 code fails to initialize the trailing padding bytes of struct tfrc_tx_info added for alignment on 64 bit architectures. It that for potentially leaks four bytes kernel stack via the getsockopt() syscall. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d65e987..119c043 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; -- cgit v1.1 From 2d8a041b7bfe1097af21441cb77d6af95f4f4680 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:56 +0000 Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT) If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is not set, __ip_vs_get_timeouts() does not fully initialize the structure that gets copied to userland and that for leaks up to 12 bytes of kernel stack. Add an explicit memset(0) before passing the structure to __ip_vs_get_timeouts() to avoid the info leak. Signed-off-by: Mathias Krause Cc: Wensong Zhang Cc: Simon Horman Cc: Julian Anastasov Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_ctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dd..72bf32a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2759,6 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; + memset(&t, 0, sizeof(t)); __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; -- cgit v1.1 From 43da5f2e0d0c69ded3d51907d9552310a6b545e8 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Aug 2012 11:31:57 +0000 Subject: net: fix info leak in compat dev_ifconf() The implementation of dev_ifconf() for the compat ioctl interface uses an intermediate ifc structure allocated in userland for the duration of the syscall. Though, it fails to initialize the padding bytes inserted for alignment and that for leaks four bytes of kernel stack. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index dfe5b66..a5471f8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; -- cgit v1.1 From 2614f86490122bf51eb7c12ec73927f1900f4e7d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Aug 2012 02:25:24 +0200 Subject: netfilter: nf_ct_expect: fix possible access to uninitialized timer In __nf_ct_expect_check, the function refresh_timer returns 1 if a matching expectation is found and its timer is successfully refreshed. This results in nf_ct_expect_related returning 0. Note that at this point: - the passed expectation is not inserted in the expectation table and its timer was not initialized, since we have refreshed one matching/existing expectation. - nf_ct_expect_alloc uses kmem_cache_alloc, so the expectation timer is in some undefined state just after the allocation, until it is appropriately initialized. This can be a problem for the SIP helper during the expectation addition: ... if (nf_ct_expect_related(rtp_exp) == 0) { if (nf_ct_expect_related(rtcp_exp) != 0) nf_ct_unexpect_related(rtp_exp); ... Note that nf_ct_expect_related(rtp_exp) may return 0 for the timer refresh case that is detailed above. Then, if nf_ct_unexpect_related(rtcp_exp) returns != 0, nf_ct_unexpect_related(rtp_exp) is called, which does: spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } spin_unlock_bh(&nf_conntrack_lock); Note that del_timer always returns false if the timer has been initialized. However, the timer was not initialized since setup_timer was not called, therefore, the expectation timer remains in some undefined state. If I'm not missing anything, this may lead to the removal an unexistent expectation. To fix this, the optimization that allows refreshing an expectation is removed. Now nf_conntrack_expect_related looks more consistent to me since it always add the expectation in case that it returns success. Thanks to Patrick McHardy for participating in the discussion of this patch. I think this may be the source of the problem described by: http://marc.info/?l=netfilter-devel&m=134073514719421&w=2 Reported-by: Rafal Fitt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 45cf602..527651a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master, } } -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - const struct nf_conntrack_expect_policy *p; - - if (!del_timer(&i->timeout)) - return 0; - - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[i->class]; - i->timeout.expires = jiffies + p->timeout * HZ; - add_timer(&i->timeout); - return 1; -} - static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; @@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n; + struct hlist_node *n, *next; unsigned int h; int ret = 1; @@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_ct_expect_put(i); + break; } } else if (expect_clash(i, expect)) { ret = -EBUSY; -- cgit v1.1 From 16c0b164bd24d44db137693a36b428ba28970c62 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 15 Aug 2012 20:44:27 +0000 Subject: act_mirred: do not drop packets when fails to mirror it We drop packet unconditionally when we fail to mirror it. This is not intended in some cases. Consdier for kvm guest, we may mirror the traffic of the bridge to a tap device used by a VM. When kernel fails to mirror the packet in conditions such as when qemu crashes or stop polling the tap, it's hard for the management software to detect such condition and clean the the mirroring before. This would lead all packets to the bridge to be dropped and break the netowrk of other virtual machines. To solve the issue, the patch does not drop packets when kernel fails to mirror it, and only drop the redirected packets. Signed-off-by: Jason Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc1..9c0fd0c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, out: if (err) { m->tcf_qstats.overlimits++; - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - retval = TC_ACT_SHOT; - } else { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + retval = TC_ACT_SHOT; + else + retval = m->tcf_action; + } else retval = m->tcf_action; - } spin_unlock(&m->tcf_lock); return retval; -- cgit v1.1 From f796c20cf67aa54c9130d0dc41307c0025719b85 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:24 +0000 Subject: net: netprio: fix files lock and remove useless d_path bits Add lock to prevent a race with a file closing and also remove useless and ugly sscanf code. The extra code was never needed and the case it supposedly protected against is in fact handled correctly by sock_from_file as pointed out by Al Viro. CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ed0c043..f65dba3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -277,12 +277,6 @@ out_free_devname: void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; - char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); - - if (!tmp) { - pr_warn("Unable to attach cgrp due to alloc failure!\n"); - return; - } cgroup_taskset_for_each(p, cgrp, tset) { unsigned int fd; @@ -296,32 +290,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) continue; } - rcu_read_lock(); + spin_lock(&files->file_lock); fdt = files_fdtable(files); for (fd = 0; fd < fdt->max_fds; fd++) { - char *path; struct file *file; struct socket *sock; - unsigned long s; - int rv, err = 0; + int err; file = fcheck_files(files, fd); if (!file) continue; - path = d_path(&file->f_path, tmp, PAGE_SIZE); - rv = sscanf(path, "socket:[%lu]", &s); - if (rv <= 0) - continue; - sock = sock_from_file(file, &err); - if (!err) + if (sock) sock_update_netprioidx(sock->sk, p); } - rcu_read_unlock(); + spin_unlock(&files->file_lock); task_unlock(p); } - kfree(tmp); } static struct cftype ss_files[] = { -- cgit v1.1 From 48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:30 +0000 Subject: net: netprio: fd passed in SCM_RIGHTS datagram not set correctly A socket fd passed in a SCM_RIGHTS datagram was not getting updated with the new tasks cgrp prioidx. This leaves IO on the socket tagged with the old tasks priority. To fix this add a check in the scm recvmsg path to update the sock cgrp prioidx with the new tasks value. Thanks to Al Viro for catching this. CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/scm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd..040cebe 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); isk, current); fd_install(new_fd, fp[i]); } -- cgit v1.1 From 476ad154f3b41dd7d9a08a2f641e28388abc2fd1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 14 Aug 2012 12:34:35 +0000 Subject: net: netprio: fix cgrp create and write priomap race A race exists where creating cgroups and also updating the priomap may result in losing a priomap update. This is because priomap writers are not protected by rtnl_lock. Move priority writer into rtnl_lock()/rtnl_unlock(). CC: Neil Horman Reported-by: Al Viro Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index f65dba3..c75e3f9 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev) u32 max_len; struct netprio_map *map; - rtnl_lock(); max_len = atomic_read(&max_prioidx) + 1; map = rtnl_dereference(dev->priomap); if (!map || map->priomap_len < max_len) ret = extend_netdev_table(dev, max_len); - rtnl_unlock(); return ret; } @@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; + rtnl_lock(); ret = write_update_netdev_table(dev); if (ret < 0) goto out_put_dev; - rcu_read_lock(); - map = rcu_dereference(dev->priomap); + map = rtnl_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; - rcu_read_unlock(); out_put_dev: + rtnl_unlock(); dev_put(dev); out_free_devname: -- cgit v1.1 From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 16 Aug 2012 22:02:58 +0000 Subject: af_packet: don't emit packet on orig fanout group If a packet is emitted on one socket in one group of fanout sockets, it is transmitted again. It is thus read again on one of the sockets of the fanout group. This result in a loop for software which generate packets when receiving one. This retransmission is not the intended behavior: a fanout group must behave like a single socket. The packet should not be transmitted on a socket if it originates from a socket belonging to the same fanout group. This patch fixes the issue by changing the transmission check to take fanout group info account. Reported-by: Aleksandr Kotov Signed-off-by: Eric Leblond Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++++++-- net/packet/af_packet.c | 9 +++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a39354e..debd937 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1642,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) +{ + if (ptype->af_packet_priv == NULL) + return false; + + if (ptype->id_match) + return ptype->id_match(ptype, skb->sk); + else if ((struct sock *)ptype->af_packet_priv == skb->sk) + return true; + + return false; +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1659,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * they originated from - MvS (miquels@drinkel.ow.org) */ if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { + (!skb_loop_sk(ptype, skb))) { if (pt_prev) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ac890a..aee7196 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } -- cgit v1.1 From d92c7f8aabae913de16eb855b19cd2002c341896 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 17 Aug 2012 10:33:12 +0000 Subject: caif: Do not dereference NULL in chnl_recv_cb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In net/caif/chnl_net.c::chnl_recv_cb() we call skb_header_pointer() which may return NULL, but we do not check for a NULL pointer before dereferencing it. This patch adds such a NULL check and properly free's allocated memory and return an error (-EINVAL) on failure - much better than crashing.. Signed-off-by: Jesper Juhl Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 69771c0..e597733 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) /* check the version of IP */ ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) { + kfree_skb(skb); + return -EINVAL; + } switch (*ip_version >> 4) { case 4: -- cgit v1.1 From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 20 Aug 2012 02:56:56 -0700 Subject: net: ipv6: fix oops in inet_putpeer() Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not initialized, causing a false positive result from inetpeer_ptr_is_peer(), which in turn causes a NULL pointer dereference in inet_putpeer(). Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at inet_putpeer+0xe/0x16 EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641 ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8 [] xfrm6_dst_destroy+0x42/0xdb [] dst_destroy+0x1d/0xa4 [] xfrm_bundle_flo_delete+0x2b/0x36 [] flow_cache_gc_task+0x85/0x9f [] process_one_work+0x122/0x441 [] ? apic_timer_interrupt+0x31/0x38 [] ? flow_cache_new_hashrnd+0x2b/0x2b [] worker_thread+0x113/0x3cc Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to properly initialize the dst's peer pointer. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 8 ++++++++ net/xfrm/xfrm_policy.c | 2 ++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812..f8c4c08 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) +{ + struct rt6_info *rt = (struct rt6_info *)xdst; + + rt6_init_peer(rt, net->ipv6.peers); +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165..5a2aa17 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); -- cgit v1.1 From 3de7a37b02f607716753dc669c1dca4f71db08fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 14:36:44 +0000 Subject: net/core/dev.c: fix kernel-doc warning Fix kernel-doc warning: Warning(net/core/dev.c:5745): No description found for parameter 'dev' Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index debd937..8398836 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5744,6 +5744,7 @@ EXPORT_SYMBOL(netdev_refcnt_read); /** * netdev_wait_allrefs - wait until all references are gone. + * @dev: target net_device * * This is called when unregistering network devices. * -- cgit v1.1 From fae6ef87faeb8853896920c68ee703d715799d28 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 19 Aug 2012 03:30:38 +0000 Subject: net: tcp: move sk_rx_dst_set call after tcp_create_openreq_child() This commit removes the sk_rx_dst_set calls from tcp_create_openreq_child(), because at that point the icsk_af_ops field of ipv6_mapped TCP sockets has not been set to its proper final value. Instead, to make sure we get the right sk_rx_dst_set variant appropriate for the address family of the new connection, we have tcp_v{4,6}_syn_recv_sock() directly call the appropriate function shortly after the call to tcp_create_openreq_child() returns. This also moves inet6_sk_rx_dst_set() to avoid a forward declaration with the new approach. Signed-off-by: Neal Cardwell Reported-by: Artem Savkov Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 2 -- net/ipv6/tcp_ipv6.c | 25 +++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7678237..5bf2040 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1462,6 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c9dce..6ff7f10 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb9ce2b..a3e60cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } #endif +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1729,18 +1742,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt = (const struct rt6_info *)dst; - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; -} - static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit v1.1 From 2dba62c30ec3040ef1b647a8976fd7e6854cc7a7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 19 Aug 2012 10:16:08 +0000 Subject: netfilter: nfnetlink_log: fix NLA_PUT macro removal bug Commit 1db20a52 (nfnetlink_log: Stop using NLA_PUT*().) incorrectly converted a NLA_PUT_BE16 macro to nla_put_be32() in nfnetlink_log: - NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 169ab59..592091c1 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -480,7 +480,7 @@ __build_packet_message(struct nfulnl_instance *inst, } if (indev && skb_mac_header_was_set(skb)) { - if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || + if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, htons(skb->dev->hard_header_len)) || nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, -- cgit v1.1 From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 19 Aug 2012 12:29:16 -0700 Subject: libceph: delay debugfs initialization until we learn global_id The debugfs directory includes the cluster fsid and our unique global_id. We need to delay the initialization of the debug entry until we have learned both the fsid and our global_id from the monitor or else the second client can't create its debugfs entry and will fail (and multiple client instances aren't properly reflected in debugfs). Reported by: Yan, Zheng Signed-off-by: Sage Weil Reviewed-by: Yehuda Sadeh --- net/ceph/ceph_common.c | 1 - net/ceph/debugfs.c | 4 ++++ net/ceph/mon_client.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 50 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69e38db..a802029 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) return -1; } } else { - pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a..38b5dc1 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, client->monc.auth->global_id); + dout("ceph_debugfs_client_init %p %s\n", client, name); + + BUG_ON(client->debugfs_dir); client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); if (!client->debugfs_dir) goto out; @@ -234,6 +237,7 @@ out: void ceph_debugfs_client_cleanup(struct ceph_client *client) { + dout("ceph_debugfs_client_cleanup %p\n", client); debugfs_remove(client->debugfs_osdmap); debugfs_remove(client->debugfs_monmap); debugfs_remove(client->osdc.debugfs_file); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 105d533..900ea0f 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -311,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) EXPORT_SYMBOL(ceph_monc_open_session); /* + * We require the fsid and global_id in order to initialize our + * debugfs dir. + */ +static bool have_debugfs_info(struct ceph_mon_client *monc) +{ + dout("have_debugfs_info fsid %d globalid %lld\n", + (int)monc->client->have_fsid, monc->auth->global_id); + return monc->client->have_fsid && monc->auth->global_id > 0; +} + +/* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. */ @@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_client *client = monc->client; struct ceph_monmap *monmap = NULL, *old = monc->monmap; void *p, *end; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); + dout("handle_monmap\n"); p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, if (!client->have_fsid) { client->have_fsid = true; + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); - /* - * do debugfs initialization without mutex to avoid - * creating a locking dependency - */ - ceph_debugfs_client_init(client); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } + goto out_unlocked; } out: @@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, { int ret; int was_auth = 0; + int had_debugfs_info, init_debugfs = 0; mutex_lock(&monc->mutex); + had_debugfs_info = have_debugfs_info(monc); if (monc->auth->ops) was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; @@ -889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc, __send_subscribe(monc); __resend_generic_request(monc); } + + if (!had_debugfs_info && have_debugfs_info(monc)) { + pr_info("client%lld fsid %pU\n", + ceph_client_id(monc->client), + &monc->client->fsid); + init_debugfs = 1; + } mutex_unlock(&monc->mutex); + + if (init_debugfs) { + /* + * do debugfs initialization without mutex to avoid + * creating a locking dependency + */ + ceph_debugfs_client_init(monc->client); + } } static int __validate_auth(struct ceph_mon_client *monc) -- cgit v1.1 From be1e44441a560c43c136a562d49a1c9623c91197 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 9 Aug 2012 18:12:28 -0400 Subject: svcrpc: fix BUG() in svc_tcp_clear_pages Examination of svc_tcp_clear_pages shows that it assumes sk_tcplen is consistent with sk_pages[] (in particular, sk_pages[n] can't be NULL if sk_tcplen would lead us to expect n pages of data). svc_tcp_restore_pages zeroes out sk_pages[] while leaving sk_tcplen. This is OK, since both functions are serialized by XPT_BUSY. However, that means the inconsistency must be repaired before dropping XPT_BUSY. Therefore we should be ensuring that svc_tcp_save_pages repairs the problem before exiting svc_tcp_recv_record on error. Symptoms were a BUG() in svc_tcp_clear_pages. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 18bc130..998aa8c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len >= 0) svsk->sk_tcplen += len; if (len != want) { + svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) goto err_other; - svc_tcp_save_pages(svsk, rqstp); dprintk("svc: incomplete TCP record (%d of %d)\n", svsk->sk_tcplen, svsk->sk_reclen); goto err_noclose; -- cgit v1.1 From f06f00a24d76e168ecb38d352126fd203937b601 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Aug 2012 16:04:40 -0400 Subject: svcrpc: sends on closed socket should stop immediately svc_tcp_sendto sets XPT_CLOSE if we fail to transmit the entire reply. However, the XPT_CLOSE won't be acted on immediately. Meanwhile other threads could send further replies before the socket is really shut down. This can manifest as data corruption: for example, if a truncated read reply is followed by another rpc reply, that second reply will look to the client like further read data. Symptoms were data corruption preceded by svc_tcp_sendto logging something like kernel: rpc-srv/tcp: nfsd: sent only 963696 when sending 1048708 bytes - shutting down socket Cc: stable@vger.kernel.org Reported-by: Malahal Naineni Tested-by: Malahal Naineni Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 88f2bf6..0d693a8 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -794,7 +794,8 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + if (test_bit(XPT_DEAD, &xprt->xpt_flags) + || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; else len = xprt->xpt_ops->xpo_sendto(rqstp); -- cgit v1.1 From d10f27a750312ed5638c876e4bd6aa83664cccd8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 17 Aug 2012 17:31:53 -0400 Subject: svcrpc: fix svc_xprt_enqueue/svc_recv busy-looping The rpc server tries to ensure that there will be room to send a reply before it receives a request. It does this by tracking, in xpt_reserved, an upper bound on the total size of the replies that is has already committed to for the socket. Currently it is adding in the estimate for a new reply *before* it checks whether there is space available. If it finds that there is not space, it then subtracts the estimate back out. This may lead the subsequent svc_xprt_enqueue to decide that there is space after all. The results is a svc_recv() that will repeatedly return -EAGAIN, causing server threads to loop without doing any actual work. Cc: stable@vger.kernel.org Reported-by: Michael Tokarev Tested-by: Michael Tokarev Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0d693a8..bac973a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_serv *serv = xprt->xpt_server; struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; @@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp, rqstp->rq_xprt); rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; wake_up(&rqstp->rq_wait); } else { @@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (xprt) { rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so @@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } svc_xprt_received(xprt); -- cgit v1.1 From 144d56e91044181ec0ef67aeca91e9a8b5718348 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Aug 2012 00:22:46 +0000 Subject: tcp: fix possible socket refcount problem Commit 6f458dfb40 (tcp: improve latencies of timer triggered events) added bug leading to following trace : [ 2866.131281] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.131726] [ 2866.132188] ========================= [ 2866.132281] [ BUG: held lock freed! ] [ 2866.132281] 3.6.0-rc1+ #622 Not tainted [ 2866.132281] ------------------------- [ 2866.132281] kworker/0:1/652 is freeing memory ffff880019ec0000-ffff880019ec0a1f, with a lock still held there! [ 2866.132281] (sk_lock-AF_INET-RPC){+.+...}, at: [] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] 4 locks held by kworker/0:1/652: [ 2866.132281] #0: (rpciod){.+.+.+}, at: [] process_one_work+0x1de/0x47f [ 2866.132281] #1: ((&task->u.tk_work)){+.+.+.}, at: [] process_one_work+0x1de/0x47f [ 2866.132281] #2: (sk_lock-AF_INET-RPC){+.+...}, at: [] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] #3: (&icsk->icsk_retransmit_timer){+.-...}, at: [] run_timer_softirq+0x1ad/0x35f [ 2866.132281] [ 2866.132281] stack backtrace: [ 2866.132281] Pid: 652, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #622 [ 2866.132281] Call Trace: [ 2866.132281] [] debug_check_no_locks_freed+0x112/0x159 [ 2866.132281] [] ? __sk_free+0xfd/0x114 [ 2866.132281] [] kmem_cache_free+0x6b/0x13a [ 2866.132281] [] __sk_free+0xfd/0x114 [ 2866.132281] [] sk_free+0x1c/0x1e [ 2866.132281] [] tcp_write_timer+0x51/0x56 [ 2866.132281] [] run_timer_softirq+0x218/0x35f [ 2866.132281] [] ? run_timer_softirq+0x1ad/0x35f [ 2866.132281] [] ? rb_commit+0x58/0x85 [ 2866.132281] [] ? tcp_write_timer_handler+0x148/0x148 [ 2866.132281] [] __do_softirq+0xcb/0x1f9 [ 2866.132281] [] ? _raw_spin_unlock+0x29/0x2e [ 2866.132281] [] call_softirq+0x1c/0x30 [ 2866.132281] [] do_softirq+0x4a/0xa6 [ 2866.132281] [] irq_exit+0x51/0xad [ 2866.132281] [] do_IRQ+0x9d/0xb4 [ 2866.132281] [] common_interrupt+0x6f/0x6f [ 2866.132281] [] ? sched_clock_cpu+0x58/0xd1 [ 2866.132281] [] ? _raw_spin_unlock_irqrestore+0x4c/0x56 [ 2866.132281] [] mod_timer+0x178/0x1a9 [ 2866.132281] [] sk_reset_timer+0x19/0x26 [ 2866.132281] [] tcp_rearm_rto+0x99/0xa4 [ 2866.132281] [] tcp_event_new_data_sent+0x6e/0x70 [ 2866.132281] [] tcp_write_xmit+0x7de/0x8e4 [ 2866.132281] [] ? __alloc_skb+0xa0/0x1a1 [ 2866.132281] [] __tcp_push_pending_frames+0x2e/0x8a [ 2866.132281] [] tcp_sendmsg+0xb32/0xcc6 [ 2866.132281] [] inet_sendmsg+0xaa/0xd5 [ 2866.132281] [] ? inet_autobind+0x5f/0x5f [ 2866.132281] [] ? trace_clock_local+0x9/0xb [ 2866.132281] [] sock_sendmsg+0xa3/0xc4 [ 2866.132281] [] ? rb_reserve_next_event+0x26f/0x2d5 [ 2866.132281] [] ? native_sched_clock+0x29/0x6f [ 2866.132281] [] ? sched_clock+0x9/0xd [ 2866.132281] [] ? trace_clock_local+0x9/0xb [ 2866.132281] [] kernel_sendmsg+0x37/0x43 [ 2866.132281] [] xs_send_kvec+0x77/0x80 [ 2866.132281] [] xs_sendpages+0x6f/0x1a0 [ 2866.132281] [] ? try_to_del_timer_sync+0x55/0x61 [ 2866.132281] [] xs_tcp_send_request+0x55/0xf1 [ 2866.132281] [] xprt_transmit+0x89/0x1db [ 2866.132281] [] ? call_connect+0x3c/0x3c [ 2866.132281] [] call_transmit+0x1c5/0x20e [ 2866.132281] [] __rpc_execute+0x6f/0x225 [ 2866.132281] [] ? call_connect+0x3c/0x3c [ 2866.132281] [] rpc_async_schedule+0x28/0x34 [ 2866.132281] [] process_one_work+0x24d/0x47f [ 2866.132281] [] ? process_one_work+0x1de/0x47f [ 2866.132281] [] ? __rpc_execute+0x225/0x225 [ 2866.132281] [] worker_thread+0x236/0x317 [ 2866.132281] [] ? process_scheduled_works+0x2f/0x2f [ 2866.132281] [] kthread+0x9a/0xa2 [ 2866.132281] [] kernel_thread_helper+0x4/0x10 [ 2866.132281] [] ? retint_restore_args+0x13/0x13 [ 2866.132281] [] ? __init_kthread_worker+0x5a/0x5a [ 2866.132281] [] ? gs_change+0x13/0x13 [ 2866.308506] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.309689] ============================================================================= [ 2866.310254] BUG TCP (Not tainted): Object already free [ 2866.310254] ----------------------------------------------------------------------------- [ 2866.310254] The bug comes from the fact that timer set in sk_reset_timer() can run before we actually do the sock_hold(). socket refcount reaches zero and we free the socket too soon. timer handler is not allowed to reduce socket refcnt if socket is owned by the user, or we need to change sk_reset_timer() implementation. We should take a reference on the socket in case TCP_DELACK_TIMER_DEFERRED or TCP_DELACK_TIMER_DEFERRED bit are set in tsq_flags Also fix a typo in tcp_delack_timer(), where TCP_WRITE_TIMER_DEFERRED was used instead of TCP_DELACK_TIMER_DEFERRED. For consistency, use same socket refcount change for TCP_MTU_REDUCED_DEFERRED, even if not fired from a timer. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 +++++--- net/ipv4/tcp_output.c | 14 +++++++++----- net/ipv4/tcp_timer.c | 6 ++++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5bf2040..00a748d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ tp->mtu_info = info; - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + } else { + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + sock_hold(sk); + } goto out; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20dfd89..d046326 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); - - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { tcp_delack_timer_handler(sk); - - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { sk->sk_prot->mtu_reduced(sk); + __sock_put(sk); + } } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad..b774a03 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); @@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.1 From 1a7b27c97ce675b42eeb7bfaf6e15c34f35c8f95 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 20 Aug 2012 02:52:09 +0000 Subject: ipv4: Use newinet->inet_opt in inet_csk_route_child_sock() Since 0e734419923bd ("ipv4: Use inet_csk_route_child_sock() in DCCP and TCP."), inet_csk_route_child_sock() is called instead of inet_csk_route_req(). However, after creating the child-sock in tcp/dccp_v4_syn_recv_sock(), ireq->opt is set to NULL, before calling inet_csk_route_child_sock(). Thus, inside inet_csk_route_child_sock() opt is always NULL and the SRR-options are not respected anymore. Packets sent by the server won't have the correct destination-IP. This patch fixes it by accessing newinet->inet_opt instead of ireq->opt inside inet_csk_route_child_sock(). Reported-by: Luca Boccassi Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17..7f75f21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *newinet = inet_sk(newsk); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; + + rcu_read_lock(); + opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } -- cgit v1.1 From a9915a1b52df52ad87f3b33422da95cf25372f09 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Aug 2012 07:26:45 +0000 Subject: ipv4: fix ip header ident selection in __ip_make_skb() Christian Casteyde reported a kmemcheck 32-bit read from uninitialized memory in __ip_select_ident(). It turns out that __ip_make_skb() called ip_select_ident() before properly initializing iph->daddr. This is a bug uncovered by commit 1d861aa4b3fb (inet: Minimize use of cached route inetpeer.) Addresses https://bugzilla.kernel.org/show_bug.cgi?id=46131 Reported-by: Christian Casteyde Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 147ccc3..c196d74 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); + ip_select_ident(iph, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; -- cgit v1.1 From e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Aug 2012 06:21:17 +0000 Subject: af_netlink: force credentials passing [CVE-2012-3520] Pablo Neira Ayuso discovered that avahi and potentially NetworkManager accept spoofed Netlink messages because of a kernel bug. The kernel passes all-zero SCM_CREDENTIALS ancillary data to the receiver if the sender did not provide such data, instead of not including any such data at all or including the correct data from the peer (as it is the case with AF_UNIX). This bug was introduced in commit 16e572626961 (af_unix: dont send SCM_CREDENTIALS by default) This patch forces passing credentials for netlink, as before the regression. Another fix would be to not add SCM_CREDENTIALS in netlink messages if not provided by the sender, but it might break some programs. With help from Florian Weimer & Petr Matousek This issue is designated as CVE-2012-3520 Signed-off-by: Eric Dumazet Cc: Petr Matousek Cc: Florian Weimer Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 2 +- net/unix/af_unix.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969..1445d73 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &scm; - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, true); if (err < 0) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e4768c1..c5ee4ff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; -- cgit v1.1 From 6d4221b53707486dfad3f5bfe568d2ce7f4c9863 Mon Sep 17 00:00:00 2001 From: Jim Schutt Date: Fri, 10 Aug 2012 10:37:38 -0700 Subject: libceph: avoid truncation due to racing banners Because the Ceph client messenger uses a non-blocking connect, it is possible for the sending of the client banner to race with the arrival of the banner sent by the peer. When ceph_sock_state_change() notices the connect has completed, it schedules work to process the socket via con_work(). During this time the peer is writing its banner, and arrival of the peer banner races with con_work(). If con_work() calls try_read() before the peer banner arrives, there is nothing for it to do, after which con_work() calls try_write() to send the client's banner. In this case Ceph's protocol negotiation can complete succesfully. The server-side messenger immediately sends its banner and addresses after accepting a connect request, *before* actually attempting to read or verify the banner from the client. As a result, it is possible for the banner from the server to arrive before con_work() calls try_read(). If that happens, try_read() will read the banner and prepare protocol negotiation info via prepare_write_connect(). prepare_write_connect() calls con_out_kvec_reset(), which discards the as-yet-unsent client banner. Next, con_work() calls try_write(), which sends the protocol negotiation info rather than the banner that the peer is expecting. The result is that the peer sees an invalid banner, and the client reports "negotiation failed". Fix this by moving con_out_kvec_reset() out of prepare_write_connect() to its callers at all locations except the one where the banner might still need to be sent. [elder@inktak.com: added note about server-side behavior] Signed-off-by: Jim Schutt Reviewed-by: Alex Elder --- net/ceph/messenger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b979675..24c5eea 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -915,7 +915,6 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.authorizer_len = auth ? cpu_to_le32(auth->authorizer_buf_len) : 0; - con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); if (auth && auth->authorizer_buf_len) @@ -1557,6 +1556,7 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1577,6 +1577,7 @@ static int process_connect(struct ceph_connection *con) ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); reset_connection(con); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1601,6 +1602,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.connect_seq), le32_to_cpu(con->in_reply.connect_seq)); con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1617,6 +1619,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, le32_to_cpu(con->in_reply.global_seq)); + con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -2135,7 +2138,11 @@ more: BUG_ON(con->state != CON_STATE_CONNECTING); con->state = CON_STATE_NEGOTIATING; - /* Banner is good, exchange connection info */ + /* + * Received banner is good, exchange connection info. + * Do not reset out_kvec, as sending our banner raced + * with receiving peer banner after connect completed. + */ ret = prepare_write_connect(con); if (ret < 0) goto out; -- cgit v1.1 From 27f011243a6e4e8b81078df1d83608dae31e3d38 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 20 Aug 2012 11:28:25 -0700 Subject: mac80211: fix DS to MBSS address translation The destination address of unicast frames forwarded through a mesh gate was being replaced with the broadcast address. Instead leave the original destination address as the mesh DA. If the nexthop address is not in the mpath table it will be resolved. If that fails, the frame will be forwarded to known mesh gates. Reported-by: Cedric Voncken Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index acf712f..c5e8c9c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1811,37 +1811,31 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, NULL, NULL); } else { - int is_mesh_mcast = 1; - const u8 *mesh_da; + /* DS -> MBSS (802.11-2012 13.11.3.3). + * For unicast with unknown forwarding information, + * destination might be in the MBSS or if that fails + * forwarded to another mesh gate. In either case + * resolution will be handled in ieee80211_xmit(), so + * leave the original DA. This also works for mcast */ + const u8 *mesh_da = skb->data; + + if (mppath) + mesh_da = mppath->mpp; + else if (mpath) + mesh_da = mpath->dst; + rcu_read_unlock(); - if (is_multicast_ether_addr(skb->data)) - /* DA TA mSA AE:SA */ - mesh_da = skb->data; - else { - static const u8 bcast[ETH_ALEN] = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - if (mppath) { - /* RA TA mDA mSA AE:DA SA */ - mesh_da = mppath->mpp; - is_mesh_mcast = 0; - } else if (mpath) { - mesh_da = mpath->dst; - is_mesh_mcast = 0; - } else { - /* DA TA mSA AE:SA */ - mesh_da = bcast; - } - } hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, mesh_da, sdata->vif.addr); - rcu_read_unlock(); - if (is_mesh_mcast) + if (is_multicast_ether_addr(mesh_da)) + /* DA TA mSA AE:SA */ meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, skb->data + ETH_ALEN, NULL); else + /* RA TA mDA mSA AE:DA SA */ meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, -- cgit v1.1 From 9b04f350057863d1fad1ba071e09362a1da3503e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Aug 2012 20:48:29 +0000 Subject: ipv4: properly update pmtu Sylvain Munault reported following info : - TCP connection get "stuck" with data in send queue when doing "large" transfers ( like typing 'ps ax' on a ssh connection ) - Only happens on path where the PMTU is lower than the MTU of the interface - Is not present right after boot, it only appears 10-20min after boot or so. (and that's inside the _same_ TCP connection, it works fine at first and then in the same ssh session, it'll get stuck) - Definitely seems related to fragments somehow since I see a router sending ICMP message saying fragmentation is needed. - Exact same setup works fine with kernel 3.5.1 Problem happens when the 10 minutes (ip_rt_mtu_expires) expiration period is over. ip_rt_update_pmtu() calls dst_set_expires() to rearm a new expiration, but dst_set_expires() does nothing because dst.expires is already set. It seems we want to set the expires field to a new value, regardless of prior one. With help from Julian Anastasov. Reported-by: Sylvain Munaut Signed-off-by: Eric Dumazet CC: Julian Anastasov Tested-by: Sylvain Munaut Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fd9ecb5..8c8c748 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -956,7 +956,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, dst->obsolete = DST_OBSOLETE_KILL; } else { rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); } } -- cgit v1.1 From a0dfb2634e5671770f598cda08002d8cda66ac77 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 23 Aug 2012 19:51:21 +0800 Subject: af_packet: match_fanout_group() can be static cc: Eric Leblond Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aee7196..c5c9e2a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1273,7 +1273,7 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) { if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) return true; -- cgit v1.1 From 78df76a065ae3b5dbcb9a29912adc02f697de498 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Aug 2012 05:40:47 +0000 Subject: ipv4: take rt_uncached_lock only if needed Multicast traffic allocates dst with DST_NOCACHE, but dst is not inserted into rt_uncached_list. This slowdown multicast workloads on SMP because rt_uncached_lock is contended. Change the test before taking the lock to actually check the dst was inserted into rt_uncached_list. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c8c748..24fd4c5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1263,7 +1263,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - if (dst->flags & DST_NOCACHE) { + if (!list_empty(&rt->rt_uncached)) { spin_lock_bh(&rt_uncached_lock); list_del(&rt->rt_uncached); spin_unlock_bh(&rt_uncached_lock); -- cgit v1.1 From 20e1db19db5d6b9e4e83021595eab0dc8f107bef Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 23 Aug 2012 02:09:11 +0000 Subject: netlink: fix possible spoofing from non-root processes Non-root user-space processes can send Netlink messages to other processes that are well-known for being subscribed to Netlink asynchronous notifications. This allows ilegitimate non-root process to send forged messages to Netlink subscribers. The userspace process usually verifies the legitimate origin in two ways: a) Socket credentials. If UID != 0, then the message comes from some ilegitimate process and the message needs to be dropped. b) Netlink portID. In general, portID == 0 means that the origin of the messages comes from the kernel. Thus, discarding any message not coming from the kernel. However, ctnetlink sets the portID in event messages that has been triggered by some user-space process, eg. conntrack utility. So other processes subscribed to ctnetlink events, eg. conntrackd, know that the event was triggered by some user-space action. Neither of the two ways to discard ilegitimate messages coming from non-root processes can help for ctnetlink. This patch adds capability validation in case that dst_pid is set in netlink_sendmsg(). This approach is aggressive since existing applications using any Netlink bus to deliver messages between two user-space processes will break. Note that the exception is NETLINK_USERSOCK, since it is reserved for netlink-to-netlink userspace communication. Still, if anyone wants that his Netlink bus allows netlink-to-netlink userspace, then they can set NL_NONROOT_SEND. However, by default, I don't think it makes sense to allow to use NETLINK_ROUTE to communicate two processes that are sending no matter what information that is not related to link/neighbouring/routing. They should be using NETLINK_USERSOCK instead for that. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1445d73..5270238 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1373,7 +1373,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2147,6 +2148,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } -- cgit v1.1 From 7c4a56fec379ac0d7754e0d4da6a7361f1a4fe64 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 23 Aug 2012 07:05:17 +0000 Subject: tcp: fix cwnd reduction for non-sack recovery The cwnd reduction in fast recovery is based on the number of packets newly delivered per ACK. For non-sack connections every DUPACK signifies a packet has been delivered, but the sender mistakenly skips counting them for cwnd reduction. The fix is to compute newly_acked_sacked after DUPACKs are accounted in sacked_out for non-sack connections. Signed-off-by: Yuchung Cheng Acked-by: Nandita Dukkipati Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85308b9..6e38c6c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, bool is_dupack, + int prior_sacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); + int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } else do_lost = tcp_try_undo_partial(sk, pkts_acked); + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) @@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); @@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - int newly_acked_sacked = 0; bool frto_cwnd = false; /* If the ack is older than previous acks @@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); pkts_acked = prior_packets - tp->packets_out; - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) @@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3718,8 +3718,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - newly_acked_sacked = tp->sacked_out - prior_sacked; - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } -- cgit v1.1 From 072a9c48600409d72aeb0d5b29fbb75861a06631 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 24 Aug 2012 21:41:11 +0000 Subject: netpoll: revert 6bdb7fe3104 and fix be_poll() instead Against -net. In the patch "netpoll: re-enable irq in poll_napi()", I tried to fix the following warning: [100718.051041] ------------[ cut here ]------------ [100718.051048] WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x7d/0xb0() (Not tainted) [100718.051049] Hardware name: ProLiant BL460c G7 ... [100718.051068] Call Trace: [100718.051073] [] ? warn_slowpath_common+0x87/0xc0 [100718.051075] [] ? warn_slowpath_null+0x1a/0x20 [100718.051077] [] ? local_bh_enable_ip+0x7d/0xb0 [100718.051080] [] ? _spin_unlock_bh+0x1b/0x20 [100718.051085] [] ? be_process_mcc+0x74/0x230 [be2net] [100718.051088] [] ? be_poll_tx_mcc+0x16c/0x290 [be2net] [100718.051090] [] ? netpoll_poll_dev+0xd6/0x490 [100718.051095] [] ? bond_poll_controller+0x75/0x80 [bonding] [100718.051097] [] ? netpoll_poll_dev+0x45/0x490 [100718.051100] [] ? ksize+0x19/0x80 [100718.051102] [] ? netpoll_send_skb_on_dev+0x157/0x240 by reenabling IRQ before calling ->poll, but it seems more problems are introduced after that patch: http://ozlabs.org/~akpm/stuff/IMG_20120824_122054.jpg http://marc.info/?l=linux-netdev&m=134563282530588&w=2 So it is safe to fix be2net driver code directly. This patch reverts the offending commit and fixes be_poll() by avoid disabling BH there, this is okay because be_poll() can be called either by poll_napi() which already disables IRQ, or by net_rx_action() which already disables BH. Reported-by: Andrew Morton Reported-by: Sylvain Munaut Cc: Sylvain Munaut Cc: Andrew Morton Cc: David Miller Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Signed-off-by: Cong Wang Tested-by: Sylvain Munaut Signed-off-by: David S. Miller --- net/core/netpoll.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 346b1eb..e4ba3e7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -168,24 +168,16 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int budget = 16; - WARN_ON_ONCE(!irqs_disabled()); - list_for_each_entry(napi, &dev->napi_list, dev_list) { - local_irq_enable(); if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - rcu_read_lock_bh(); budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), napi, budget); - rcu_read_unlock_bh(); spin_unlock(&napi->poll_lock); - if (!budget) { - local_irq_disable(); + if (!budget) break; - } } - local_irq_disable(); } } -- cgit v1.1 From 0a54e939d8c2647c711ef2369c3e73c3b7f3028c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:11 +0000 Subject: ipvs: fix error return code Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 72bf32a..f51013c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1171,8 +1171,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!svc->stats.cpustats) + if (!svc->stats.cpustats) { + ret = -ENOMEM; goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); -- cgit v1.1 From ef6acf68c259d907517dcc0ffefcd4e30276ae29 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:16 +0000 Subject: netfilter: ctnetlink: fix error return code in init path Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da4fc37..9807f32 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2790,7 +2790,8 @@ static int __init ctnetlink_init(void) goto err_unreg_subsys; } - if (register_pernet_subsys(&ctnetlink_net_ops)) { + ret = register_pernet_subsys(&ctnetlink_net_ops); + if (ret < 0) { pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } -- cgit v1.1 From 6fc09f10f12477bdaa54e94f9cb428bef6d81315 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:17 +0000 Subject: netfilter: nfnetlink_log: fix error return code in init path Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 592091c1..14e2f39 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -996,8 +996,10 @@ static int __init nfnetlink_log_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) + proc_net_netfilter, &nful_file_ops)) { + status = -ENOMEM; goto cleanup_logger; + } #endif return status; -- cgit v1.1 From 3f509c689a07a4aa989b426893d8491a7ffcc410 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 15:24:09 +0000 Subject: netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation We're hitting bug while trying to reinsert an already existing expectation: kernel BUG at kernel/timer.c:895! invalid opcode: 0000 [#1] SMP [...] Call Trace: [] nf_ct_expect_related_report+0x4a0/0x57a [nf_conntrack] [] ? in4_pton+0x72/0x131 [] ip_nat_sdp_media+0xeb/0x185 [nf_nat_sip] [] set_expected_rtp_rtcp+0x32d/0x39b [nf_conntrack_sip] [] process_sdp+0x30c/0x3ec [nf_conntrack_sip] [] ? irq_exit+0x9a/0x9c [] ? ip_nat_sdp_media+0x185/0x185 [nf_nat_sip] We have to remove the RTP expectation if the RTCP expectation hits EBUSY since we keep trying with other ports until we succeed. Reported-by: Rafal Fitt Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4ad9cf1..9c87cde 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -502,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); port = 0; break; -- cgit v1.1 From 99469c32f79a32d8481f87be0d3c66dad286f4ec Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Fri, 24 Aug 2012 01:07:38 +0000 Subject: l2tp: avoid to use synchronize_rcu in tunnel free function Avoid to use synchronize_rcu in l2tp_tunnel_free because context may be atomic. Signed-off-by: Dmitry Kozlov Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 3 +-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 393355d..513cab0 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) /* Remove from tunnel list */ spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); + kfree_rcu(tunnel, rcu); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - synchronize_rcu(); atomic_dec(&l2tp_tunnel_count); - kfree(tunnel); } /* Create a socket for the tunnel, if one isn't set up by diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a38ec6c..56d583e 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, -- cgit v1.1 From acbb219d5f53821b2d0080d047800410c0420ea1 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 24 Aug 2012 07:38:35 +0000 Subject: net: ipv4: ipmr_expire_timer causes crash when removing net namespace When tearing down a net namespace, ipv4 mr_table structures are freed without first deactivating their timers. This can result in a crash in run_timer_softirq. This patch mimics the corresponding behaviour in ipv6. Locking and synchronization seem to be adequate. We are about to kfree mrt, so existing code should already make sure that no other references to mrt are pending or can be created by incoming traffic. The functions invoked here do not cause new references to mrt or other race conditions to be created. Invoking del_timer_sync guarantees that ipmr_expire_timer is inactive. Both ipmr_expire_process (whose completion we may have to wait in del_timer_sync) and mroute_clean_tables internally use mfc_unres_lock or other synchronizations when needed, and they both only modify mrt. Tested in Linux 3.4.8. Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4..ebdf06f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static void ipmr_free_table(struct mr_table *mrt); + static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct mr_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - kfree(mrt); + ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - kfree(net->ipv4.mrt); + ipmr_free_table(net->ipv4.mrt); } #endif @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; } +static void ipmr_free_table(struct mr_table *mrt) +{ + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -- cgit v1.1 From c5ae7d41927dbd208c885d4794e30617ad6cdf12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Aug 2012 12:33:07 +0000 Subject: ipv4: must use rcu protection while calling fib_lookup Following lockdep splat was reported by Pavel Roskin : [ 1570.586223] =============================== [ 1570.586225] [ INFO: suspicious RCU usage. ] [ 1570.586228] 3.6.0-rc3-wl-main #98 Not tainted [ 1570.586229] ------------------------------- [ 1570.586231] /home/proski/src/linux/net/ipv4/route.c:645 suspicious rcu_dereference_check() usage! [ 1570.586233] [ 1570.586233] other info that might help us debug this: [ 1570.586233] [ 1570.586236] [ 1570.586236] rcu_scheduler_active = 1, debug_locks = 0 [ 1570.586238] 2 locks held by Chrome_IOThread/4467: [ 1570.586240] #0: (slock-AF_INET){+.-...}, at: [] release_sock+0x2c/0xa0 [ 1570.586253] #1: (fnhe_lock){+.-...}, at: [] update_or_create_fnhe+0x2c/0x270 [ 1570.586260] [ 1570.586260] stack backtrace: [ 1570.586263] Pid: 4467, comm: Chrome_IOThread Not tainted 3.6.0-rc3-wl-main #98 [ 1570.586265] Call Trace: [ 1570.586271] [] lockdep_rcu_suspicious+0xfd/0x130 [ 1570.586275] [] update_or_create_fnhe+0x15c/0x270 [ 1570.586278] [] __ip_rt_update_pmtu+0x73/0xb0 [ 1570.586282] [] ip_rt_update_pmtu+0x29/0x90 [ 1570.586285] [] inet_csk_update_pmtu+0x2c/0x80 [ 1570.586290] [] tcp_v4_mtu_reduced+0x2e/0xc0 [ 1570.586293] [] tcp_release_cb+0xa4/0xb0 [ 1570.586296] [] release_sock+0x55/0xa0 [ 1570.586300] [] tcp_sendmsg+0x4af/0xf50 [ 1570.586305] [] inet_sendmsg+0x120/0x230 [ 1570.586308] [] ? inet_sk_rebuild_header+0x40/0x40 [ 1570.586312] [] ? sock_update_classid+0xbd/0x3b0 [ 1570.586315] [] ? sock_update_classid+0x130/0x3b0 [ 1570.586320] [] do_sock_write+0xc5/0xe0 [ 1570.586323] [] sock_aio_write+0x53/0x80 [ 1570.586328] [] do_sync_write+0xa3/0xe0 [ 1570.586332] [] vfs_write+0x165/0x180 [ 1570.586335] [] sys_write+0x45/0x90 [ 1570.586340] [] system_call_fastpath+0x16/0x1b Signed-off-by: Eric Dumazet Reported-by: Pavel Roskin Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 24fd4c5..82cf2a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -934,12 +934,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } + rcu_read_unlock(); return mtu; } -- cgit v1.1 From 5b423f6a40a0327f9d40bc8b97ce9be266f74368 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 29 Aug 2012 16:25:49 +0000 Subject: netfilter: nf_conntrack: fix racy timer handling with reliable events Existing code assumes that del_timer returns true for alive conntrack entries. However, this is not true if reliable events are enabled. In that case, del_timer may return true for entries that were just inserted in the dying list. Note that packets / ctnetlink may hold references to conntrack entries that were just inserted to such list. This patch fixes the issue by adding an independent timer for event delivery. This increases the size of the ecache extension. Still we can revisit this later and use variable size extensions to allocate this area on demand. Tested-by: Oliver Smith Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf48755..2ceec64 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -249,12 +249,15 @@ static void death_by_event(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { /* bad luck, let's retry again */ - ct->timeout.expires = jiffies + + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); return; } /* we've got the event delivered, now it's dying */ @@ -268,6 +271,9 @@ static void death_by_event(unsigned long ul_conntrack) void nf_ct_insert_dying_list(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); + + BUG_ON(ecache == NULL); /* add this conntrack to the dying list */ spin_lock_bh(&nf_conntrack_lock); @@ -275,10 +281,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); /* set a new timer to retry event delivery */ - setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); - ct->timeout.expires = jiffies + + setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); + ecache->timeout.expires = jiffies + (random32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ct->timeout); + add_timer(&ecache->timeout); } EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); -- cgit v1.1 From 48f125ce1cc3ff275f9587b5bf56bf0f90766c7d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:12 +0000 Subject: net: ipv6: fix error return code Initialize return variable before exiting on an error path. The initial initialization of the return variable is also dropped, because that value is never used. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6dc7fd3..282f372 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - err = -ENOMEM; - aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); -- cgit v1.1 From 599901c3e4204e9d9c5a24df5402cd91617a2a26 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:15 +0000 Subject: net/xfrm/xfrm_state.c: fix error return code Initialize return variable before exiting on an error path. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 87cd0e4..210be48 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1994,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) goto error; x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) + if (x->outer_mode == NULL) { + err = -EPROTONOSUPPORT; goto error; + } if (init_replay) { err = xfrm_init_replay(x); -- cgit v1.1