From 269c4845d5b3627b95b1934107251bacbe99bb68 Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Date: Fri, 15 Jun 2012 02:30:20 -0300
Subject: Bluetooth: Fix possible deadlock in SCO code

sco_chan_del() only has conn != NULL when called from sco_conn_del() so
just move the code from it that deal with conn to sco_conn_del().

[  120.765529]
[  120.765529] ======================================================
[  120.766529] [ INFO: possible circular locking dependency detected ]
[  120.766529] 3.5.0-rc1-10292-g3701f94-dirty #70 Tainted: G        W
[  120.766529] -------------------------------------------------------
[  120.766529] kworker/u:3/1497 is trying to acquire lock:
[  120.766529]  (&(&conn->lock)->rlock#2){+.+...}, at:
[<ffffffffa00b7ecc>] sco_chan_del+0x4c/0x170 [bluetooth]
[  120.766529]
[  120.766529] but task is already holding lock:
[  120.766529]  (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}, at:
[<ffffffffa00b8401>] sco_conn_del+0x61/0xe0 [bluetooth]
[  120.766529]
[  120.766529] which lock already depends on the new lock.
[  120.766529]
[  120.766529]
[  120.766529] the existing dependency chain (in reverse order) is:
[  120.766529]
[  120.766529] -> #1 (slock-AF_BLUETOOTH-BTPROTO_SCO){+.+...}:
[  120.766529]        [<ffffffff8107980e>] lock_acquire+0x8e/0xb0
[  120.766529]        [<ffffffff813c19e0>] _raw_spin_lock+0x40/0x80
[  120.766529]        [<ffffffffa00b85e9>] sco_connect_cfm+0x79/0x300
[bluetooth]
[  120.766529]        [<ffffffffa0094b13>]
hci_sync_conn_complete_evt.isra.90+0x343/0x400 [bluetooth]
[  120.766529]        [<ffffffffa009d447>] hci_event_packet+0x317/0xfb0
[bluetooth]
[  120.766529]        [<ffffffffa008aa68>] hci_rx_work+0x2c8/0x890
[bluetooth]
[  120.766529]        [<ffffffff81047db7>] process_one_work+0x197/0x460
[  120.766529]        [<ffffffff810489d6>] worker_thread+0x126/0x2d0
[  120.766529]        [<ffffffff8104ee4d>] kthread+0x9d/0xb0
[  120.766529]        [<ffffffff813c4294>] kernel_thread_helper+0x4/0x10
[  120.766529]
[  120.766529] -> #0 (&(&conn->lock)->rlock#2){+.+...}:
[  120.766529]        [<ffffffff81078a8a>] __lock_acquire+0x154a/0x1d30
[  120.766529]        [<ffffffff8107980e>] lock_acquire+0x8e/0xb0
[  120.766529]        [<ffffffff813c19e0>] _raw_spin_lock+0x40/0x80
[  120.766529]        [<ffffffffa00b7ecc>] sco_chan_del+0x4c/0x170
[bluetooth]
[  120.766529]        [<ffffffffa00b8414>] sco_conn_del+0x74/0xe0
[bluetooth]
[  120.766529]        [<ffffffffa00b88a2>] sco_disconn_cfm+0x32/0x60
[bluetooth]
[  120.766529]        [<ffffffffa0093a82>]
hci_disconn_complete_evt.isra.53+0x242/0x390 [bluetooth]
[  120.766529]        [<ffffffffa009d747>] hci_event_packet+0x617/0xfb0
[bluetooth]
[  120.766529]        [<ffffffffa008aa68>] hci_rx_work+0x2c8/0x890
[bluetooth]
[  120.766529]        [<ffffffff81047db7>] process_one_work+0x197/0x460
[  120.766529]        [<ffffffff810489d6>] worker_thread+0x126/0x2d0
[  120.766529]        [<ffffffff8104ee4d>] kthread+0x9d/0xb0
[  120.766529]        [<ffffffff813c4294>] kernel_thread_helper+0x4/0x10
[  120.766529]
[  120.766529] other info that might help us debug this:
[  120.766529]
[  120.766529]  Possible unsafe locking scenario:
[  120.766529]
[  120.766529]        CPU0                    CPU1
[  120.766529]        ----                    ----
[  120.766529]   lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
[  120.766529]
lock(&(&conn->lock)->rlock#2);
[  120.766529]
lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
[  120.766529]   lock(&(&conn->lock)->rlock#2);
[  120.766529]
[  120.766529]  *** DEADLOCK ***

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/sco.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'net')
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 40bbe25..3589e21 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
 		bh_unlock_sock(sk);
+
+		sco_conn_lock(conn);
+		conn->sk = NULL;
+		sco_pi(sk)->conn = NULL;
+		sco_conn_unlock(conn);
+
+		if (conn->hcon)
+			hci_conn_put(conn->hcon);
+
 		sco_sock_kill(sk);
 	}
 
@@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err)
 
 	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
 
-	if (conn) {
-		sco_conn_lock(conn);
-		conn->sk = NULL;
-		sco_pi(sk)->conn = NULL;
-		sco_conn_unlock(conn);
-
-		if (conn->hcon)
-			hci_conn_put(conn->hcon);
-	}
-
 	sk->sk_state = BT_CLOSED;
 	sk->sk_err   = err;
 	sk->sk_state_change(sk);
-- 
cgit v1.1


From a9ea3ed9b71cc3271dd59e76f65748adcaa76422 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@tieto.com>
Date: Thu, 19 Jul 2012 14:46:08 +0200
Subject: Bluetooth: Fix legacy pairing with some devices

Some devices e.g. some Android based phones don't do SDP search before
pairing and cancel legacy pairing when ACL is disconnected.

PIN Code Request event which changes ACL timeout to HCI_PAIRING_TIMEOUT
is only received after remote user entered PIN.

In that case no L2CAP is connected so default HCI_DISCONN_TIMEOUT
(2 seconds) is being used to timeout ACL connection. This results in
problems with legacy pairing as remote user has only few seconds to
enter PIN before ACL is disconnected.

Increase disconnect timeout for incomming connection to
HCI_PAIRING_TIMEOUT if SSP is disabled and no linkey exists.

To avoid keeping ACL alive for too long after SDP search set ACL
timeout back to HCI_DISCONN_TIMEOUT when L2CAP is connected.

2012-07-19 13:24:43.413521 < HCI Command: Create Connection (0x01|0x0005) plen 13
    bdaddr 00:02:72:D6:6A:3F ptype 0xcc18 rswitch 0x01 clkoffset 0x0000
    Packet type: DM1 DM3 DM5 DH1 DH3 DH5
2012-07-19 13:24:43.425224 > HCI Event: Command Status (0x0f) plen 4
    Create Connection (0x01|0x0005) status 0x00 ncmd 1
2012-07-19 13:24:43.885222 > HCI Event: Role Change (0x12) plen 8
    status 0x00 bdaddr 00:02:72:D6:6A:3F role 0x01
    Role: Slave
2012-07-19 13:24:44.054221 > HCI Event: Connect Complete (0x03) plen 11
    status 0x00 handle 42 bdaddr 00:02:72:D6:6A:3F type ACL encrypt 0x00
2012-07-19 13:24:44.054313 < HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2
    handle 42
2012-07-19 13:24:44.055176 > HCI Event: Page Scan Repetition Mode Change (0x20) plen 7
    bdaddr 00:02:72:D6:6A:3F mode 0
2012-07-19 13:24:44.056217 > HCI Event: Max Slots Change (0x1b) plen 3
    handle 42 slots 5
2012-07-19 13:24:44.059218 > HCI Event: Command Status (0x0f) plen 4
    Read Remote Supported Features (0x01|0x001b) status 0x00 ncmd 0
2012-07-19 13:24:44.062192 > HCI Event: Command Status (0x0f) plen 4
    Unknown (0x00|0x0000) status 0x00 ncmd 1
2012-07-19 13:24:44.067219 > HCI Event: Read Remote Supported Features (0x0b) plen 11
    status 0x00 handle 42
    Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87
2012-07-19 13:24:44.067248 < HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3
    handle 42 page 1
2012-07-19 13:24:44.071217 > HCI Event: Command Status (0x0f) plen 4
    Read Remote Extended Features (0x01|0x001c) status 0x00 ncmd 1
2012-07-19 13:24:44.076218 > HCI Event: Read Remote Extended Features (0x23) plen 13
    status 0x00 handle 42 page 1 max 1
    Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00
2012-07-19 13:24:44.076249 < HCI Command: Remote Name Request (0x01|0x0019) plen 10
    bdaddr 00:02:72:D6:6A:3F mode 2 clkoffset 0x0000
2012-07-19 13:24:44.081218 > HCI Event: Command Status (0x0f) plen 4
    Remote Name Request (0x01|0x0019) status 0x00 ncmd 1
2012-07-19 13:24:44.105214 > HCI Event: Remote Name Req Complete (0x07) plen 255
    status 0x00 bdaddr 00:02:72:D6:6A:3F name 'uw000951-0'
2012-07-19 13:24:44.105284 < HCI Command: Authentication Requested (0x01|0x0011) plen 2
    handle 42
2012-07-19 13:24:44.111207 > HCI Event: Command Status (0x0f) plen 4
    Authentication Requested (0x01|0x0011) status 0x00 ncmd 1
2012-07-19 13:24:44.112220 > HCI Event: Link Key Request (0x17) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.112249 < HCI Command: Link Key Request Negative Reply (0x01|0x000c) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.115215 > HCI Event: Command Complete (0x0e) plen 10
    Link Key Request Negative Reply (0x01|0x000c) ncmd 1
    status 0x00 bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:44.116215 > HCI Event: PIN Code Request (0x16) plen 6
    bdaddr 00:02:72:D6:6A:3F
2012-07-19 13:24:48.099184 > HCI Event: Auth Complete (0x06) plen 3
    status 0x13 handle 42
    Error: Remote User Terminated Connection
2012-07-19 13:24:48.179182 > HCI Event: Disconn Complete (0x05) plen 4
    status 0x00 handle 42 reason 0x13
    Reason: Remote User Terminated Connection

Cc: stable@vger.kernel.org
Signed-off-by: Szymon Janc <szymon.janc@tieto.com>
Acked-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c  | 7 ++++++-
 net/bluetooth/l2cap_core.c | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 41ff978..248632c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1762,7 +1762,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		if (conn->type == ACL_LINK) {
 			conn->state = BT_CONFIG;
 			hci_conn_hold(conn);
-			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+
+			if (!conn->out && !hci_conn_ssp_enabled(conn) &&
+			    !hci_find_link_key(hdev, &ev->bdaddr))
+				conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+			else
+				conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 		} else
 			conn->state = BT_CONNECTED;
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a8964db..daa149b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1181,6 +1181,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 	sk = chan->sk;
 
 	hci_conn_hold(conn->hcon);
+	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
 
 	bacpy(&bt_sk(sk)->src, conn->src);
 	bacpy(&bt_sk(sk)->dst, conn->dst);
-- 
cgit v1.1


From c810089c27e48b816181b454fcc493d19fdbc2ba Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:09 +0300
Subject: Bluetooth: Fix using NULL inquiry entry

If entry wasn't found in the hci_inquiry_cache_lookup_resolve do not
resolve the name.This will fix a kernel crash when trying to use NULL
pointer.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 248632c..b64cfa2 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev)
 		return false;
 
 	e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED);
+	if (!e)
+		return false;
+
 	if (hci_resolve_name(hdev, e) == 0) {
 		e->name_state = NAME_PENDING;
 		return true;
-- 
cgit v1.1


From 7cc8380eb10347016d95bf6f9d842c2ae6d12932 Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:10 +0300
Subject: Bluetooth: Fix using a NULL inquiry cache entry

If the device was not found in a list of found devices names of which
are pending.This may happen in a case when HCI Remote Name Request
was sent as a part of incoming connection establishment procedure.
Hence there is no need to continue resolving a next name as it will
be done upon receiving another Remote Name Request Complete Event.
This will fix a kernel crash when trying to use this entry to resolve
the next name.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b64cfa2..fe9a3d6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1396,12 +1396,18 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
 		return;
 
 	e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING);
-	if (e) {
+	/* If the device was not found in a list of found devices names of which
+	 * are pending. there is no need to continue resolving a next name as it
+	 * will be done upon receiving another Remote Name Request Complete
+	 * Event */
+	if (!e)
+		return;
+
+	list_del(&e->list);
+	if (name) {
 		e->name_state = NAME_KNOWN;
-		list_del(&e->list);
-		if (name)
-			mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
-					 e->data.rssi, name, name_len);
+		mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
+				 e->data.rssi, name, name_len);
 	}
 
 	if (hci_resolve_next_name(hdev))
-- 
cgit v1.1


From c3e7c0d90b14a3e7ac091d24cef09efb516d587b Mon Sep 17 00:00:00 2001
From: Ram Malovany <ramm@ti.com>
Date: Thu, 19 Jul 2012 10:26:11 +0300
Subject: Bluetooth: Set name_state to unknown when entry name is empty

When the name of the given entry is empty , the state needs to be
updated accordingly.

Cc: stable@vger.kernel.org
Signed-off-by: Ram Malovany <ramm@ti.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fe9a3d6..715d7e3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1408,6 +1408,8 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
 		e->name_state = NAME_KNOWN;
 		mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
 				 e->data.rssi, name, name_len);
+	} else {
+		e->name_state = NAME_NOT_KNOWN;
 	}
 
 	if (hci_resolve_next_name(hdev))
-- 
cgit v1.1


From d08fd0e712a834d4abb869c0215a702e290bc51e Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:43 +0300
Subject: Bluetooth: smp: Fix possible NULL dereference

smp_chan_create might return NULL so we need to check before
dereferencing smp.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/smp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 16ef0dc..901a616 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
 		smp = smp_chan_create(conn);
+	else
+		smp = conn->smp_chan;
 
-	smp = conn->smp_chan;
+	if (!smp)
+		return SMP_UNSPECIFIED;
 
 	smp->preq[0] = SMP_CMD_PAIRING_REQ;
 	memcpy(&smp->preq[1], req, sizeof(*req));
-- 
cgit v1.1


From 49dfbb9129c4edb318578de35cc45c555df37884 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k@samsung.com>
Date: Thu, 19 Jul 2012 12:54:04 +0530
Subject: Bluetooth: Fix socket not getting freed if l2cap channel create fails

If l2cap_chan_create() fails then it will return from l2cap_sock_kill
since zapped flag of sk is reset.

Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/l2cap_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a4bb27e..b94abd3 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1174,7 +1174,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
 
 	chan = l2cap_chan_create();
 	if (!chan) {
-		l2cap_sock_kill(sk);
+		sk_free(sk);
 		return NULL;
 	}
 
-- 
cgit v1.1


From e9324b2ce656e1910d2385b9b47a2f926456dbe3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:45 +0000
Subject: netfilter: nf_ct_sip: fix helper name

Commit 3a8fc53a (netfilter: nf_ct_helper: allocate 16 bytes for the helper
and policy names) introduced a bug in the SIP helper, the helper name is
sprinted to the sip_names array instead of instead of into the helper
structure. This breaks the helper match and the /proc/net/nf_conntrack_expect
output.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_sip.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 758a1ba..2fb6669 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1515,7 +1515,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
 }
 
 static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
-static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
 
 static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
 	[SIP_EXPECT_SIGNALLING] = {
@@ -1585,9 +1584,9 @@ static int __init nf_conntrack_sip_init(void)
 			sip[i][j].me = THIS_MODULE;
 
 			if (ports[i] == SIP_PORT)
-				sprintf(sip_names[i][j], "sip");
+				sprintf(sip[i][j].name, "sip");
 			else
-				sprintf(sip_names[i][j], "sip-%u", i);
+				sprintf(sip[i][j].name, "sip-%u", i);
 
 			pr_debug("port #%u: %u\n", i, ports[i]);
 
-- 
cgit v1.1


From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:46 +0000
Subject: netfilter: nf_ct_sip: fix IPv6 address parsing

Within SIP messages IPv6 addresses are enclosed in square brackets in most
cases, with the exception of the "received=" header parameter. Currently
the helper fails to parse enclosed addresses.

This patch:

- changes the SIP address parsing function to enforce square brackets
  when required, and accept them when not required but present, as
  recommended by RFC 5118.

- adds a new SDP address parsing function that never accepts square
  brackets since SDP doesn't use them.

With these changes, the SIP helper correctly parses all test messages
from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages
for Internet Protocol Version 6 (IPv6)).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_sip.c  |  4 +-
 net/netfilter/nf_conntrack_sip.c | 87 ++++++++++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a2381..eef8f29 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -173,7 +173,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * the reply. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "maddr=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, true) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
@@ -187,7 +187,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * from which the server received the request. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "received=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, false) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2fb6669..5c0a112 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr,
 	return len + digits_len(ct, dptr, limit, shift);
 }
 
-static int parse_addr(const struct nf_conn *ct, const char *cp,
-                      const char **endp, union nf_inet_addr *addr,
-                      const char *limit)
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit, bool delim)
 {
 	const char *end;
-	int ret = 0;
+	int ret;
 
 	if (!ct)
 		return 0;
@@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
 	switch (nf_ct_l3num(ct)) {
 	case AF_INET:
 		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		if (ret == 0)
+			return 0;
 		break;
 	case AF_INET6:
+		if (cp < limit && *cp == '[')
+			cp++;
+		else if (delim)
+			return 0;
+
 		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		if (ret == 0)
+			return 0;
+
+		if (end < limit && *end == ']')
+			end++;
+		else if (delim)
+			return 0;
 		break;
 	default:
 		BUG();
 	}
 
-	if (ret == 0 || end == cp)
-		return 0;
 	if (endp)
 		*endp = end;
 	return 1;
@@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr,
 	union nf_inet_addr addr;
 	const char *aux = dptr;
 
-	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+	if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
 		pr_debug("ip: %s parse failed.!\n", dptr);
 		return 0;
 	}
@@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct,
 		return 0;
 	dptr += shift;
 
-	if (!parse_addr(ct, dptr, &end, addr, limit))
+	if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
 		return -1;
 	if (end < limit && *end == ':') {
 		end++;
@@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
 	if (ret == 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+	if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
 		return -1;
 	if (*c == ':') {
 		c++;
@@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 			       unsigned int dataoff, unsigned int datalen,
 			       const char *name,
 			       unsigned int *matchoff, unsigned int *matchlen,
-			       union nf_inet_addr *addr)
+			       union nf_inet_addr *addr, bool delim)
 {
 	const char *limit = dptr + datalen;
 	const char *start, *end;
@@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 		return 0;
 
 	start += strlen(name);
-	if (!parse_addr(ct, start, &end, addr, limit))
+	if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
 		return 0;
 	*matchoff = start - dptr;
 	*matchlen = end - start;
@@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
 	return 1;
 }
 
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit)
+{
+	const char *end;
+	int ret;
+
+	memset(addr, 0, sizeof(*addr));
+	switch (nf_ct_l3num(ct)) {
+	case AF_INET:
+		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		break;
+	case AF_INET6:
+		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		break;
+	default:
+		BUG();
+	}
+
+	if (ret == 0)
+		return 0;
+	if (endp)
+		*endp = end;
+	return 1;
+}
+
+/* skip ip address. returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+			const char *limit, int *shift)
+{
+	union nf_inet_addr addr;
+	const char *aux = dptr;
+
+	if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+		pr_debug("ip: %s parse failed.!\n", dptr);
+		return 0;
+	}
+
+	return dptr - aux;
+}
+
 /* SDP header parsing: a SDP session description contains an ordered set of
  * headers, starting with a section containing general session parameters,
  * optionally followed by multiple media descriptions.
@@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
  */
 static const struct sip_header ct_sdp_hdrs[] = {
 	[SDP_HDR_VERSION]		= SDP_HDR("v=", NULL, digits_len),
-	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", epaddr_len),
+	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
 	[SDP_HDR_MEDIA]			= SDP_HDR("m=", NULL, media_len),
 };
 
@@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
 	if (ret <= 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, NULL, addr,
-			dptr + *matchoff + *matchlen))
+	if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+			    dptr + *matchoff + *matchlen))
 		return -1;
 	return 1;
 }
-- 
cgit v1.1


From f22eb25cf5b1157b29ef88c793b71972efc47143 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:47 +0000
Subject: netfilter: nf_nat_sip: fix via header translation with multiple
 parameters

Via-headers are parsed beginning at the first character after the Via-address.
When the address is translated first and its length decreases, the offset to
start parsing at is incorrect and header parameters might be missed.

Update the offset after translating the Via-address to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_sip.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index eef8f29..4ad9cf1 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
 				    hdr, NULL, &matchoff, &matchlen,
 				    &addr, &port) > 0) {
-		unsigned int matchend, poff, plen, buflen, n;
+		unsigned int olen, matchend, poff, plen, buflen, n;
 		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
 
 		/* We're only interested in headers related to this
@@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 				goto next;
 		}
 
+		olen = *datalen;
 		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 
-		matchend = matchoff + matchlen;
+		matchend = matchoff + matchlen + *datalen - olen;
 
 		/* The maddr= parameter (RFC 2361) specifies where to send
 		 * the reply. */
-- 
cgit v1.1


From 68e035c950dbceaf660144bf74054dfdfb6aad15 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 14 Aug 2012 12:47:37 +0200
Subject: netfilter: ctnetlink: fix missing locking while changing conntrack
 from nfqueue

Since 9cb017665 netfilter: add glue code to integrate nfnetlink_queue and
ctnetlink, we can modify the conntrack entry via nfnl_queue. However, the
change of the conntrack entry via nfnetlink_queue requires appropriate
locking to avoid concurrent updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 14f67a2..da4fc37 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1896,10 +1896,15 @@ static int
 ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 {
 	struct nlattr *cda[CTA_MAX+1];
+	int ret;
 
 	nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
 
-	return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+	spin_lock_bh(&nf_conntrack_lock);
+	ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	return ret;
 }
 
 static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
-- 
cgit v1.1


From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:37 +0000
Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and
 __netpoll_setup()

slave_enable_netpoll() and __netpoll_setup() may be called
with read_lock() held, so should use GFP_ATOMIC to allocate
memory. Eric suggested to pass gfp flags to __netpoll_setup().

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c    |  7 ++++---
 net/bridge/br_device.c  | 12 ++++++------
 net/bridge/br_if.c      |  2 +-
 net/bridge/br_private.h |  4 ++--
 net/core/netpoll.c      |  8 ++++----
 5 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 73a2a83..ee4ae09 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -669,19 +669,20 @@ static void vlan_dev_poll_controller(struct net_device *dev)
 	return;
 }
 
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
+				  gfp_t gfp)
 {
 	struct vlan_dev_priv *info = vlan_dev_priv(dev);
 	struct net_device *real_dev = info->real_dev;
 	struct netpoll *netpoll;
 	int err = 0;
 
-	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+	netpoll = kzalloc(sizeof(*netpoll), gfp);
 	err = -ENOMEM;
 	if (!netpoll)
 		goto out;
 
-	err = __netpoll_setup(netpoll, real_dev);
+	err = __netpoll_setup(netpoll, real_dev, gfp);
 	if (err) {
 		kfree(netpoll);
 		goto out;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3334845..ed0e0f9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -213,7 +213,8 @@ static void br_netpoll_cleanup(struct net_device *dev)
 	}
 }
 
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+			    gfp_t gfp)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p, *n;
@@ -222,8 +223,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		if (!p->dev)
 			continue;
-
-		err = br_netpoll_enable(p);
+		err = br_netpoll_enable(p, gfp);
 		if (err)
 			goto fail;
 	}
@@ -236,17 +236,17 @@ fail:
 	goto out;
 }
 
-int br_netpoll_enable(struct net_bridge_port *p)
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	struct netpoll *np;
 	int err = 0;
 
-	np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+	np = kzalloc(sizeof(*p->np), gfp);
 	err = -ENOMEM;
 	if (!np)
 		goto out;
 
-	err = __netpoll_setup(np, p->dev);
+	err = __netpoll_setup(np, p->dev, gfp);
 	if (err) {
 		kfree(np);
 		goto out;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e1144e1..171fd6b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err2;
 
-	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
 		goto err3;
 
 	err = netdev_set_master(dev, br->dev);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a768b24..f507d2a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 		netpoll_send_skb(np, skb);
 }
 
-extern int br_netpoll_enable(struct net_bridge_port *p);
+extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
 extern void br_netpoll_disable(struct net_bridge_port *p);
 #else
 static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
@@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 {
 }
 
-static inline int br_netpoll_enable(struct net_bridge_port *p)
+static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	return 0;
 }
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e4..37cc854 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -715,7 +715,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 }
 EXPORT_SYMBOL(netpoll_parse_options);
 
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 {
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
@@ -734,7 +734,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 	}
 
 	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		npinfo = kmalloc(sizeof(*npinfo), gfp);
 		if (!npinfo) {
 			err = -ENOMEM;
 			goto out;
@@ -752,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 
 		ops = np->dev->netdev_ops;
 		if (ops->ndo_netpoll_setup) {
-			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
 			if (err)
 				goto free_npinfo;
 		}
@@ -857,7 +857,7 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	rtnl_lock();
-	err = __netpoll_setup(np, ndev);
+	err = __netpoll_setup(np, ndev, GFP_KERNEL);
 	rtnl_unlock();
 
 	if (err)
-- 
cgit v1.1


From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:38 +0000
Subject: netpoll: make __netpoll_cleanup non-block

Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup()
may be called with read_lock() held too, so we should make them
non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c   |  6 +-----
 net/bridge/br_device.c |  6 +-----
 net/core/netpoll.c     | 42 ++++++++++++++++++++++++++++++++----------
 3 files changed, 34 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ee4ae09..b65623f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -704,11 +704,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	info->netpoll = NULL;
 
-        /* Wait for transmitting packets to finish before freeing. */
-        synchronize_rcu_bh();
-
-        __netpoll_cleanup(netpoll);
-        kfree(netpoll);
+	__netpoll_free_rcu(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ed0e0f9..f41ba40 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	/* Wait for transmitting packets to finish before freeing. */
-	synchronize_rcu_bh();
-
-	__netpoll_cleanup(np);
-	kfree(np);
+	__netpoll_free_rcu(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37cc854..dc17f1d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -878,6 +878,24 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+	struct netpoll_info *npinfo =
+			container_of(rcu_head, struct netpoll_info, rcu);
+
+	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->txq);
+
+	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
+	cancel_delayed_work(&npinfo->tx_work);
+
+	/* clean after last, unfinished work */
+	__skb_queue_purge(&npinfo->txq);
+	/* now cancel it again */
+	cancel_delayed_work(&npinfo->tx_work);
+	kfree(npinfo);
+}
+
 void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
@@ -903,20 +921,24 @@ void __netpoll_cleanup(struct netpoll *np)
 			ops->ndo_netpoll_cleanup(np->dev);
 
 		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-		/* avoid racing with NAPI reading npinfo */
-		synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
 
-		skb_queue_purge(&npinfo->arp_tx);
-		skb_queue_purge(&npinfo->txq);
-		cancel_delayed_work_sync(&npinfo->tx_work);
+	__netpoll_cleanup(np);
+	kfree(np);
+}
 
-		/* clean after last, unfinished work */
-		__skb_queue_purge(&npinfo->txq);
-		kfree(npinfo);
-	}
+void __netpoll_free_rcu(struct netpoll *np)
+{
+	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
 }
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-- 
cgit v1.1


From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:40 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx()

In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(),
this patch fixes it by using the 'npinfo' passed from netpoll_rx()
where it is already dereferenced with rcu_dereference_bh().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dc17f1d..d055bb0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -543,13 +543,12 @@ static void arp_reply(struct sk_buff *skb)
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
 	int proto, len, ulen;
 	int hits = 0;
 	const struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct netpoll *np, *tmp;
 
 	if (list_empty(&npinfo->rx_np))
-- 
cgit v1.1


From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:42 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()

This patch fixes several problems in the call path of
netpoll_send_skb_on_dev():

1. Disable IRQ's before calling netpoll_send_skb_on_dev().

2. All the callees of netpoll_send_skb_on_dev() should use
   rcu_dereference_bh() to dereference ->npinfo.

3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d055bb0..174346a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -54,7 +54,7 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -170,7 +170,8 @@ static void poll_napi(struct net_device *dev)
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			budget = poll_one_napi(dev->npinfo, napi, budget);
+			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+					       napi, budget);
 			spin_unlock(&napi->poll_lock);
 
 			if (!budget)
@@ -185,13 +186,14 @@ static void service_arp_queue(struct netpoll_info *npi)
 		struct sk_buff *skb;
 
 		while ((skb = skb_dequeue(&npi->arp_tx)))
-			arp_reply(skb);
+			netpoll_arp_reply(skb, npi);
 	}
 }
 
 static void netpoll_poll_dev(struct net_device *dev)
 {
 	const struct net_device_ops *ops;
+	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
 	if (!dev || !netif_running(dev))
 		return;
@@ -206,17 +208,18 @@ static void netpoll_poll_dev(struct net_device *dev)
 	poll_napi(dev);
 
 	if (dev->flags & IFF_SLAVE) {
-		if (dev->npinfo) {
+		if (ni) {
 			struct net_device *bond_dev = dev->master;
 			struct sk_buff *skb;
-			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+			struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+			while ((skb = skb_dequeue(&ni->arp_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+				skb_queue_tail(&bond_ni->arp_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(dev->npinfo);
+	service_arp_queue(ni);
 
 	zap_completion_queue();
 }
@@ -302,6 +305,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
+/* call with IRQ disabled */
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev)
 {
@@ -309,8 +313,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	unsigned long tries;
 	const struct net_device_ops *ops = dev->netdev_ops;
 	/* It is up to the caller to keep npinfo alive. */
-	struct netpoll_info *npinfo = np->dev->npinfo;
+	struct netpoll_info *npinfo;
+
+	WARN_ON_ONCE(!irqs_disabled());
 
+	npinfo = rcu_dereference_bh(np->dev->npinfo);
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
 		__kfree_skb(skb);
 		return;
@@ -319,11 +326,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	/* don't get messages out of order, and no recursion */
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
-		unsigned long flags;
 
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
@@ -347,10 +352,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		}
 
 		WARN_ONCE(!irqs_disabled(),
-			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
 			dev->name, ops->ndo_start_xmit);
 
-		local_irq_restore(flags);
 	}
 
 	if (status != NETDEV_TX_OK) {
@@ -423,9 +427,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
-- 
cgit v1.1


From d30362c0712eb567334b3b66de7c40d4372f2c6f Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:43 +0000
Subject: bridge: add some comments for NETDEV_RELEASE

Add comments on why we don't notify NETDEV_RELEASE.

Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 171fd6b..1c8fdc3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	if (!p || p->br != br)
 		return -EINVAL;
 
+	/* Since more than one interface can be attached to a bridge,
+	 * there still maybe an alternate path for netconsole to use;
+	 * therefore there is no reason for a NETDEV_RELEASE event.
+	 */
 	del_nbp(p);
 
 	spin_lock_bh(&br->lock);
-- 
cgit v1.1


From 4e3828c4bfd90b00a951cad7c8da27d1966beefe Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:44 +0000
Subject: bridge: use list_for_each_entry() in netpoll functions

We don't delete 'p' from the list in the loop,
so we can just use list_for_each_entry().

Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f41ba40..32211fa 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -206,21 +206,20 @@ static void br_poll_controller(struct net_device *br_dev)
 static void br_netpoll_cleanup(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *p, *n;
+	struct net_bridge_port *p;
 
-	list_for_each_entry_safe(p, n, &br->port_list, list) {
+	list_for_each_entry(p, &br->port_list, list)
 		br_netpoll_disable(p);
-	}
 }
 
 static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
 			    gfp_t gfp)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *p, *n;
+	struct net_bridge_port *p;
 	int err = 0;
 
-	list_for_each_entry_safe(p, n, &br->port_list, list) {
+	list_for_each_entry(p, &br->port_list, list) {
 		if (!p->dev)
 			continue;
 		err = br_netpoll_enable(p, gfp);
-- 
cgit v1.1


From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:45 +0000
Subject: netpoll: check netpoll tx status on the right device

Although this doesn't matter actually, because netpoll_tx_running()
doesn't use the parameter, the code will be more readable.

For team_dev_queue_xmit() we have to move it down to avoid
compile errors.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e9466d4..02015a5 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	skb->dev = to->dev;
 
-	if (unlikely(netpoll_tx_running(to->dev))) {
+	if (unlikely(netpoll_tx_running(to->br->dev))) {
 		if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 			kfree_skb(skb);
 		else {
-- 
cgit v1.1


From f3da38932b46d1bf171634cc7aae3da405eaf7a6 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:47 +0000
Subject: vlan: clean up some variable names

To be consistent, s/info/vlan/.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b65623f..0347d48 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -672,8 +672,8 @@ static void vlan_dev_poll_controller(struct net_device *dev)
 static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
 				  gfp_t gfp)
 {
-	struct vlan_dev_priv *info = vlan_dev_priv(dev);
-	struct net_device *real_dev = info->real_dev;
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct net_device *real_dev = vlan->real_dev;
 	struct netpoll *netpoll;
 	int err = 0;
 
@@ -688,7 +688,7 @@ static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *n
 		goto out;
 	}
 
-	info->netpoll = netpoll;
+	vlan->netpoll = netpoll;
 
 out:
 	return err;
@@ -696,13 +696,13 @@ out:
 
 static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 {
-	struct vlan_dev_priv *info = vlan_dev_priv(dev);
-	struct netpoll *netpoll = info->netpoll;
+	struct vlan_dev_priv *vlan= vlan_dev_priv(dev);
+	struct netpoll *netpoll = vlan->netpoll;
 
 	if (!netpoll)
 		return;
 
-	info->netpoll = NULL;
+	vlan->netpoll = NULL;
 
 	__netpoll_free_rcu(netpoll);
 }
-- 
cgit v1.1


From 6eacf8ad8d01c49b95b994b0bf379db2b5b29460 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:48 +0000
Subject: vlan: clean up vlan_dev_hard_start_xmit()

Clean up vlan_dev_hard_start_xmit() function.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 0347d48..4024424 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return rc;
 }
 
+static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)
+		netpoll_send_skb(vlan->netpoll, skb);
+#else
+	BUG();
+#endif
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
-		vlan_tci = vlan_dev_priv(dev)->vlan_id;
+		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-	skb->dev = vlan_dev_priv(dev)->real_dev;
+	skb->dev = vlan->real_dev;
 	len = skb->len;
-	if (netpoll_tx_running(dev))
-		return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+	if (unlikely(netpoll_tx_running(dev)))
+		return vlan_netpoll_send_skb(vlan, skb);
+
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct vlan_pcpu_stats *stats;
 
-		stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
+		stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
 		u64_stats_update_end(&stats->syncp);
 	} else {
-		this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
+		this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
 	}
 
 	return ret;
-- 
cgit v1.1


From 689971b44613883ee74ae9c1b31a864aaa3a8e17 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:49 +0000
Subject: netpoll: handle vlan tags in netpoll tx and rx path

Without this patch, I can't get netconsole logs remotely over
vlan. The reason is probably we don't handle vlan tags in either
netpoll tx or rx path.

I am not sure if I use these vlan functions correctly, at
least this patch works.

Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 174346a..e4ba3e7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,6 +26,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/if_vlan.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <asm/unaligned.h>
@@ -334,6 +335,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_xmit_stopped(txq)) {
+					if (vlan_tx_tag_present(skb) &&
+					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
+						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+						if (unlikely(!skb))
+							break;
+						skb->vlan_tci = 0;
+					}
+
 					status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
@@ -567,6 +576,12 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		return 1;
 	}
 
+	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+		skb = vlan_untag(skb);
+		if (unlikely(!skb))
+			goto out;
+	}
+
 	proto = ntohs(eth_hdr(skb)->h_proto);
 	if (proto != ETH_P_IP)
 		goto out;
-- 
cgit v1.1


From 6bdb7fe31046ac50b47e83c35cd6c6b6160a475d Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:50 +0000
Subject: netpoll: re-enable irq in poll_napi()

napi->poll() needs IRQ enabled, so we have to re-enable IRQ before
calling it.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e4ba3e7..346b1eb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -168,16 +168,24 @@ static void poll_napi(struct net_device *dev)
 	struct napi_struct *napi;
 	int budget = 16;
 
+	WARN_ON_ONCE(!irqs_disabled());
+
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		local_irq_enable();
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
+			rcu_read_lock_bh();
 			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
 					       napi, budget);
+			rcu_read_unlock_bh();
 			spin_unlock(&napi->poll_lock);
 
-			if (!budget)
+			if (!budget) {
+				local_irq_disable();
 				break;
+			}
 		}
+		local_irq_disable();
 	}
 }
 
-- 
cgit v1.1


From 7bd86cc282a458b66c41e3f6676de6656c99b8db Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 12 Aug 2012 20:09:59 +0000
Subject: ipv4: Cache local output routes

Commit caacf05e5ad1abf causes big drop of UDP loop back performance.
The cause of the regression is that we do not cache the local output
routes. Each time we send a datagram from unconnected UDP socket,
the kernel allocates a dst_entry and adds it to the rt_uncached_list.
It creates lock contention on the rt_uncached_lock.

Reported-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4ba974..fd9ecb5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2028,7 +2028,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 		}
 		dev_out = net->loopback_dev;
 		fl4->flowi4_oif = dev_out->ifindex;
-		res.fi = NULL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
-- 
cgit v1.1


From 4855d6f3116e891b66198838b683dce3dcf6e874 Mon Sep 17 00:00:00 2001
From: Igor Maravic <igorm@etf.rs>
Date: Sun, 12 Aug 2012 22:31:58 +0000
Subject: net: ipv6: proc: Fix error handling

Fix error handling in case making of dir dev_snmp6 failes

Signed-off-by: Igor Maravic <igorm@etf.rs>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index da2e92d..745a320 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
 		goto proc_dev_snmp6_fail;
 	return 0;
 
+proc_dev_snmp6_fail:
+	proc_net_remove(net, "snmp6");
 proc_snmp6_fail:
 	proc_net_remove(net, "sockstat6");
-proc_dev_snmp6_fail:
-	proc_net_remove(net, "dev_snmp6");
 	return -ENOMEM;
 }
 
-- 
cgit v1.1


From 6024935f5ff5f1646bce8404416318e5fd4a0c4a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:49:59 +0000
Subject: llc2: Fix silent failure of llc_station_init()

llc_station_init() creates and processes an event skb with no effect
other than to change the state from DOWN to UP.  Allocation failure is
reported, but then ignored by its caller, llc2_init().  Remove this
possibility by simply initialising the state as UP.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_station.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 6828e39..45ddbb9 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb)
 	llc_station_state_process(skb);
 }
 
-int __init llc_station_init(void)
+void __init llc_station_init(void)
 {
-	int rc = -ENOBUFS;
-	struct sk_buff *skb;
-	struct llc_station_state_ev *ev;
-
 	skb_queue_head_init(&llc_main_station.mac_pdu_q);
 	skb_queue_head_init(&llc_main_station.ev_q.list);
 	spin_lock_init(&llc_main_station.ev_q.lock);
@@ -700,20 +696,9 @@ int __init llc_station_init(void)
 			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
-	skb = alloc_skb(0, GFP_ATOMIC);
-	if (!skb)
-		goto out;
-	rc = 0;
 	llc_set_station_handler(llc_station_rcv);
-	ev = llc_station_ev(skb);
-	memset(ev, 0, sizeof(*ev));
 	llc_main_station.maximum_retry	= 1;
-	llc_main_station.state		= LLC_STATION_STATE_DOWN;
-	ev->type	= LLC_STATION_EV_TYPE_SIMPLE;
-	ev->prim_type	= LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
-	rc = llc_station_next_state(skb);
-out:
-	return rc;
+	llc_main_station.state		= LLC_STATION_STATE_UP;
 }
 
 void __exit llc_station_exit(void)
-- 
cgit v1.1


From f4f8720febf0d785a054fc09bde5e3ad09728a58 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:50:43 +0000
Subject: llc2: Call llc_station_exit() on llc2_init() failure path

Otherwise the station packet handler will remain registered even though
the module is unloaded.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c      | 5 +++--
 net/llc/llc_station.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index f6fe4d4..8c2919c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1206,7 +1206,7 @@ static int __init llc2_init(void)
 	rc = llc_proc_init();
 	if (rc != 0) {
 		printk(llc_proc_err_msg);
-		goto out_unregister_llc_proto;
+		goto out_station;
 	}
 	rc = llc_sysctl_init();
 	if (rc) {
@@ -1226,7 +1226,8 @@ out_sysctl:
 	llc_sysctl_exit();
 out_proc:
 	llc_proc_exit();
-out_unregister_llc_proto:
+out_station:
+	llc_station_exit();
 	proto_unregister(&llc_proto);
 	goto out;
 }
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 45ddbb9..bba5184 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -701,7 +701,7 @@ void __init llc_station_init(void)
 	llc_main_station.state		= LLC_STATION_STATE_UP;
 }
 
-void __exit llc_station_exit(void)
+void llc_station_exit(void)
 {
 	llc_set_station_handler(NULL);
 }
-- 
cgit v1.1


From aadf31de16a7b2878af00a02e6557df84efa784b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:50:55 +0000
Subject: llc: Fix races between llc2 handler use and (un)registration

When registering the handlers, any state they rely on must be
completely initialised first.  When unregistering, we must wait until
they are definitely no longer running.  llc_rcv() must also avoid
reading the handler pointers again after checking for NULL.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_input.c   | 21 +++++++++++++++++----
 net/llc/llc_station.c |  2 +-
 2 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index e32cab4..dd3e833 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap,
 void llc_add_pack(int type, void (*handler)(struct llc_sap *sap,
 					    struct sk_buff *skb))
 {
+	smp_wmb(); /* ensure initialisation is complete before it's called */
 	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
 		llc_type_handlers[type - 1] = handler;
 }
@@ -50,11 +51,19 @@ void llc_remove_pack(int type)
 {
 	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
 		llc_type_handlers[type - 1] = NULL;
+	synchronize_net();
 }
 
 void llc_set_station_handler(void (*handler)(struct sk_buff *skb))
 {
+	/* Ensure initialisation is complete before it's called */
+	if (handler)
+		smp_wmb();
+
 	llc_station_handler = handler;
+
+	if (!handler)
+		synchronize_net();
 }
 
 /**
@@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	int dest;
 	int (*rcv)(struct sk_buff *, struct net_device *,
 		   struct packet_type *, struct net_device *);
+	void (*sta_handler)(struct sk_buff *skb);
+	void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		goto drop;
@@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	rcv = rcu_dereference(sap->rcv_func);
 	dest = llc_pdu_type(skb);
-	if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
+	sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+	if (unlikely(!sap_handler)) {
 		if (rcv)
 			rcv(skb, dev, pt, orig_dev);
 		else
@@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 			if (cskb)
 				rcv(cskb, dev, pt, orig_dev);
 		}
-		llc_type_handlers[dest - 1](sap, skb);
+		sap_handler(sap, skb);
 	}
 	llc_sap_put(sap);
 out:
@@ -202,9 +214,10 @@ drop:
 	kfree_skb(skb);
 	goto out;
 handle_station:
-	if (!llc_station_handler)
+	sta_handler = ACCESS_ONCE(llc_station_handler);
+	if (!sta_handler)
 		goto drop;
-	llc_station_handler(skb);
+	sta_handler(skb);
 	goto out;
 }
 
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index bba5184..b2f2bac 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -696,9 +696,9 @@ void __init llc_station_init(void)
 			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
-	llc_set_station_handler(llc_station_rcv);
 	llc_main_station.maximum_retry	= 1;
 	llc_main_station.state		= LLC_STATION_STATE_UP;
+	llc_set_station_handler(llc_station_rcv);
 }
 
 void llc_station_exit(void)
-- 
cgit v1.1


From 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 14 Aug 2012 08:54:51 +0000
Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side
 lock

Cong Wang reports that lockdep detected suspicious RCU usage while
enabling IPV6 forwarding:

 [ 1123.310275] ===============================
 [ 1123.442202] [ INFO: suspicious RCU usage. ]
 [ 1123.558207] 3.6.0-rc1+ #109 Not tainted
 [ 1123.665204] -------------------------------
 [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section!
 [ 1123.992320]
 [ 1123.992320] other info that might help us debug this:
 [ 1123.992320]
 [ 1124.307382]
 [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0
 [ 1124.522220] 2 locks held by sysctl/5710:
 [ 1124.648364]  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff81768498>] rtnl_trylock+0x15/0x17
 [ 1124.882211]  #1:  (rcu_read_lock){.+.+.+}, at: [<ffffffff81871df8>] rcu_lock_acquire+0x0/0x29
 [ 1125.085209]
 [ 1125.085209] stack backtrace:
 [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109
 [ 1125.441291] Call Trace:
 [ 1125.545281]  [<ffffffff8109d915>] lockdep_rcu_suspicious+0x109/0x112
 [ 1125.667212]  [<ffffffff8107c240>] rcu_preempt_sleep_check+0x45/0x47
 [ 1125.781838]  [<ffffffff8107c260>] __might_sleep+0x1e/0x19b
[...]
 [ 1127.445223]  [<ffffffff81757ac5>] call_netdevice_notifiers+0x4a/0x4f
[...]
 [ 1127.772188]  [<ffffffff8175e125>] dev_disable_lro+0x32/0x6b
 [ 1127.885174]  [<ffffffff81872d26>] dev_forward_change+0x30/0xcb
 [ 1128.013214]  [<ffffffff818738c4>] addrconf_forward_change+0x85/0xc5
[...]

addrconf_forward_change() uses RCU iteration over the netdev list,
which is unnecessary since it already holds the RTNL lock.  We also
cannot reasonably require netdevice notifier functions not to sleep.

Reported-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7918181..6bc85f7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 
-	rcu_read_lock();
-	for_each_netdev_rcu(net, dev) {
+	for_each_netdev(net, dev) {
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 				dev_forward_change(idev);
 		}
 	}
-	rcu_read_unlock();
 }
 
 static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
-- 
cgit v1.1


From c03307eab68d583ea6db917681afa14ed1fb3b84 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 14 Aug 2012 08:19:33 -0700
Subject: bridge: fix rcu dereference outside of rcu_read_lock

Alternative solution for problem found by Linux Driver Verification
project (linuxtesting.org).

As it noted in the comment before the br_handle_frame_finish
function, this function should be called under rcu_read_lock.

The problem callgraph:
br_dev_xmit -> br_nf_pre_routing_finish_bridge_slow ->
 -> br_handle_frame_finish -> br_port_get_rcu -> rcu_dereference

And in this case there is no read-lock section.

Reported-by: Denis Efremov <yefremov.denis@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32211fa..070e8a6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+	rcu_read_lock();
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
 		br_nf_pre_routing_finish_bridge_slow(skb);
+		rcu_read_unlock();
 		return NETDEV_TX_OK;
 	}
 #endif
@@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
-	rcu_read_lock();
 	if (is_broadcast_ether_addr(dest))
 		br_flood_deliver(br, skb);
 	else if (is_multicast_ether_addr(dest)) {
-- 
cgit v1.1


From e862f1a9b7df4e8196ebec45ac62295138aa3fc2 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:44 +0000
Subject: atm: fix info leak in getsockopt(SO_ATMPVC)

The ATM code fails to initialize the two padding bytes of struct
sockaddr_atmpvc inserted for alignment. Add an explicit memset(0)
before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/common.c b/net/atm/common.c
index b4b44db..0c0ad93 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 
 		if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
 			return -ENOTCONN;
+		memset(&pvc, 0, sizeof(pvc));
 		pvc.sap_family = AF_ATMPVC;
 		pvc.sap_addr.itf = vcc->dev->number;
 		pvc.sap_addr.vpi = vcc->vpi;
-- 
cgit v1.1


From 3c0c5cfdcd4d69ffc4b9c0907cec99039f30a50a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:45 +0000
Subject: atm: fix info leak via getsockname()

The ATM code fails to initialize the two padding bytes of struct
sockaddr_atmpvc inserted for alignment. Add an explicit memset(0)
before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/pvc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 3a73491..ae03240 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
 		return -ENOTCONN;
 	*sockaddr_len = sizeof(struct sockaddr_atmpvc);
 	addr = (struct sockaddr_atmpvc *)sockaddr;
+	memset(addr, 0, sizeof(*addr));
 	addr->sap_family = AF_ATMPVC;
 	addr->sap_addr.itf = vcc->dev->number;
 	addr->sap_addr.vpi = vcc->vpi;
-- 
cgit v1.1


From e15ca9a0ef9a86f0477530b0f44a725d67f889ee Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:46 +0000
Subject: Bluetooth: HCI - Fix info leak in getsockopt(HCI_FILTER)

The HCI code fails to initialize the two padding bytes of struct
hci_ufilter before copying it to userland -- that for leaking two
bytes kernel stack. Add an explicit memset(0) before filling the
structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a7f04de..a27bbc3 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1009,6 +1009,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
 		{
 			struct hci_filter *f = &hci_pi(sk)->filter;
 
+			memset(&uf, 0, sizeof(uf));
 			uf.type_mask = f->type_mask;
 			uf.opcode    = f->opcode;
 			uf.event_mask[0] = *((u32 *) f->event_mask + 0);
-- 
cgit v1.1


From 3f68ba07b1da811bf383b4b701b129bfcb2e4988 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:47 +0000
Subject: Bluetooth: HCI - Fix info leak via getsockname()

The HCI code fails to initialize the hci_channel member of struct
sockaddr_hci and that for leaks two bytes kernel stack via the
getsockname() syscall. Initialize hci_channel with 0 to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a27bbc3..19fdac7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 	*addr_len = sizeof(*haddr);
 	haddr->hci_family = AF_BLUETOOTH;
 	haddr->hci_dev    = hdev->id;
+	haddr->hci_channel= 0;
 
 	release_sock(sk);
 	return 0;
-- 
cgit v1.1


From 9ad2de43f1aee7e7274a4e0d41465489299e344b Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:48 +0000
Subject: Bluetooth: RFCOMM - Fix info leak in getsockopt(BT_SECURITY)

The RFCOMM code fails to initialize the key_size member of struct
bt_security before copying it to userland -- that for leaking one
byte kernel stack. Initialize key_size with 0 to avoid the info
leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7e1e596..64f55ca 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -822,6 +822,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
 		}
 
 		sec.level = rfcomm_pi(sk)->sec_level;
+		sec.key_size = 0;
 
 		len = min_t(unsigned int, len, sizeof(sec));
 		if (copy_to_user(optval, (char *) &sec, len))
-- 
cgit v1.1


From f9432c5ec8b1e9a09b9b0e5569e3c73db8de432a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:49 +0000
Subject: Bluetooth: RFCOMM - Fix info leak in ioctl(RFCOMMGETDEVLIST)

The RFCOMM code fails to initialize the two padding bytes of struct
rfcomm_dev_list_req inserted for alignment before copying it to
userland. Additionally there are two padding bytes in each instance of
struct rfcomm_dev_info. The ioctl() that for disclosures two bytes plus
dev_num times two bytes uninitialized kernel heap memory.

Allocate the memory using kzalloc() to fix this issue.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/tty.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cb96077..56f1823 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg)
 
 	size = sizeof(*dl) + dev_num * sizeof(*di);
 
-	dl = kmalloc(size, GFP_KERNEL);
+	dl = kzalloc(size, GFP_KERNEL);
 	if (!dl)
 		return -ENOMEM;
 
-- 
cgit v1.1


From 9344a972961d1a6d2c04d9008b13617bcb6ec2ef Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:50 +0000
Subject: Bluetooth: RFCOMM - Fix info leak via getsockname()

The RFCOMM code fails to initialize the trailing padding byte of struct
sockaddr_rc added for alignment. It that for leaks one byte kernel stack
via the getsockname() syscall. Add an explicit memset(0) before filling
the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 64f55ca..1a17850 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
+	memset(sa, 0, sizeof(*sa));
 	sa->rc_family  = AF_BLUETOOTH;
 	sa->rc_channel = rfcomm_pi(sk)->channel;
 	if (peer)
-- 
cgit v1.1


From 792039c73cf176c8e39a6e8beef2c94ff46522ed Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:51 +0000
Subject: Bluetooth: L2CAP - Fix info leak via getsockname()

The L2CAP code fails to initialize the l2_bdaddr_type member of struct
sockaddr_l2 and the padding byte added for alignment. It that for leaks
two bytes kernel stack via the getsockname() syscall. Add an explicit
memset(0) before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/l2cap_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index b94abd3..1497edd 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -245,6 +245,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
+	memset(la, 0, sizeof(struct sockaddr_l2));
 	addr->sa_family = AF_BLUETOOTH;
 	*len = sizeof(struct sockaddr_l2);
 
-- 
cgit v1.1


From 04d4fbca1017c11381e7d82acea21dd741e748bc Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:52 +0000
Subject: l2tp: fix info leak via getsockname()

The L2TP code for IPv6 fails to initialize the l2tp_unused member of
struct sockaddr_l2tpip6 and that for leaks two bytes kernel stack via
the getsockname() syscall. Initialize l2tp_unused with 0 to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 35e1e4b..9275471 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 	lsa->l2tp_family = AF_INET6;
 	lsa->l2tp_flowinfo = 0;
 	lsa->l2tp_scope_id = 0;
+	lsa->l2tp_unused = 0;
 	if (peer) {
 		if (!lsk->peer_conn_id)
 			return -ENOTCONN;
-- 
cgit v1.1


From 3592aaeb80290bda0f2cf0b5456c97bfc638b192 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:53 +0000
Subject: llc: fix info leak via getsockname()

The LLC code wrongly returns 0, i.e. "success", when the socket is
zapped. Together with the uninitialized uaddrlen pointer argument from
sys_getsockname this leads to an arbitrary memory leak of up to 128
bytes kernel stack via the getsockname() syscall.

Return an error instead when the socket is zapped to prevent the info
leak. Also remove the unnecessary memset(0). We don't directly write to
the memory pointed by uaddr but memcpy() a local structure at the end of
the function that is properly initialized.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/af_llc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8c2919c..c219000 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_llc sllc;
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	int rc = 0;
+	int rc = -EBADF;
 
 	memset(&sllc, 0, sizeof(sllc));
 	lock_sock(sk);
 	if (sock_flag(sk, SOCK_ZAPPED))
 		goto out;
 	*uaddrlen = sizeof(sllc);
-	memset(uaddr, 0, *uaddrlen);
 	if (peer) {
 		rc = -ENOTCONN;
 		if (sk->sk_state != TCP_ESTABLISHED)
-- 
cgit v1.1


From 276bdb82dedb290511467a5a4fdbe9f0b52dce6f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:54 +0000
Subject: dccp: check ccid before dereferencing

ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt() might be called with
a NULL ccid pointer leading to a NULL pointer dereference. This could
lead to a privilege escalation if the attacker is able to map page 0 and
prepare it with a fake ccid_ops pointer.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c3582..fb85d37 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
 					u32 __user *optval, int __user *optlen)
 {
 	int rc = -ENOPROTOOPT;
-	if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+	if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
 		rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
 						 optval, optlen);
 	return rc;
@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
 					u32 __user *optval, int __user *optlen)
 {
 	int rc = -ENOPROTOOPT;
-	if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+	if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
 		rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
 						 optval, optlen);
 	return rc;
-- 
cgit v1.1


From 7b07f8eb75aa3097cdfd4f6eac3da49db787381d Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:55 +0000
Subject: dccp: fix info leak via getsockopt(DCCP_SOCKOPT_CCID_TX_INFO)

The CCID3 code fails to initialize the trailing padding bytes of struct
tfrc_tx_info added for alignment on 64 bit architectures. It that for
potentially leaks four bytes kernel stack via the getsockopt() syscall.
Add an explicit memset(0) before filling the structure to avoid the
info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d65e987..119c043 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 	case DCCP_SOCKOPT_CCID_TX_INFO:
 		if (len < sizeof(tfrc))
 			return -EINVAL;
+		memset(&tfrc, 0, sizeof(tfrc));
 		tfrc.tfrctx_x	   = hc->tx_x;
 		tfrc.tfrctx_x_recv = hc->tx_x_recv;
 		tfrc.tfrctx_x_calc = hc->tx_x_calc;
-- 
cgit v1.1


From 2d8a041b7bfe1097af21441cb77d6af95f4f4680 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:56 +0000
Subject: ipvs: fix info leak in getsockopt(IP_VS_SO_GET_TIMEOUT)

If at least one of CONFIG_IP_VS_PROTO_TCP or CONFIG_IP_VS_PROTO_UDP is
not set, __ip_vs_get_timeouts() does not fully initialize the structure
that gets copied to userland and that for leaks up to 12 bytes of kernel
stack. Add an explicit memset(0) before passing the structure to
__ip_vs_get_timeouts() to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Wensong Zhang <wensong@linux-vs.org>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 84444dd..72bf32a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2759,6 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;
 
+		memset(&t, 0, sizeof(t));
 		__ip_vs_get_timeouts(net, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
-- 
cgit v1.1


From 43da5f2e0d0c69ded3d51907d9552310a6b545e8 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 15 Aug 2012 11:31:57 +0000
Subject: net: fix info leak in compat dev_ifconf()

The implementation of dev_ifconf() for the compat ioctl interface uses
an intermediate ifc structure allocated in userland for the duration of
the syscall. Though, it fails to initialize the padding bytes inserted
for alignment and that for leaks four bytes of kernel stack. Add an
explicit memset(0) before filling the structure to avoid the info leak.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index dfe5b66..a5471f8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 	if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
 		return -EFAULT;
 
+	memset(&ifc, 0, sizeof(ifc));
 	if (ifc32.ifcbuf == 0) {
 		ifc32.ifc_len = 0;
 		ifc.ifc_len = 0;
-- 
cgit v1.1


From 2614f86490122bf51eb7c12ec73927f1900f4e7d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 16 Aug 2012 02:25:24 +0200
Subject: netfilter: nf_ct_expect: fix possible access to uninitialized timer

In __nf_ct_expect_check, the function refresh_timer returns 1
if a matching expectation is found and its timer is successfully
refreshed. This results in nf_ct_expect_related returning 0.
Note that at this point:

- the passed expectation is not inserted in the expectation table
  and its timer was not initialized, since we have refreshed one
  matching/existing expectation.

- nf_ct_expect_alloc uses kmem_cache_alloc, so the expectation
  timer is in some undefined state just after the allocation,
  until it is appropriately initialized.

This can be a problem for the SIP helper during the expectation
addition:

 ...
 if (nf_ct_expect_related(rtp_exp) == 0) {
         if (nf_ct_expect_related(rtcp_exp) != 0)
                 nf_ct_unexpect_related(rtp_exp);
 ...

Note that nf_ct_expect_related(rtp_exp) may return 0 for the timer refresh
case that is detailed above. Then, if nf_ct_unexpect_related(rtcp_exp)
returns != 0, nf_ct_unexpect_related(rtp_exp) is called, which does:

 spin_lock_bh(&nf_conntrack_lock);
 if (del_timer(&exp->timeout)) {
         nf_ct_unlink_expect(exp);
         nf_ct_expect_put(exp);
 }
 spin_unlock_bh(&nf_conntrack_lock);

Note that del_timer always returns false if the timer has been
initialized.  However, the timer was not initialized since setup_timer
was not called, therefore, the expectation timer remains in some
undefined state. If I'm not missing anything, this may lead to the
removal an unexistent expectation.

To fix this, the optimization that allows refreshing an expectation
is removed. Now nf_conntrack_expect_related looks more consistent
to me since it always add the expectation in case that it returns
success.

Thanks to Patrick McHardy for participating in the discussion of
this patch.

I think this may be the source of the problem described by:
http://marc.info/?l=netfilter-devel&m=134073514719421&w=2

Reported-by: Rafal Fitt <rafalf@aplusc.com.pl>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 45cf602..527651a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master,
 	}
 }
 
-static inline int refresh_timer(struct nf_conntrack_expect *i)
-{
-	struct nf_conn_help *master_help = nfct_help(i->master);
-	const struct nf_conntrack_expect_policy *p;
-
-	if (!del_timer(&i->timeout))
-		return 0;
-
-	p = &rcu_dereference_protected(
-		master_help->helper,
-		lockdep_is_held(&nf_conntrack_lock)
-		)->expect_policy[i->class];
-	i->timeout.expires = jiffies + p->timeout * HZ;
-	add_timer(&i->timeout);
-	return 1;
-}
-
 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 {
 	const struct nf_conntrack_expect_policy *p;
@@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct nf_conn_help *master_help = nfct_help(master);
 	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(expect);
-	struct hlist_node *n;
+	struct hlist_node *n, *next;
 	unsigned int h;
 	int ret = 1;
 
@@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+	hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
-			/* Refresh timer: if it's dying, ignore.. */
-			if (refresh_timer(i)) {
-				ret = 0;
-				goto out;
+			if (del_timer(&i->timeout)) {
+				nf_ct_unlink_expect(i);
+				nf_ct_expect_put(i);
+				break;
 			}
 		} else if (expect_clash(i, expect)) {
 			ret = -EBUSY;
-- 
cgit v1.1


From 16c0b164bd24d44db137693a36b428ba28970c62 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 15 Aug 2012 20:44:27 +0000
Subject: act_mirred: do not drop packets when fails to mirror it

We drop packet unconditionally when we fail to mirror it. This is not intended
in some cases. Consdier for kvm guest, we may mirror the traffic of the bridge
to a tap device used by a VM. When kernel fails to mirror the packet in
conditions such as when qemu crashes or stop polling the tap, it's hard for the
management software to detect such condition and clean the the mirroring
before. This would lead all packets to the bridge to be dropped and break the
netowrk of other virtual machines.

To solve the issue, the patch does not drop packets when kernel fails to mirror
it, and only drop the redirected packets.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fe81cc1..9c0fd0c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 out:
 	if (err) {
 		m->tcf_qstats.overlimits++;
-		/* should we be asking for packet to be dropped?
-		 * may make sense for redirect case only
-		 */
-		retval = TC_ACT_SHOT;
-	} else {
+		if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+			retval = TC_ACT_SHOT;
+		else
+			retval = m->tcf_action;
+	} else
 		retval = m->tcf_action;
-	}
 	spin_unlock(&m->tcf_lock);
 
 	return retval;
-- 
cgit v1.1


From f796c20cf67aa54c9130d0dc41307c0025719b85 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:24 +0000
Subject: net: netprio: fix files lock and remove useless d_path bits

Add lock to prevent a race with a file closing and also remove
useless and ugly sscanf code. The extra code was never needed
and the case it supposedly protected against is in fact handled
correctly by sock_from_file as pointed out by Al Viro.

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index ed0c043..f65dba3 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -277,12 +277,6 @@ out_free_devname:
 void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
-	char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
-
-	if (!tmp) {
-		pr_warn("Unable to attach cgrp due to alloc failure!\n");
-		return;
-	}
 
 	cgroup_taskset_for_each(p, cgrp, tset) {
 		unsigned int fd;
@@ -296,32 +290,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 			continue;
 		}
 
-		rcu_read_lock();
+		spin_lock(&files->file_lock);
 		fdt = files_fdtable(files);
 		for (fd = 0; fd < fdt->max_fds; fd++) {
-			char *path;
 			struct file *file;
 			struct socket *sock;
-			unsigned long s;
-			int rv, err = 0;
+			int err;
 
 			file = fcheck_files(files, fd);
 			if (!file)
 				continue;
 
-			path = d_path(&file->f_path, tmp, PAGE_SIZE);
-			rv = sscanf(path, "socket:[%lu]", &s);
-			if (rv <= 0)
-				continue;
-
 			sock = sock_from_file(file, &err);
-			if (!err)
+			if (sock)
 				sock_update_netprioidx(sock->sk, p);
 		}
-		rcu_read_unlock();
+		spin_unlock(&files->file_lock);
 		task_unlock(p);
 	}
-	kfree(tmp);
 }
 
 static struct cftype ss_files[] = {
-- 
cgit v1.1


From 48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:30 +0000
Subject: net: netprio: fd passed in SCM_RIGHTS datagram not set correctly

A socket fd passed in a SCM_RIGHTS datagram was not getting
updated with the new tasks cgrp prioidx. This leaves IO on
the socket tagged with the old tasks priority.

To fix this add a check in the scm recvmsg path to update the
sock cgrp prioidx with the new tasks value.

Thanks to Al Viro for catching this.

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/scm.c b/net/core/scm.c
index 8f6ccfd..040cebe 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
 	     i++, cmfptr++)
 	{
+		struct socket *sock;
 		int new_fd;
 		err = security_file_receive(fp[i]);
 		if (err)
@@ -281,6 +282,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		}
 		/* Bump the usage count and install the file. */
 		get_file(fp[i]);
+		sock = sock_from_file(fp[i], &err);
+		if (sock)
+			sock_update_netprioidx(sock->sk, current);
 		fd_install(new_fd, fp[i]);
 	}
 
-- 
cgit v1.1


From 476ad154f3b41dd7d9a08a2f641e28388abc2fd1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 14 Aug 2012 12:34:35 +0000
Subject: net: netprio: fix cgrp create and write priomap race

A race exists where creating cgroups and also updating the priomap
may result in losing a priomap update. This is because priomap
writers are not protected by rtnl_lock.

Move priority writer into rtnl_lock()/rtnl_unlock().

CC: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f65dba3..c75e3f9 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev)
 	u32 max_len;
 	struct netprio_map *map;
 
-	rtnl_lock();
 	max_len = atomic_read(&max_prioidx) + 1;
 	map = rtnl_dereference(dev->priomap);
 	if (!map || map->priomap_len < max_len)
 		ret = extend_netdev_table(dev, max_len);
-	rtnl_unlock();
 
 	return ret;
 }
@@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (!dev)
 		goto out_free_devname;
 
+	rtnl_lock();
 	ret = write_update_netdev_table(dev);
 	if (ret < 0)
 		goto out_put_dev;
 
-	rcu_read_lock();
-	map = rcu_dereference(dev->priomap);
+	map = rtnl_dereference(dev->priomap);
 	if (map)
 		map->priomap[prioidx] = priority;
-	rcu_read_unlock();
 
 out_put_dev:
+	rtnl_unlock();
 	dev_put(dev);
 
 out_free_devname:
-- 
cgit v1.1


From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Thu, 16 Aug 2012 22:02:58 +0000
Subject: af_packet: don't emit packet on orig fanout group

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This result in a loop for software which
generate packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info account.

Reported-by: Aleksandr Kotov <a1k@mail.ru>
Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c         | 16 ++++++++++++++--
 net/packet/af_packet.c |  9 +++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a39354e..debd937 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1642,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+	if (ptype->af_packet_priv == NULL)
+		return false;
+
+	if (ptype->id_match)
+		return ptype->id_match(ptype, skb->sk);
+	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+		return true;
+
+	return false;
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1659,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
 		if ((ptype->dev == dev || !ptype->dev) &&
-		    (ptype->af_packet_priv == NULL ||
-		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+		    (!skb_loop_sk(ptype, skb))) {
 			if (pt_prev) {
 				deliver_skb(skb2, pt_prev, skb->dev);
 				pt_prev = ptype;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ac890a..aee7196 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 	spin_unlock(&f->lock);
 }
 
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+		return true;
+
+	return false;
+}
+
 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 {
 	struct packet_sock *po = pkt_sk(sk);
@@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
+		match->prot_hook.id_match = match_fanout_group;
 		dev_add_pack(&match->prot_hook);
 		list_add(&match->list, &fanout_list);
 	}
-- 
cgit v1.1


From d92c7f8aabae913de16eb855b19cd2002c341896 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Fri, 17 Aug 2012 10:33:12 +0000
Subject: caif: Do not dereference NULL in chnl_recv_cb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In net/caif/chnl_net.c::chnl_recv_cb() we call skb_header_pointer()
which may return NULL, but we do not check for a NULL pointer before
dereferencing it.
This patch adds such a NULL check and properly free's allocated memory
and return an error (-EINVAL) on failure - much better than crashing..

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Acked-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 69771c0..e597733 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 
 	/* check the version of IP */
 	ip_version = skb_header_pointer(skb, 0, 1, &buf);
+	if (!ip_version) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 
 	switch (*ip_version >> 4) {
 	case 4:
-- 
cgit v1.1


From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Aug 2012 02:56:56 -0700
Subject: net: ipv6: fix oops in inet_putpeer()

Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced
a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not
initialized, causing a false positive result from inetpeer_ptr_is_peer(),
which in turn causes a NULL pointer dereference in inet_putpeer().

Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X
EIP: 0060:[<c03abf93>] EFLAGS: 00010246 CPU: 0
EIP is at inet_putpeer+0xe/0x16
EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641
ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4
 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068
CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28
 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4
 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8
 [<c0423de1>] xfrm6_dst_destroy+0x42/0xdb
 [<c038d5f7>] dst_destroy+0x1d/0xa4
 [<c03ef48d>] xfrm_bundle_flo_delete+0x2b/0x36
 [<c0396870>] flow_cache_gc_task+0x85/0x9f
 [<c0142d2b>] process_one_work+0x122/0x441
 [<c043feb5>] ? apic_timer_interrupt+0x31/0x38
 [<c03967eb>] ? flow_cache_new_hashrnd+0x2b/0x2b
 [<c0143e2d>] worker_thread+0x113/0x3cc

Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to
properly initialize the dst's peer pointer.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 8 ++++++++
 net/xfrm/xfrm_policy.c  | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812..f8c4c08 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
 	return 0;
 }
 
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+	struct rt6_info *rt = (struct rt6_info *)xdst;
+
+	rt6_init_peer(rt, net->ipv6.peers);
+}
+
 static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 			   int nfheader_len)
 {
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.get_saddr = 		xfrm6_get_saddr,
 	.decode_session =	_decode_session6,
 	.get_tos =		xfrm6_get_tos,
+	.init_dst =		xfrm6_init_dst,
 	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
 	.blackhole_route =	ip6_blackhole_route,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5a5165..5a2aa17 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
 		xdst->flo.ops = &xfrm_bundle_fc_ops;
+		if (afinfo->init_dst)
+			afinfo->init_dst(net, xdst);
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
-- 
cgit v1.1


From 3de7a37b02f607716753dc669c1dca4f71db08fc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 18 Aug 2012 14:36:44 +0000
Subject: net/core/dev.c: fix kernel-doc warning

Fix kernel-doc warning:

Warning(net/core/dev.c:5745): No description found for parameter 'dev'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	"David S. Miller" <davem@davemloft.net>
Cc:	netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index debd937..8398836 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5744,6 +5744,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
 
 /**
  * netdev_wait_allrefs - wait until all references are gone.
+ * @dev: target net_device
  *
  * This is called when unregistering network devices.
  *
-- 
cgit v1.1


From fae6ef87faeb8853896920c68ee703d715799d28 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 19 Aug 2012 03:30:38 +0000
Subject: net: tcp: move sk_rx_dst_set call after tcp_create_openreq_child()

This commit removes the sk_rx_dst_set calls from
tcp_create_openreq_child(), because at that point the icsk_af_ops
field of ipv6_mapped TCP sockets has not been set to its proper final
value.

Instead, to make sure we get the right sk_rx_dst_set variant
appropriate for the address family of the new connection, we have
tcp_v{4,6}_syn_recv_sock() directly call the appropriate function
shortly after the call to tcp_create_openreq_child() returns.

This also moves inet6_sk_rx_dst_set() to avoid a forward declaration
with the new approach.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Reported-by: Artem Savkov <artem.savkov@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c      |  1 +
 net/ipv4/tcp_minisocks.c |  2 --
 net/ipv6/tcp_ipv6.c      | 25 +++++++++++++------------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7678237..5bf2040 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1462,6 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto exit_nonewsk;
 
 	newsk->sk_gso_type = SKB_GSO_TCPV4;
+	inet_sk_rx_dst_set(newsk, skb);
 
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d9c9dce..6ff7f10 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		struct tcp_sock *oldtp = tcp_sk(sk);
 		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
 
-		newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb);
-
 		/* TCP Cookie Transactions require space for the cookie pair,
 		 * as it differs for each connection.  There is no need to
 		 * copy any s_data_payload stored at the original socket.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb9ce2b..a3e60cc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 }
 #endif
 
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+	dst_hold(dst);
+	sk->sk_rx_dst = dst;
+	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+	if (rt->rt6i_node)
+		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
 static void tcp_v6_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
@@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(newsk, dst, NULL, NULL);
+	inet6_sk_rx_dst_set(newsk, skb);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1729,18 +1742,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
-static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	const struct rt6_info *rt = (const struct rt6_info *)dst;
-
-	dst_hold(dst);
-	sk->sk_rx_dst = dst;
-	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
-	if (rt->rt6i_node)
-		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
-}
-
 static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
-- 
cgit v1.1


From 2dba62c30ec3040ef1b647a8976fd7e6854cc7a7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 19 Aug 2012 10:16:08 +0000
Subject: netfilter: nfnetlink_log: fix NLA_PUT macro removal bug

Commit 1db20a52 (nfnetlink_log: Stop using NLA_PUT*().) incorrectly
converted a NLA_PUT_BE16 macro to nla_put_be32() in nfnetlink_log:

-               NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type));
+               if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type))

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 169ab59..592091c1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -480,7 +480,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 	}
 
 	if (indev && skb_mac_header_was_set(skb)) {
-		if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
+		if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
 		    nla_put_be16(inst->skb, NFULA_HWLEN,
 				 htons(skb->dev->hard_header_len)) ||
 		    nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
-- 
cgit v1.1


From d1c338a509cea5378df59629ad47382810c38623 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Sun, 19 Aug 2012 12:29:16 -0700
Subject: libceph: delay debugfs initialization until we learn global_id

The debugfs directory includes the cluster fsid and our unique global_id.
We need to delay the initialization of the debug entry until we have
learned both the fsid and our global_id from the monitor or else the
second client can't create its debugfs entry and will fail (and multiple
client instances aren't properly reflected in debugfs).

Reported by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
---
 net/ceph/ceph_common.c |  1 -
 net/ceph/debugfs.c     |  4 ++++
 net/ceph/mon_client.c  | 51 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 69e38db..a802029 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -84,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
 			return -1;
 		}
 	} else {
-		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
 		memcpy(&client->fsid, fsid, sizeof(*fsid));
 	}
 	return 0;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 54b531a..38b5dc1 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
 		 client->monc.auth->global_id);
 
+	dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+	BUG_ON(client->debugfs_dir);
 	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
 	if (!client->debugfs_dir)
 		goto out;
@@ -234,6 +237,7 @@ out:
 
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
 {
+	dout("ceph_debugfs_client_cleanup %p\n", client);
 	debugfs_remove(client->debugfs_osdmap);
 	debugfs_remove(client->debugfs_monmap);
 	debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 105d533..900ea0f 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -311,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
 EXPORT_SYMBOL(ceph_monc_open_session);
 
 /*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+	dout("have_debugfs_info fsid %d globalid %lld\n",
+	     (int)monc->client->have_fsid, monc->auth->global_id);
+	return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
+/*
  * The monitor responds with mount ack indicate mount success.  The
  * included client ticket allows the client to talk to MDSs and OSDs.
  */
@@ -320,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 	struct ceph_client *client = monc->client;
 	struct ceph_monmap *monmap = NULL, *old = monc->monmap;
 	void *p, *end;
+	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
 
+	had_debugfs_info = have_debugfs_info(monc);
+
 	dout("handle_monmap\n");
 	p = msg->front.iov_base;
 	end = p + msg->front.iov_len;
@@ -344,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 
 	if (!client->have_fsid) {
 		client->have_fsid = true;
+		if (!had_debugfs_info && have_debugfs_info(monc)) {
+			pr_info("client%lld fsid %pU\n",
+				ceph_client_id(monc->client),
+				&monc->client->fsid);
+			init_debugfs = 1;
+		}
 		mutex_unlock(&monc->mutex);
-		/*
-		 * do debugfs initialization without mutex to avoid
-		 * creating a locking dependency
-		 */
-		ceph_debugfs_client_init(client);
+
+		if (init_debugfs) {
+			/*
+			 * do debugfs initialization without mutex to avoid
+			 * creating a locking dependency
+			 */
+			ceph_debugfs_client_init(monc->client);
+		}
+
 		goto out_unlocked;
 	}
 out:
@@ -865,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 {
 	int ret;
 	int was_auth = 0;
+	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
+	had_debugfs_info = have_debugfs_info(monc);
 	if (monc->auth->ops)
 		was_auth = monc->auth->ops->is_authenticated(monc->auth);
 	monc->pending_auth = 0;
@@ -889,7 +915,22 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 		__send_subscribe(monc);
 		__resend_generic_request(monc);
 	}
+
+	if (!had_debugfs_info && have_debugfs_info(monc)) {
+		pr_info("client%lld fsid %pU\n",
+			ceph_client_id(monc->client),
+			&monc->client->fsid);
+		init_debugfs = 1;
+	}
 	mutex_unlock(&monc->mutex);
+
+	if (init_debugfs) {
+		/*
+		 * do debugfs initialization without mutex to avoid
+		 * creating a locking dependency
+		 */
+		ceph_debugfs_client_init(monc->client);
+	}
 }
 
 static int __validate_auth(struct ceph_mon_client *monc)
-- 
cgit v1.1


From be1e44441a560c43c136a562d49a1c9623c91197 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 9 Aug 2012 18:12:28 -0400
Subject: svcrpc: fix BUG() in svc_tcp_clear_pages

Examination of svc_tcp_clear_pages shows that it assumes sk_tcplen is
consistent with sk_pages[] (in particular, sk_pages[n] can't be NULL if
sk_tcplen would lead us to expect n pages of data).

svc_tcp_restore_pages zeroes out sk_pages[] while leaving sk_tcplen.
This is OK, since both functions are serialized by XPT_BUSY.  However,
that means the inconsistency must be repaired before dropping XPT_BUSY.

Therefore we should be ensuring that svc_tcp_save_pages repairs the
problem before exiting svc_tcp_recv_record on error.

Symptoms were a BUG() in svc_tcp_clear_pages.

Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 18bc130..998aa8c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	if (len >= 0)
 		svsk->sk_tcplen += len;
 	if (len != want) {
+		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
 			goto err_other;
-		svc_tcp_save_pages(svsk, rqstp);
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			svsk->sk_tcplen, svsk->sk_reclen);
 		goto err_noclose;
-- 
cgit v1.1


From f06f00a24d76e168ecb38d352126fd203937b601 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 20 Aug 2012 16:04:40 -0400
Subject: svcrpc: sends on closed socket should stop immediately

svc_tcp_sendto sets XPT_CLOSE if we fail to transmit the entire reply.
However, the XPT_CLOSE won't be acted on immediately.  Meanwhile other
threads could send further replies before the socket is really shut
down.  This can manifest as data corruption: for example, if a truncated
read reply is followed by another rpc reply, that second reply will look
to the client like further read data.

Symptoms were data corruption preceded by svc_tcp_sendto logging
something like

	kernel: rpc-srv/tcp: nfsd: sent only 963696 when sending 1048708 bytes - shutting down socket

Cc: stable@vger.kernel.org
Reported-by: Malahal Naineni <malahal@us.ibm.com>
Tested-by: Malahal Naineni <malahal@us.ibm.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 88f2bf6..0d693a8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -794,7 +794,8 @@ int svc_send(struct svc_rqst *rqstp)
 
 	/* Grab mutex to serialize outgoing data. */
 	mutex_lock(&xprt->xpt_mutex);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+			|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
-- 
cgit v1.1


From d10f27a750312ed5638c876e4bd6aa83664cccd8 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 17 Aug 2012 17:31:53 -0400
Subject: svcrpc: fix svc_xprt_enqueue/svc_recv busy-looping

The rpc server tries to ensure that there will be room to send a reply
before it receives a request.

It does this by tracking, in xpt_reserved, an upper bound on the total
size of the replies that is has already committed to for the socket.

Currently it is adding in the estimate for a new reply *before* it
checks whether there is space available.  If it finds that there is not
space, it then subtracts the estimate back out.

This may lead the subsequent svc_xprt_enqueue to decide that there is
space after all.

The results is a svc_recv() that will repeatedly return -EAGAIN, causing
server threads to loop without doing any actual work.

Cc: stable@vger.kernel.org
Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 0d693a8..bac973a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
  */
 void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
@@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 				rqstp, rqstp->rq_xprt);
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 		pool->sp_stats.threads_woken++;
 		wake_up(&rqstp->rq_wait);
 	} else {
@@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 
 		/* As there is a shortage of threads and this request
 		 * had to be queued, don't allow the thread to wait so
@@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		else
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
 		dprintk("svc: got len=%d\n", len);
+		rqstp->rq_reserved = serv->sv_max_mesg;
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	}
 	svc_xprt_received(xprt);
 
-- 
cgit v1.1


From 144d56e91044181ec0ef67aeca91e9a8b5718348 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Aug 2012 00:22:46 +0000
Subject: tcp: fix possible socket refcount problem

Commit 6f458dfb40 (tcp: improve latencies of timer triggered events)
added bug leading to following trace :

[ 2866.131281] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000
[ 2866.131726]
[ 2866.132188] =========================
[ 2866.132281] [ BUG: held lock freed! ]
[ 2866.132281] 3.6.0-rc1+ #622 Not tainted
[ 2866.132281] -------------------------
[ 2866.132281] kworker/0:1/652 is freeing memory ffff880019ec0000-ffff880019ec0a1f, with a lock still held there!
[ 2866.132281]  (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6
[ 2866.132281] 4 locks held by kworker/0:1/652:
[ 2866.132281]  #0:  (rpciod){.+.+.+}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f
[ 2866.132281]  #1:  ((&task->u.tk_work)){+.+.+.}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f
[ 2866.132281]  #2:  (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6
[ 2866.132281]  #3:  (&icsk->icsk_retransmit_timer){+.-...}, at: [<ffffffff81078017>] run_timer_softirq+0x1ad/0x35f
[ 2866.132281]
[ 2866.132281] stack backtrace:
[ 2866.132281] Pid: 652, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #622
[ 2866.132281] Call Trace:
[ 2866.132281]  <IRQ>  [<ffffffff810bc527>] debug_check_no_locks_freed+0x112/0x159
[ 2866.132281]  [<ffffffff818a0839>] ? __sk_free+0xfd/0x114
[ 2866.132281]  [<ffffffff811549fa>] kmem_cache_free+0x6b/0x13a
[ 2866.132281]  [<ffffffff818a0839>] __sk_free+0xfd/0x114
[ 2866.132281]  [<ffffffff818a08c0>] sk_free+0x1c/0x1e
[ 2866.132281]  [<ffffffff81911e1c>] tcp_write_timer+0x51/0x56
[ 2866.132281]  [<ffffffff81078082>] run_timer_softirq+0x218/0x35f
[ 2866.132281]  [<ffffffff81078017>] ? run_timer_softirq+0x1ad/0x35f
[ 2866.132281]  [<ffffffff810f5831>] ? rb_commit+0x58/0x85
[ 2866.132281]  [<ffffffff81911dcb>] ? tcp_write_timer_handler+0x148/0x148
[ 2866.132281]  [<ffffffff81070bd6>] __do_softirq+0xcb/0x1f9
[ 2866.132281]  [<ffffffff81a0a00c>] ? _raw_spin_unlock+0x29/0x2e
[ 2866.132281]  [<ffffffff81a1227c>] call_softirq+0x1c/0x30
[ 2866.132281]  [<ffffffff81039f38>] do_softirq+0x4a/0xa6
[ 2866.132281]  [<ffffffff81070f2b>] irq_exit+0x51/0xad
[ 2866.132281]  [<ffffffff81a129cd>] do_IRQ+0x9d/0xb4
[ 2866.132281]  [<ffffffff81a0a3ef>] common_interrupt+0x6f/0x6f
[ 2866.132281]  <EOI>  [<ffffffff8109d006>] ? sched_clock_cpu+0x58/0xd1
[ 2866.132281]  [<ffffffff81a0a172>] ? _raw_spin_unlock_irqrestore+0x4c/0x56
[ 2866.132281]  [<ffffffff81078692>] mod_timer+0x178/0x1a9
[ 2866.132281]  [<ffffffff818a00aa>] sk_reset_timer+0x19/0x26
[ 2866.132281]  [<ffffffff8190b2cc>] tcp_rearm_rto+0x99/0xa4
[ 2866.132281]  [<ffffffff8190dfba>] tcp_event_new_data_sent+0x6e/0x70
[ 2866.132281]  [<ffffffff8190f7ea>] tcp_write_xmit+0x7de/0x8e4
[ 2866.132281]  [<ffffffff818a565d>] ? __alloc_skb+0xa0/0x1a1
[ 2866.132281]  [<ffffffff8190f952>] __tcp_push_pending_frames+0x2e/0x8a
[ 2866.132281]  [<ffffffff81904122>] tcp_sendmsg+0xb32/0xcc6
[ 2866.132281]  [<ffffffff819229c2>] inet_sendmsg+0xaa/0xd5
[ 2866.132281]  [<ffffffff81922918>] ? inet_autobind+0x5f/0x5f
[ 2866.132281]  [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb
[ 2866.132281]  [<ffffffff8189adab>] sock_sendmsg+0xa3/0xc4
[ 2866.132281]  [<ffffffff810f5de6>] ? rb_reserve_next_event+0x26f/0x2d5
[ 2866.132281]  [<ffffffff8103e6a9>] ? native_sched_clock+0x29/0x6f
[ 2866.132281]  [<ffffffff8103e6f8>] ? sched_clock+0x9/0xd
[ 2866.132281]  [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb
[ 2866.132281]  [<ffffffff8189ae03>] kernel_sendmsg+0x37/0x43
[ 2866.132281]  [<ffffffff8199ce49>] xs_send_kvec+0x77/0x80
[ 2866.132281]  [<ffffffff8199cec1>] xs_sendpages+0x6f/0x1a0
[ 2866.132281]  [<ffffffff8107826d>] ? try_to_del_timer_sync+0x55/0x61
[ 2866.132281]  [<ffffffff8199d0d2>] xs_tcp_send_request+0x55/0xf1
[ 2866.132281]  [<ffffffff8199bb90>] xprt_transmit+0x89/0x1db
[ 2866.132281]  [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c
[ 2866.132281]  [<ffffffff81999d92>] call_transmit+0x1c5/0x20e
[ 2866.132281]  [<ffffffff819a0d55>] __rpc_execute+0x6f/0x225
[ 2866.132281]  [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c
[ 2866.132281]  [<ffffffff819a0f33>] rpc_async_schedule+0x28/0x34
[ 2866.132281]  [<ffffffff810835d6>] process_one_work+0x24d/0x47f
[ 2866.132281]  [<ffffffff81083567>] ? process_one_work+0x1de/0x47f
[ 2866.132281]  [<ffffffff819a0f0b>] ? __rpc_execute+0x225/0x225
[ 2866.132281]  [<ffffffff81083a6d>] worker_thread+0x236/0x317
[ 2866.132281]  [<ffffffff81083837>] ? process_scheduled_works+0x2f/0x2f
[ 2866.132281]  [<ffffffff8108b7b8>] kthread+0x9a/0xa2
[ 2866.132281]  [<ffffffff81a12184>] kernel_thread_helper+0x4/0x10
[ 2866.132281]  [<ffffffff81a0a4b0>] ? retint_restore_args+0x13/0x13
[ 2866.132281]  [<ffffffff8108b71e>] ? __init_kthread_worker+0x5a/0x5a
[ 2866.132281]  [<ffffffff81a12180>] ? gs_change+0x13/0x13
[ 2866.308506] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000
[ 2866.309689] =============================================================================
[ 2866.310254] BUG TCP (Not tainted): Object already free
[ 2866.310254] -----------------------------------------------------------------------------
[ 2866.310254]

The bug comes from the fact that timer set in sk_reset_timer() can run
before we actually do the sock_hold(). socket refcount reaches zero and
we free the socket too soon.

timer handler is not allowed to reduce socket refcnt if socket is owned
by the user, or we need to change sk_reset_timer() implementation.

We should take a reference on the socket in case TCP_DELACK_TIMER_DEFERRED
or TCP_DELACK_TIMER_DEFERRED bit are set in tsq_flags

Also fix a typo in tcp_delack_timer(), where TCP_WRITE_TIMER_DEFERRED
was used instead of TCP_DELACK_TIMER_DEFERRED.

For consistency, use same socket refcount change for TCP_MTU_REDUCED_DEFERRED,
even if not fired from a timer.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c   |  8 +++++---
 net/ipv4/tcp_output.c | 14 +++++++++-----
 net/ipv4/tcp_timer.c  |  6 ++++--
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5bf2040..00a748d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
 			tp->mtu_info = info;
-			if (!sock_owned_by_user(sk))
+			if (!sock_owned_by_user(sk)) {
 				tcp_v4_mtu_reduced(sk);
-			else
-				set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+			} else {
+				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+					sock_hold(sk);
+			}
 			goto out;
 		}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20dfd89..d046326 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
 	if (flags & (1UL << TCP_TSQ_DEFERRED))
 		tcp_tsq_handler(sk);
 
-	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
 		tcp_write_timer_handler(sk);
-
-	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+		__sock_put(sk);
+	}
+	if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
 		tcp_delack_timer_handler(sk);
-
-	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+		__sock_put(sk);
+	}
+	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
 		sk->sk_prot->mtu_reduced(sk);
+		__sock_put(sk);
+	}
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad..b774a03 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
 		inet_csk(sk)->icsk_ack.blocked = 1;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
-		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
 		tcp_write_timer_handler(sk);
 	} else {
 		/* deleguate our work to tcp_release_cb() */
-		set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
-- 
cgit v1.1


From 1a7b27c97ce675b42eeb7bfaf6e15c34f35c8f95 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Mon, 20 Aug 2012 02:52:09 +0000
Subject: ipv4: Use newinet->inet_opt in inet_csk_route_child_sock()

Since 0e734419923bd ("ipv4: Use inet_csk_route_child_sock() in DCCP and
TCP."), inet_csk_route_child_sock() is called instead of
inet_csk_route_req().

However, after creating the child-sock in tcp/dccp_v4_syn_recv_sock(),
ireq->opt is set to NULL, before calling inet_csk_route_child_sock().
Thus, inside inet_csk_route_child_sock() opt is always NULL and the
SRR-options are not respected anymore.
Packets sent by the server won't have the correct destination-IP.

This patch fixes it by accessing newinet->inet_opt instead of ireq->opt
inside inet_csk_route_child_sock().

Reported-by: Luca Boccassi <luca.boccassi@gmail.com>
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17..7f75f21 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *newinet = inet_sk(newsk);
-	struct ip_options_rcu *opt = ireq->opt;
+	struct ip_options_rcu *opt;
 	struct net *net = sock_net(sk);
 	struct flowi4 *fl4;
 	struct rtable *rt;
 
 	fl4 = &newinet->cork.fl.u.ip4;
+
+	rcu_read_lock();
+	opt = rcu_dereference(newinet->inet_opt);
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 		goto no_route;
 	if (opt && opt->opt.is_strictroute && rt->rt_gateway)
 		goto route_err;
+	rcu_read_unlock();
 	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
 no_route:
+	rcu_read_unlock();
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
-- 
cgit v1.1


From a9915a1b52df52ad87f3b33422da95cf25372f09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Aug 2012 07:26:45 +0000
Subject: ipv4: fix ip header ident selection in __ip_make_skb()

Christian Casteyde reported a kmemcheck 32-bit read from uninitialized
memory in __ip_select_ident().

It turns out that __ip_make_skb() called ip_select_ident() before
properly initializing iph->daddr.

This is a bug uncovered by commit 1d861aa4b3fb (inet: Minimize use of
cached route inetpeer.)

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=46131

Reported-by: Christian Casteyde <casteyde.christian@free.fr>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 147ccc3..c196d74 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ihl = 5;
 	iph->tos = inet->tos;
 	iph->frag_off = df;
-	ip_select_ident(iph, &rt->dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
-- 
cgit v1.1


From e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Aug 2012 06:21:17 +0000
Subject: af_netlink: force credentials passing [CVE-2012-3520]

Pablo Neira Ayuso discovered that avahi and
potentially NetworkManager accept spoofed Netlink messages because of a
kernel bug.  The kernel passes all-zero SCM_CREDENTIALS ancillary data
to the receiver if the sender did not provide such data, instead of not
including any such data at all or including the correct data from the
peer (as it is the case with AF_UNIX).

This bug was introduced in commit 16e572626961
(af_unix: dont send SCM_CREDENTIALS by default)

This patch forces passing credentials for netlink, as
before the regression.

Another fix would be to not add SCM_CREDENTIALS in
netlink messages if not provided by the sender, but it
might break some programs.

With help from Florian Weimer & Petr Matousek

This issue is designated as CVE-2012-3520

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 net/unix/af_unix.c       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5463969..1445d73 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &scm;
 
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, true);
 	if (err < 0)
 		return err;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e4768c1..c5ee4ff 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
@@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.1


From 6d4221b53707486dfad3f5bfe568d2ce7f4c9863 Mon Sep 17 00:00:00 2001
From: Jim Schutt <jaschut@sandia.gov>
Date: Fri, 10 Aug 2012 10:37:38 -0700
Subject: libceph: avoid truncation due to racing banners

Because the Ceph client messenger uses a non-blocking connect, it is
possible for the sending of the client banner to race with the
arrival of the banner sent by the peer.

When ceph_sock_state_change() notices the connect has completed, it
schedules work to process the socket via con_work().  During this
time the peer is writing its banner, and arrival of the peer banner
races with con_work().

If con_work() calls try_read() before the peer banner arrives, there
is nothing for it to do, after which con_work() calls try_write() to
send the client's banner.  In this case Ceph's protocol negotiation
can complete succesfully.

The server-side messenger immediately sends its banner and addresses
after accepting a connect request, *before* actually attempting to
read or verify the banner from the client.  As a result, it is
possible for the banner from the server to arrive before con_work()
calls try_read().  If that happens, try_read() will read the banner
and prepare protocol negotiation info via prepare_write_connect().
prepare_write_connect() calls con_out_kvec_reset(), which discards
the as-yet-unsent client banner.  Next, con_work() calls
try_write(), which sends the protocol negotiation info rather than
the banner that the peer is expecting.

The result is that the peer sees an invalid banner, and the client
reports "negotiation failed".

Fix this by moving con_out_kvec_reset() out of
prepare_write_connect() to its callers at all locations except the
one where the banner might still need to be sent.

[elder@inktak.com: added note about server-side behavior]

Signed-off-by: Jim Schutt <jaschut@sandia.gov>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 net/ceph/messenger.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b979675..24c5eea 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -915,7 +915,6 @@ static int prepare_write_connect(struct ceph_connection *con)
 	con->out_connect.authorizer_len = auth ?
 		cpu_to_le32(auth->authorizer_buf_len) : 0;
 
-	con_out_kvec_reset(con);
 	con_out_kvec_add(con, sizeof (con->out_connect),
 					&con->out_connect);
 	if (auth && auth->authorizer_buf_len)
@@ -1557,6 +1556,7 @@ static int process_connect(struct ceph_connection *con)
 			return -1;
 		}
 		con->auth_retry = 1;
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1577,6 +1577,7 @@ static int process_connect(struct ceph_connection *con)
 		       ENTITY_NAME(con->peer_name),
 		       ceph_pr_addr(&con->peer_addr.in_addr));
 		reset_connection(con);
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1601,6 +1602,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->out_connect.connect_seq),
 		     le32_to_cpu(con->in_reply.connect_seq));
 		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -1617,6 +1619,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->in_reply.global_seq));
 		get_global_seq(con->msgr,
 			       le32_to_cpu(con->in_reply.global_seq));
+		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			return ret;
@@ -2135,7 +2138,11 @@ more:
 		BUG_ON(con->state != CON_STATE_CONNECTING);
 		con->state = CON_STATE_NEGOTIATING;
 
-		/* Banner is good, exchange connection info */
+		/*
+		 * Received banner is good, exchange connection info.
+		 * Do not reset out_kvec, as sending our banner raced
+		 * with receiving peer banner after connect completed.
+		 */
 		ret = prepare_write_connect(con);
 		if (ret < 0)
 			goto out;
-- 
cgit v1.1


From 27f011243a6e4e8b81078df1d83608dae31e3d38 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Mon, 20 Aug 2012 11:28:25 -0700
Subject: mac80211: fix DS to MBSS address translation

The destination address of unicast frames forwarded through a mesh gate
was being replaced with the broadcast address. Instead leave the
original destination address as the mesh DA. If the nexthop address is
not in the mpath table it will be resolved. If that fails, the frame
will be forwarded to known mesh gates.

Reported-by: Cedric Voncken <cedric.voncken@acksys.fr>
Signed-off-by: Thomas Pedersen <thomas@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index acf712f..c5e8c9c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1811,37 +1811,31 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
 					sdata, NULL, NULL);
 		} else {
-			int is_mesh_mcast = 1;
-			const u8 *mesh_da;
+			/* DS -> MBSS (802.11-2012 13.11.3.3).
+			 * For unicast with unknown forwarding information,
+			 * destination might be in the MBSS or if that fails
+			 * forwarded to another mesh gate. In either case
+			 * resolution will be handled in ieee80211_xmit(), so
+			 * leave the original DA. This also works for mcast */
+			const u8 *mesh_da = skb->data;
+
+			if (mppath)
+				mesh_da = mppath->mpp;
+			else if (mpath)
+				mesh_da = mpath->dst;
+			rcu_read_unlock();
 
-			if (is_multicast_ether_addr(skb->data))
-				/* DA TA mSA AE:SA */
-				mesh_da = skb->data;
-			else {
-				static const u8 bcast[ETH_ALEN] =
-					{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-				if (mppath) {
-					/* RA TA mDA mSA AE:DA SA */
-					mesh_da = mppath->mpp;
-					is_mesh_mcast = 0;
-				} else if (mpath) {
-					mesh_da = mpath->dst;
-					is_mesh_mcast = 0;
-				} else {
-					/* DA TA mSA AE:SA */
-					mesh_da = bcast;
-				}
-			}
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					mesh_da, sdata->vif.addr);
-			rcu_read_unlock();
-			if (is_mesh_mcast)
+			if (is_multicast_ether_addr(mesh_da))
+				/* DA TA mSA AE:SA */
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
 							skb->data + ETH_ALEN,
 							NULL);
 			else
+				/* RA TA mDA mSA AE:DA SA */
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
-- 
cgit v1.1


From 9b04f350057863d1fad1ba071e09362a1da3503e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Aug 2012 20:48:29 +0000
Subject: ipv4: properly update pmtu

Sylvain Munault reported following info :

 - TCP connection get "stuck" with data in send queue when doing
   "large" transfers ( like typing 'ps ax' on a ssh connection )
 - Only happens on path where the PMTU is lower than the MTU of
   the interface
 - Is not present right after boot, it only appears 10-20min after
   boot or so. (and that's inside the _same_ TCP connection, it works
   fine at first and then in the same ssh session, it'll get stuck)
 - Definitely seems related to fragments somehow since I see a router
   sending ICMP message saying fragmentation is needed.
 - Exact same setup works fine with kernel 3.5.1

Problem happens when the 10 minutes (ip_rt_mtu_expires) expiration
period is over.

ip_rt_update_pmtu() calls dst_set_expires() to rearm a new expiration,
but dst_set_expires() does nothing because dst.expires is already set.

It seems we want to set the expires field to a new value, regardless
of prior one.

With help from Julian Anastasov.

Reported-by: Sylvain Munaut <s.munaut@whatever-company.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
CC: Julian Anastasov <ja@ssi.bg>
Tested-by: Sylvain Munaut <s.munaut@whatever-company.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fd9ecb5..8c8c748 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -956,7 +956,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 		dst->obsolete = DST_OBSOLETE_KILL;
 	} else {
 		rt->rt_pmtu = mtu;
-		dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+		rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
 	}
 }
 
-- 
cgit v1.1


From a0dfb2634e5671770f598cda08002d8cda66ac77 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Thu, 23 Aug 2012 19:51:21 +0800
Subject: af_packet: match_fanout_group() can be static

cc: Eric Leblond <eric@regit.org>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aee7196..c5c9e2a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1273,7 +1273,7 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 	spin_unlock(&f->lock);
 }
 
-bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+static bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
 {
 	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
 		return true;
-- 
cgit v1.1


From 78df76a065ae3b5dbcb9a29912adc02f697de498 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 24 Aug 2012 05:40:47 +0000
Subject: ipv4: take rt_uncached_lock only if needed

Multicast traffic allocates dst with DST_NOCACHE, but dst is
not inserted into rt_uncached_list.

This slowdown multicast workloads on SMP because rt_uncached_lock is
contended.

Change the test before taking the lock to actually check the dst
was inserted into rt_uncached_list.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8c8c748..24fd4c5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1263,7 +1263,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (dst->flags & DST_NOCACHE) {
+	if (!list_empty(&rt->rt_uncached)) {
 		spin_lock_bh(&rt_uncached_lock);
 		list_del(&rt->rt_uncached);
 		spin_unlock_bh(&rt_uncached_lock);
-- 
cgit v1.1


From 20e1db19db5d6b9e4e83021595eab0dc8f107bef Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 23 Aug 2012 02:09:11 +0000
Subject: netlink: fix possible spoofing from non-root processes

Non-root user-space processes can send Netlink messages to other
processes that are well-known for being subscribed to Netlink
asynchronous notifications. This allows ilegitimate non-root
process to send forged messages to Netlink subscribers.

The userspace process usually verifies the legitimate origin in
two ways:

a) Socket credentials. If UID != 0, then the message comes from
   some ilegitimate process and the message needs to be dropped.

b) Netlink portID. In general, portID == 0 means that the origin
   of the messages comes from the kernel. Thus, discarding any
   message not coming from the kernel.

However, ctnetlink sets the portID in event messages that has
been triggered by some user-space process, eg. conntrack utility.
So other processes subscribed to ctnetlink events, eg. conntrackd,
know that the event was triggered by some user-space action.

Neither of the two ways to discard ilegitimate messages coming
from non-root processes can help for ctnetlink.

This patch adds capability validation in case that dst_pid is set
in netlink_sendmsg(). This approach is aggressive since existing
applications using any Netlink bus to deliver messages between
two user-space processes will break. Note that the exception is
NETLINK_USERSOCK, since it is reserved for netlink-to-netlink
userspace communication.

Still, if anyone wants that his Netlink bus allows netlink-to-netlink
userspace, then they can set NL_NONROOT_SEND. However, by default,
I don't think it makes sense to allow to use NETLINK_ROUTE to
communicate two processes that are sending no matter what information
that is not related to link/neighbouring/routing. They should be using
NETLINK_USERSOCK instead for that.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1445d73..5270238 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1373,7 +1373,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		dst_pid = addr->nl_pid;
 		dst_group = ffs(addr->nl_groups);
 		err =  -EPERM;
-		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
+		if ((dst_group || dst_pid) &&
+		    !netlink_capable(sock, NL_NONROOT_SEND))
 			goto out;
 	} else {
 		dst_pid = nlk->dst_pid;
@@ -2147,6 +2148,7 @@ static void __init netlink_add_usersock_entry(void)
 	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
 	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
 	nl_table[NETLINK_USERSOCK].registered = 1;
+	nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
 
 	netlink_table_ungrab();
 }
-- 
cgit v1.1


From 7c4a56fec379ac0d7754e0d4da6a7361f1a4fe64 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 23 Aug 2012 07:05:17 +0000
Subject: tcp: fix cwnd reduction for non-sack recovery

The cwnd reduction in fast recovery is based on the number of packets
newly delivered per ACK. For non-sack connections every DUPACK
signifies a packet has been delivered, but the sender mistakenly
skips counting them for cwnd reduction.

The fix is to compute newly_acked_sacked after DUPACKs are accounted
in sacked_out for non-sack connections.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 85308b9..6e38c6c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  * tcp_xmit_retransmit_queue().
  */
 static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
-				  int newly_acked_sacked, bool is_dupack,
+				  int prior_sacked, bool is_dupack,
 				  int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
+	int newly_acked_sacked = 0;
 	int fast_rexmit = 0;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_add_reno_sack(sk);
 		} else
 			do_lost = tcp_try_undo_partial(sk, pkts_acked);
+		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 		break;
 	case TCP_CA_Loss:
 		if (flag & FLAG_DATA_ACKED)
@@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			if (is_dupack)
 				tcp_add_reno_sack(sk);
 		}
+		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 
 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
 			tcp_try_undo_dsack(sk);
@@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets;
 	int prior_sacked = tp->sacked_out;
 	int pkts_acked = 0;
-	int newly_acked_sacked = 0;
 	bool frto_cwnd = false;
 
 	/* If the ack is older than previous acks
@@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
 	pkts_acked = prior_packets - tp->packets_out;
-	newly_acked_sacked = (prior_packets - prior_sacked) -
-			     (tp->packets_out - tp->sacked_out);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
@@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
@@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
@@ -3718,8 +3718,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
-		newly_acked_sacked = tp->sacked_out - prior_sacked;
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	}
 
-- 
cgit v1.1


From 072a9c48600409d72aeb0d5b29fbb75861a06631 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 24 Aug 2012 21:41:11 +0000
Subject: netpoll: revert 6bdb7fe3104 and fix be_poll() instead

Against -net.

In the patch "netpoll: re-enable irq in poll_napi()", I tried to
fix the following warning:

[100718.051041] ------------[ cut here ]------------
[100718.051048] WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x7d/0xb0()
(Not tainted)
[100718.051049] Hardware name: ProLiant BL460c G7
...
[100718.051068] Call Trace:
[100718.051073]  [<ffffffff8106b747>] ? warn_slowpath_common+0x87/0xc0
[100718.051075]  [<ffffffff8106b79a>] ? warn_slowpath_null+0x1a/0x20
[100718.051077]  [<ffffffff810747ed>] ? local_bh_enable_ip+0x7d/0xb0
[100718.051080]  [<ffffffff8150041b>] ? _spin_unlock_bh+0x1b/0x20
[100718.051085]  [<ffffffffa00ee974>] ? be_process_mcc+0x74/0x230 [be2net]
[100718.051088]  [<ffffffffa00ea68c>] ? be_poll_tx_mcc+0x16c/0x290 [be2net]
[100718.051090]  [<ffffffff8144fe76>] ? netpoll_poll_dev+0xd6/0x490
[100718.051095]  [<ffffffffa01d24a5>] ? bond_poll_controller+0x75/0x80 [bonding]
[100718.051097]  [<ffffffff8144fde5>] ? netpoll_poll_dev+0x45/0x490
[100718.051100]  [<ffffffff81161b19>] ? ksize+0x19/0x80
[100718.051102]  [<ffffffff81450437>] ? netpoll_send_skb_on_dev+0x157/0x240

by reenabling IRQ before calling ->poll, but it seems more
problems are introduced after that patch:

http://ozlabs.org/~akpm/stuff/IMG_20120824_122054.jpg
http://marc.info/?l=linux-netdev&m=134563282530588&w=2

So it is safe to fix be2net driver code directly.

This patch reverts the offending commit and fixes be_poll() by
avoid disabling BH there, this is okay because be_poll()
can be called either by poll_napi() which already disables
IRQ, or by net_rx_action() which already disables BH.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Sylvain Munaut <s.munaut@whatever-company.com>
Cc: Sylvain Munaut <s.munaut@whatever-company.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Miller <davem@davemloft.net>
Cc: Sathya Perla <sathya.perla@emulex.com>
Cc: Subbu Seetharaman <subbu.seetharaman@emulex.com>
Cc: Ajit Khaparde <ajit.khaparde@emulex.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Tested-by: Sylvain Munaut <s.munaut@whatever-company.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 346b1eb..e4ba3e7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -168,24 +168,16 @@ static void poll_napi(struct net_device *dev)
 	struct napi_struct *napi;
 	int budget = 16;
 
-	WARN_ON_ONCE(!irqs_disabled());
-
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		local_irq_enable();
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			rcu_read_lock_bh();
 			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
 					       napi, budget);
-			rcu_read_unlock_bh();
 			spin_unlock(&napi->poll_lock);
 
-			if (!budget) {
-				local_irq_disable();
+			if (!budget)
 				break;
-			}
 		}
-		local_irq_disable();
 	}
 }
 
-- 
cgit v1.1


From 0a54e939d8c2647c711ef2369c3e73c3b7f3028c Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 29 Aug 2012 06:49:11 +0000
Subject: ipvs: fix error return code

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 72bf32a..f51013c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1171,8 +1171,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		goto out_err;
 	}
 	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (!svc->stats.cpustats)
+	if (!svc->stats.cpustats) {
+		ret = -ENOMEM;
 		goto out_err;
+	}
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 0);
-- 
cgit v1.1


From ef6acf68c259d907517dcc0ffefcd4e30276ae29 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 29 Aug 2012 06:49:16 +0000
Subject: netfilter: ctnetlink: fix error return code in init path

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da4fc37..9807f32 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2790,7 +2790,8 @@ static int __init ctnetlink_init(void)
 		goto err_unreg_subsys;
 	}
 
-	if (register_pernet_subsys(&ctnetlink_net_ops)) {
+	ret = register_pernet_subsys(&ctnetlink_net_ops);
+	if (ret < 0) {
 		pr_err("ctnetlink_init: cannot register pernet operations\n");
 		goto err_unreg_exp_subsys;
 	}
-- 
cgit v1.1


From 6fc09f10f12477bdaa54e94f9cb428bef6d81315 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 29 Aug 2012 06:49:17 +0000
Subject: netfilter: nfnetlink_log: fix error return code in init path

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_log.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 592091c1..14e2f39 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -996,8 +996,10 @@ static int __init nfnetlink_log_init(void)
 
 #ifdef CONFIG_PROC_FS
 	if (!proc_create("nfnetlink_log", 0440,
-			 proc_net_netfilter, &nful_file_ops))
+			 proc_net_netfilter, &nful_file_ops)) {
+		status = -ENOMEM;
 		goto cleanup_logger;
+	}
 #endif
 	return status;
 
-- 
cgit v1.1


From 3f509c689a07a4aa989b426893d8491a7ffcc410 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 29 Aug 2012 15:24:09 +0000
Subject: netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP
 expectation

We're hitting bug while trying to reinsert an already existing
expectation:

kernel BUG at kernel/timer.c:895!
invalid opcode: 0000 [#1] SMP
[...]
Call Trace:
 <IRQ>
 [<ffffffffa0069563>] nf_ct_expect_related_report+0x4a0/0x57a [nf_conntrack]
 [<ffffffff812d423a>] ? in4_pton+0x72/0x131
 [<ffffffffa00ca69e>] ip_nat_sdp_media+0xeb/0x185 [nf_nat_sip]
 [<ffffffffa00b5b9b>] set_expected_rtp_rtcp+0x32d/0x39b [nf_conntrack_sip]
 [<ffffffffa00b5f15>] process_sdp+0x30c/0x3ec [nf_conntrack_sip]
 [<ffffffff8103f1eb>] ? irq_exit+0x9a/0x9c
 [<ffffffffa00ca738>] ? ip_nat_sdp_media+0x185/0x185 [nf_nat_sip]

We have to remove the RTP expectation if the RTCP expectation hits EBUSY
since we keep trying with other ports until we succeed.

Reported-by: Rafal Fitt <rafalf@aplusc.com.pl>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_sip.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 4ad9cf1..9c87cde 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -502,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
 		ret = nf_ct_expect_related(rtcp_exp);
 		if (ret == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (ret == -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			continue;
+		} else if (ret < 0) {
 			nf_ct_unexpect_related(rtp_exp);
 			port = 0;
 			break;
-- 
cgit v1.1


From 99469c32f79a32d8481f87be0d3c66dad286f4ec Mon Sep 17 00:00:00 2001
From: "xeb@mail.ru" <xeb@mail.ru>
Date: Fri, 24 Aug 2012 01:07:38 +0000
Subject: l2tp: avoid to use synchronize_rcu in tunnel free function

Avoid to use synchronize_rcu in l2tp_tunnel_free because context may be
atomic.

Signed-off-by: Dmitry Kozlov <xeb@mail.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 3 +--
 net/l2tp/l2tp_core.h | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 393355d..513cab0 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
 	/* Remove from tunnel list */
 	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
 	list_del_rcu(&tunnel->list);
+	kfree_rcu(tunnel, rcu);
 	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-	synchronize_rcu();
 
 	atomic_dec(&l2tp_tunnel_count);
-	kfree(tunnel);
 }
 
 /* Create a socket for the tunnel, if one isn't set up by
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a38ec6c..56d583e 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg {
 
 struct l2tp_tunnel {
 	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	struct rcu_head rcu;
 	rwlock_t		hlist_lock;	/* protect session_hlist */
 	struct hlist_head	session_hlist[L2TP_HASH_SIZE];
 						/* hashed list of sessions,
-- 
cgit v1.1


From acbb219d5f53821b2d0080d047800410c0420ea1 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@aristanetworks.com>
Date: Fri, 24 Aug 2012 07:38:35 +0000
Subject: net: ipv4: ipmr_expire_timer causes crash when removing net namespace

When tearing down a net namespace, ipv4 mr_table structures are freed
without first deactivating their timers. This can result in a crash in
run_timer_softirq.
This patch mimics the corresponding behaviour in ipv6.
Locking and synchronization seem to be adequate.
We are about to kfree mrt, so existing code should already make sure that
no other references to mrt are pending or can be created by incoming traffic.
The functions invoked here do not cause new references to mrt or other
race conditions to be created.
Invoking del_timer_sync guarantees that ipmr_expire_timer is inactive.
Both ipmr_expire_process (whose completion we may have to wait in
del_timer_sync) and mroute_clean_tables internally use mfc_unres_lock
or other synchronizations when needed, and they both only modify mrt.

Tested in Linux 3.4.8.

Signed-off-by: Francesco Ruggeri <fruggeri@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4..ebdf06f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 static struct kmem_cache *mrt_cachep __read_mostly;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static void ipmr_free_table(struct mr_table *mrt);
+
 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			 struct sk_buff *skb, struct mfc_cache *cache,
 			 int local);
@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			      struct mfc_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct mr_table *mrt);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 
 	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
 		list_del(&mrt->list);
-		kfree(mrt);
+		ipmr_free_table(mrt);
 	}
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
 }
@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net)
 
 static void __net_exit ipmr_rules_exit(struct net *net)
 {
-	kfree(net->ipv4.mrt);
+	ipmr_free_table(net->ipv4.mrt);
 }
 #endif
 
@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 	return mrt;
 }
 
+static void ipmr_free_table(struct mr_table *mrt)
+{
+	del_timer_sync(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
+}
+
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
-- 
cgit v1.1


From c5ae7d41927dbd208c885d4794e30617ad6cdf12 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Aug 2012 12:33:07 +0000
Subject: ipv4: must use rcu protection while calling fib_lookup

Following lockdep splat was reported by Pavel Roskin :

[ 1570.586223] ===============================
[ 1570.586225] [ INFO: suspicious RCU usage. ]
[ 1570.586228] 3.6.0-rc3-wl-main #98 Not tainted
[ 1570.586229] -------------------------------
[ 1570.586231] /home/proski/src/linux/net/ipv4/route.c:645 suspicious rcu_dereference_check() usage!
[ 1570.586233]
[ 1570.586233] other info that might help us debug this:
[ 1570.586233]
[ 1570.586236]
[ 1570.586236] rcu_scheduler_active = 1, debug_locks = 0
[ 1570.586238] 2 locks held by Chrome_IOThread/4467:
[ 1570.586240]  #0:  (slock-AF_INET){+.-...}, at: [<ffffffff814f2c0c>] release_sock+0x2c/0xa0
[ 1570.586253]  #1:  (fnhe_lock){+.-...}, at: [<ffffffff815302fc>] update_or_create_fnhe+0x2c/0x270
[ 1570.586260]
[ 1570.586260] stack backtrace:
[ 1570.586263] Pid: 4467, comm: Chrome_IOThread Not tainted 3.6.0-rc3-wl-main #98
[ 1570.586265] Call Trace:
[ 1570.586271]  [<ffffffff810976ed>] lockdep_rcu_suspicious+0xfd/0x130
[ 1570.586275]  [<ffffffff8153042c>] update_or_create_fnhe+0x15c/0x270
[ 1570.586278]  [<ffffffff815305b3>] __ip_rt_update_pmtu+0x73/0xb0
[ 1570.586282]  [<ffffffff81530619>] ip_rt_update_pmtu+0x29/0x90
[ 1570.586285]  [<ffffffff815411dc>] inet_csk_update_pmtu+0x2c/0x80
[ 1570.586290]  [<ffffffff81558d1e>] tcp_v4_mtu_reduced+0x2e/0xc0
[ 1570.586293]  [<ffffffff81553bc4>] tcp_release_cb+0xa4/0xb0
[ 1570.586296]  [<ffffffff814f2c35>] release_sock+0x55/0xa0
[ 1570.586300]  [<ffffffff815442ef>] tcp_sendmsg+0x4af/0xf50
[ 1570.586305]  [<ffffffff8156fc60>] inet_sendmsg+0x120/0x230
[ 1570.586308]  [<ffffffff8156fb40>] ? inet_sk_rebuild_header+0x40/0x40
[ 1570.586312]  [<ffffffff814f4bdd>] ? sock_update_classid+0xbd/0x3b0
[ 1570.586315]  [<ffffffff814f4c50>] ? sock_update_classid+0x130/0x3b0
[ 1570.586320]  [<ffffffff814ec435>] do_sock_write+0xc5/0xe0
[ 1570.586323]  [<ffffffff814ec4a3>] sock_aio_write+0x53/0x80
[ 1570.586328]  [<ffffffff8114bc83>] do_sync_write+0xa3/0xe0
[ 1570.586332]  [<ffffffff8114c5a5>] vfs_write+0x165/0x180
[ 1570.586335]  [<ffffffff8114c805>] sys_write+0x45/0x90
[ 1570.586340]  [<ffffffff815d2722>] system_call_fastpath+0x16/0x1b

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 24fd4c5..82cf2a7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -934,12 +934,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
 
+	rcu_read_lock();
 	if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
 				      jiffies + ip_rt_mtu_expires);
 	}
+	rcu_read_unlock();
 	return mtu;
 }
 
-- 
cgit v1.1


From 5b423f6a40a0327f9d40bc8b97ce9be266f74368 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 29 Aug 2012 16:25:49 +0000
Subject: netfilter: nf_conntrack: fix racy timer handling with reliable events

Existing code assumes that del_timer returns true for alive conntrack
entries. However, this is not true if reliable events are enabled.
In that case, del_timer may return true for entries that were
just inserted in the dying list. Note that packets / ctnetlink may
hold references to conntrack entries that were just inserted to such
list.

This patch fixes the issue by adding an independent timer for
event delivery. This increases the size of the ecache extension.
Still we can revisit this later and use variable size extensions
to allocate this area on demand.

Tested-by: Oliver Smith <olipro@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf48755..2ceec64 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -249,12 +249,15 @@ static void death_by_event(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
 		/* bad luck, let's retry again */
-		ct->timeout.expires = jiffies +
+		ecache->timeout.expires = jiffies +
 			(random32() % net->ct.sysctl_events_retry_timeout);
-		add_timer(&ct->timeout);
+		add_timer(&ecache->timeout);
 		return;
 	}
 	/* we've got the event delivered, now it's dying */
@@ -268,6 +271,9 @@ static void death_by_event(unsigned long ul_conntrack)
 void nf_ct_insert_dying_list(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	/* add this conntrack to the dying list */
 	spin_lock_bh(&nf_conntrack_lock);
@@ -275,10 +281,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct)
 			     &net->ct.dying);
 	spin_unlock_bh(&nf_conntrack_lock);
 	/* set a new timer to retry event delivery */
-	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
-	ct->timeout.expires = jiffies +
+	setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
+	ecache->timeout.expires = jiffies +
 		(random32() % net->ct.sysctl_events_retry_timeout);
-	add_timer(&ct->timeout);
+	add_timer(&ecache->timeout);
 }
 EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 
-- 
cgit v1.1


From 48f125ce1cc3ff275f9587b5bf56bf0f90766c7d Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 29 Aug 2012 06:49:12 +0000
Subject: net: ipv6: fix error return code

Initialize return variable before exiting on an error path.

The initial initialization of the return variable is also dropped, because
that value is never used.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/esp6.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6dc7fd3..282f372 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct esp_data *esp = x->data;
 
 	/* skb is pure payload to encrypt */
-	err = -ENOMEM;
-
 	aead = esp->aead;
 	alen = crypto_aead_authsize(aead);
 
@@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
-	if (!tmp)
+	if (!tmp) {
+		err = -ENOMEM;
 		goto error;
+	}
 
 	seqhi = esp_tmp_seqhi(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
-- 
cgit v1.1


From 599901c3e4204e9d9c5a24df5402cd91617a2a26 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 29 Aug 2012 06:49:15 +0000
Subject: net/xfrm/xfrm_state.c: fix error return code

Initialize return variable before exiting on an error path.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
(
if@p1 (\(ret < 0\|ret != 0\))
 { ... return ret; }
|
ret@p1 = 0
)
... when != ret = e1
    when != &ret
*if(...)
{
  ... when != ret = e2
      when forall
 return ret;
}

// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 87cd0e4..210be48 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1994,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 		goto error;
 
 	x->outer_mode = xfrm_get_mode(x->props.mode, family);
-	if (x->outer_mode == NULL)
+	if (x->outer_mode == NULL) {
+		err = -EPROTONOSUPPORT;
 		goto error;
+	}
 
 	if (init_replay) {
 		err = xfrm_init_replay(x);
-- 
cgit v1.1