From a5f4cea74f1397bb29d0bbdabeb05bd05a23a741 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 21:42:42 -0400
Subject: sctp: Use correct address family in sctp_getsockopt_peer_addrs()

The function should use the address family of the address when
trying to determine the length of the structure.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sctp')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229..1282a0e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4384,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
 				transports) {
 		memcpy(&temp, &from->ipaddr, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
-		addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
+		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
 		if (space_left < addrlen)
 			return -ENOMEM;
 		if (copy_to_user(to, &temp, addrlen))
-- 
cgit v1.1


From c17b02b38aa99ef806c7066ef19a6f51122304f1 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 21:42:43 -0400
Subject: sctp: send SHUTDOWN-ACK chunk back to the source.

SHUTDOWN-ACK is always sent to the primary path the first time,
but it is better to transmit the SHUTDOWN-ACK chunk to the same destination
transport address from which the SHUTDOWN chunk was received.
Based on the work from Wei Yongjun <yjwei@cn.fujitsu.com>.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_sideeffect.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4c5bed9..49fb9ac 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
 {
 	struct sctp_transport *t;
 
-	t = sctp_assoc_choose_alter_transport(asoc,
+	if (chunk->transport)
+		t = chunk->transport;
+	else {
+		t = sctp_assoc_choose_alter_transport(asoc,
 					      asoc->shutdown_last_sent_to);
+		chunk->transport = t;
+	}
 	asoc->shutdown_last_sent_to = t;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
-	chunk->transport = t;
 }
 
 /* Helper function to change the state of an association. */
-- 
cgit v1.1


From bd69b981a354be40cc709f3046f0c56f00da6163 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 21:42:43 -0400
Subject: sctp: assure at least one T3-rtx timer is running if a FORWARD TSN is
 sent

PR-SCTP extension section 3.5 Sender Side Implementation of PR-SCTP:
  C5) If a FORWARD TSN is sent, the sender MUST assure that at
      least one T3-rtx timer is running.

So this patch makes sure that at least one T3-rtx timer is running
if a FORWARD TSN is sent or is about to be sent.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index abfc0b8..16d451a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -854,6 +854,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			if (status  != SCTP_XMIT_OK) {
 				/* put the chunk back */
 				list_add(&chunk->list, &q->control_chunk_list);
+			} else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+				/* PR-SCTP C5) If a FORWARD TSN is sent, the
+				 * sender MUST assure that at least one T3-rtx
+				 * timer is running.
+				 */
+				sctp_transport_reset_timers(transport, 0);
 			}
 			break;
 
-- 
cgit v1.1


From 6429d3dc4bd6251b01c11b851e23a4d60f079e06 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 21:42:44 -0400
Subject: sctp: missing set src and dest port while lookup output route

While looking up the output route, we do not set the src and dest
port. This causes us to get a wrong route if the outbound
transport has been set up for IPsec with a src or dst port.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/protocol.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net/sctp')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 704298f..1827498 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 
 	memset(&fl, 0x0, sizeof(struct flowi));
 	fl.fl4_dst  = daddr->v4.sin_addr.s_addr;
+	fl.fl_ip_dport = daddr->v4.sin_port;
 	fl.proto = IPPROTO_SCTP;
 	if (asoc) {
 		fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
 		fl.oif = asoc->base.sk->sk_bound_dev_if;
+		fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
 	}
-	if (saddr)
+	if (saddr) {
 		fl.fl4_src = saddr->v4.sin_addr.s_addr;
+		fl.fl_ip_sport = saddr->v4.sin_port;
+	}
 
 	SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
@@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+			fl.fl_ip_sport = laddr->a.v4.sin_port;
 			if (!ip_route_output_key(&init_net, &rt, &fl)) {
 				dst = &rt->u.dst;
 				goto out_unlock;
-- 
cgit v1.1


From bc4f841a05364b2572bcc266e9fd7e9cf5f06d5b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:38:53 -0400
Subject: sctp: fix to retranmit at least one DATA chunk

While retransmitting, if a control chunk exists, such as a
FORWARD TSN chunk, and the DATA chunk cannot be bundled with
this control chunk because of the PMTU limit, no DATA chunk
will be retransmitted in the current implementation. This
patch makes sure to retransmit at least one DATA chunk in this case.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 16d451a..e333d58 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		if (fast_rtx && !chunk->fast_retransmit)
 			continue;
 
+redo:
 		/* Attempt to append this chunk to the packet. */
 		status = sctp_packet_append_chunk(pkt, chunk);
 
 		switch (status) {
 		case SCTP_XMIT_PMTU_FULL:
+			if (!pkt->has_data && !pkt->has_cookie_echo) {
+				/* If this packet did not contain DATA then
+				 * retransmission did not happen, so do it
+				 * again.  We'll ignore the error here since
+				 * control chunks are already freed so there
+				 * is nothing we can do.
+				 */
+				sctp_packet_transmit(pkt);
+				goto redo;
+			}
+
 			/* Send this packet.  */
 			error = sctp_packet_transmit(pkt);
 
-- 
cgit v1.1


From fbdf501c9374966a56829ecca3a7f25d2b49a305 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:39:26 -0400
Subject: sctp: Do no select unconfirmed transports for retransmissions

An unconfirmed transport is one that we have not been
able to reach since the beginning.  There is no point in
trying to retransmit data on those transports.  Also, the
specification forbids it due to security issues.

Reported-by: Frank Schuster <frank.schuster01@web.de>

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index df5abbf..de830c2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -762,7 +762,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 		asoc->peer.retran_path = peer;
 	}
 
-	if (asoc->peer.active_path == asoc->peer.retran_path) {
+	if (asoc->peer.active_path == asoc->peer.retran_path &&
+	    peer->state != SCTP_UNCONFIRMED) {
 		asoc->peer.retran_path = peer;
 	}
 
@@ -1318,7 +1319,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 			/* Keep track of the next transport in case
 			 * we don't find any active transport.
 			 */
-			if (!next)
+			if (t->state != SCTP_UNCONFIRMED && !next)
 				next = t;
 		}
 	}
-- 
cgit v1.1


From ec7b9519509061bbc09a43284c3570aa492e07f0 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: use sctp_chunk_is_data macro to decide a chunk is data chunk

The sctp_chunk_is_data macro is defined to decide
whether a chunk is a data chunk or not.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e333d58..a4fe7de 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 	/* If it is data, queue it up, otherwise, send it
 	 * immediately.
 	 */
-	if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
+	if (sctp_chunk_is_data(chunk)) {
 		/* Is it OK to queue data chunks?  */
 		/* From 9. Termination of Association
 		 *
-- 
cgit v1.1


From 787a51a0878f7bee3a9a83040077301e1556b69a Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: implement sctp association probing module

This patch implements an SCTP association probing module; the module
will be called sctp_probe.

This module allows for capturing the changes to SCTP association
state in response to incoming packets. It is used for debugging
SCTP congestion control algorithms.

Usage:
  $ modprobe sctp_probe [full=n] [port=n] [bufsize=n]
  $ cat /proc/net/sctpprobe

  The output format is:
    TIME     ASSOC     LPORT RPORT MTU    RWND  UNACK <REMOTE-ADDR   STATE  CWND   SSTHRESH  INFLIGHT  PARTIAL_BYTES_ACKED MTU> ...

  The output will be like this:
    9.226086 c4064c48  9000  8000  1500    53352     1 *192.168.0.19  1     4380    54784     1252        0     1500
    9.287195 c4064c48  9000  8000  1500    45144     5 *192.168.0.19  1     5880    54784     6500        0     1500
    9.289130 c4064c48  9000  8000  1500    42724     5 *192.168.0.19  1     7380    54784     6500        0     1500
    9.620332 c4064c48  9000  8000  1500    48284     4 *192.168.0.19  1     8880    54784     5200        0     1500
    ......

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/Kconfig  |  12 +++
 net/sctp/Makefile |   3 +
 net/sctp/probe.c  | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 228 insertions(+)
 create mode 100644 net/sctp/probe.c

(limited to 'net/sctp')

diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 58b3e88..126b014 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,6 +37,18 @@ menuconfig IP_SCTP
 
 if IP_SCTP
 
+config NET_SCTPPROBE
+	tristate "SCTP: Association probing"
+        depends on PROC_FS && KPROBES
+        ---help---
+        This module allows for capturing the changes to SCTP association
+        state in response to incoming packets. It is used for debugging
+        SCTP congestion control algorithms. If you don't understand
+        what was just said, you don't need it: say N.
+
+        To compile this code as a module, choose M here: the
+        module will be called sctp_probe.
+
 config SCTP_DBG_MSG
 	bool "SCTP: Debug messages"
 	help
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6b79473..5c30b7a 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_IP_SCTP) += sctp.o
+obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
 
 sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  protocol.o endpointola.o associola.o \
@@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
 	  output.o input.o debug.o ssnmap.o auth.o
 
+sctp_probe-y := probe.o
+
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
new file mode 100644
index 0000000..8f025d5
--- /dev/null
+++ b/net/sctp/probe.c
@@ -0,0 +1,213 @@
+/*
+ * sctp_probe - Observe the SCTP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for SCTP from Stephen Hemminger's code
+ * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
+MODULE_DESCRIPTION("SCTP snooper");
+MODULE_LICENSE("GPL");
+
+static int port __read_mostly = 0;
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+static int bufsize __read_mostly = 64 * 1024;
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+static int full __read_mostly = 1;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");
+module_param(full, int, 0);
+
+static const char procname[] = "sctpprobe";
+
+static struct {
+	struct kfifo	  fifo;
+	spinlock_t	  lock;
+	wait_queue_head_t wait;
+	struct timespec	  tstart;
+} sctpw;
+
+static void printl(const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	char tbuf[256];
+
+	va_start(args, fmt);
+	len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
+	va_end(args);
+
+	kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	wake_up(&sctpw.wait);
+}
+
+static int sctpprobe_open(struct inode *inode, struct file *file)
+{
+	kfifo_reset(&sctpw.fifo);
+	getnstimeofday(&sctpw.tstart);
+
+	return 0;
+}
+
+static ssize_t sctpprobe_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+	unsigned char *tbuf;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (len == 0)
+		return 0;
+
+	tbuf = vmalloc(len);
+	if (!tbuf)
+		return -ENOMEM;
+
+	error = wait_event_interruptible(sctpw.wait,
+					 kfifo_len(&sctpw.fifo) != 0);
+	if (error)
+		goto out_free;
+
+	cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
+
+out_free:
+	vfree(tbuf);
+
+	return error ? error : cnt;
+}
+
+static const struct file_operations sctpprobe_fops = {
+	.owner	= THIS_MODULE,
+	.open	= sctpprobe_open,
+	.read	= sctpprobe_read,
+};
+
+sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *sp;
+	static __u32 lcwnd = 0;
+	struct timespec now;
+
+	sp = asoc->peer.primary_path;
+
+	if ((full || sp->cwnd != lcwnd) &&
+	    (!port || asoc->peer.port == port ||
+	     ep->base.bind_addr.port == port)) {
+		lcwnd = sp->cwnd;
+
+		getnstimeofday(&now);
+		now = timespec_sub(now, sctpw.tstart);
+
+		printl("%lu.%06lu ", (unsigned long) now.tv_sec,
+		       (unsigned long) now.tv_nsec / NSEC_PER_USEC);
+
+		printl("%p %5d %5d %5d %8d %5d ", asoc,
+		       ep->base.bind_addr.port, asoc->peer.port,
+		       asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
+
+		list_for_each_entry(sp, &asoc->peer.transport_addr_list,
+					transports) {
+			if (sp == asoc->peer.primary_path)
+				printl("*");
+
+			if (sp->ipaddr.sa.sa_family == AF_INET)
+				printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
+			else
+				printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
+
+			printl("%2u %8u %8u %8u %8u %8u ",
+			       sp->state, sp->cwnd, sp->ssthresh,
+			       sp->flight_size, sp->partial_bytes_acked,
+			       sp->pathmtu);
+		}
+		printl("\n");
+	}
+
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe sctp_recv_probe = {
+	.kp	= {
+		.symbol_name = "sctp_sf_eat_sack_6_2",
+	},
+	.entry	= jsctp_sf_eat_sack,
+};
+
+static __init int sctpprobe_init(void)
+{
+	int ret = -ENOMEM;
+
+	init_waitqueue_head(&sctpw.wait);
+	spin_lock_init(&sctpw.lock);
+	if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
+		return ret;
+
+	if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
+				  &sctpprobe_fops))
+		goto free_kfifo;
+
+	ret = register_jprobe(&sctp_recv_probe);
+	if (ret)
+		goto remove_proc;
+
+	pr_info("SCTP probe registered (port=%d)\n", port);
+
+	return 0;
+
+remove_proc:
+	proc_net_remove(&init_net, procname);
+free_kfifo:
+	kfifo_free(&sctpw.fifo);
+	return ret;
+}
+
+static __exit void sctpprobe_exit(void)
+{
+	unregister_jprobe(&sctp_recv_probe);	/* stop the probe first: its handler writes to sctpw.fifo via printl() */
+	proc_net_remove(&init_net, procname);	/* then drop the /proc entry whose open/read handlers use the fifo */
+	kfifo_free(&sctpw.fifo);		/* free last, mirroring the error unwind order in sctpprobe_init() */
+}
+
+module_init(sctpprobe_init);
+module_exit(sctpprobe_exit);
-- 
cgit v1.1


From b99a4d53a74ac25eb4b930eef6c745579149c571 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: cleanup: remove duplicate assignment

This assignment isn't needed because we did it earlier already.

Also another reason to delete the assignment is because it triggers a
Smatch warning about checking for NULL pointers after a dereference.

Reported-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_make_chunk.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 17cb400e..33aed1c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -419,10 +419,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	if (!retval)
 		goto nomem_chunk;
 
-	/* Per the advice in RFC 2960 6.4, send this reply to
-	 * the source of the INIT packet.
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [INIT ACK back to where the INIT came from.]
 	 */
 	retval->transport = chunk->transport;
+
 	retval->subh.init_hdr =
 		sctp_addto_chunk(retval, sizeof(initack), &initack);
 	retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
@@ -461,18 +468,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	/* We need to remove the const qualifier at this point.  */
 	retval->asoc = (struct sctp_association *) asoc;
 
-	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
-	 *
-	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
-	 * HEARTBEAT ACK, * etc.) to the same destination transport
-	 * address from which it received the DATA or control chunk
-	 * to which it is replying.
-	 *
-	 * [INIT ACK back to where the INIT came from.]
-	 */
-	if (chunk)
-		retval->transport = chunk->transport;
-
 nomem_chunk:
 	kfree(cookie);
 nomem_cookie:
-- 
cgit v1.1


From d598b166ced20d9b9281ea3527c0e18405ddb803 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: Make sure we always return valid retransmit path

commit 4951feda0c60d1ef681f1a270afdd617924ab041
    sctp: Do no select unconfirmed transports for retransmissions

added code to make sure that we do not select unconfirmed paths
for data transmission.  This caused a problem when there are only
2 paths, 1 unconfirmed and 1 unreachable.  In that case, the next
retransmit path returned is NULL and that causes a kernel crash.

The solution is to only change retransmit paths if we found one to use.

Reported-by: Frank Schuster <frank.schuster01@web.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/sctp')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index de830c2..fab9cb2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1324,7 +1324,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 		}
 	}
 
-	asoc->peer.retran_path = t;
+	if (t)
+		asoc->peer.retran_path = t;
 
 	SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
 				 " %p addr: ",
-- 
cgit v1.1


From ae19c54866450f6c6f79223ca7d37965859a54e1 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: remove 'resent' bit from the chunk

The 'resent' bit is used to make sure that we don't update the
rto estimate based on retransmitted chunks.  However, we already
have the 'rto_pending' bit that we test when we need to update rto,
so the 'resent' bit is just extra.  Additionally, we currently have
a bug in that we always set the 'resent' bit and thus the rto estimate
is only updated by Heartbeats.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/output.c        | 25 +++++++++----------------
 net/sctp/outqueue.c      |  1 -
 net/sctp/sm_make_chunk.c |  1 -
 3 files changed, 9 insertions(+), 18 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index fad261d..35e49b9 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
 		list_del_init(&chunk->list);
 		if (sctp_chunk_is_data(chunk)) {
+			/* 6.3.1 C4) When data is in flight and when allowed
+			 * by rule C5, a new RTT measurement MUST be made each
+			 * round trip.  Furthermore, new RTT measurements
+			 * SHOULD be made no more than once per round-trip
+			 * for a given destination transport address.
+			 */
 
-			if (!chunk->resent) {
-
-				/* 6.3.1 C4) When data is in flight and when allowed
-				 * by rule C5, a new RTT measurement MUST be made each
-				 * round trip.  Furthermore, new RTT measurements
-				 * SHOULD be made no more than once per round-trip
-				 * for a given destination transport address.
-				 */
-
-				if (!tp->rto_pending) {
-					chunk->rtt_in_progress = 1;
-					tp->rto_pending = 1;
-				}
+			if (!tp->rto_pending) {
+				chunk->rtt_in_progress = 1;
+				tp->rto_pending = 1;
 			}
-
-			chunk->resent = 1;
-
 			has_data = 1;
 		}
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a4fe7de..4e551ba 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1405,7 +1405,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				 * instance).
 				 */
 				if (!tchunk->tsn_gap_acked &&
-				    !tchunk->resent &&
 				    tchunk->rtt_in_progress) {
 					tchunk->rtt_in_progress = 0;
 					rtt = jiffies - tchunk->sent_at;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 33aed1c..24effdf 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1205,7 +1205,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
 	INIT_LIST_HEAD(&retval->list);
 	retval->skb		= skb;
 	retval->asoc		= (struct sctp_association *)asoc;
-	retval->resent  	= 0;
 	retval->has_tsn		= 0;
 	retval->has_ssn         = 0;
 	retval->rtt_in_progress	= 0;
-- 
cgit v1.1


From d9efc2231b28bc199f9de4dd594248b7341188e5 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: Do not force T3 timer on fast retransmissions.

We don't need to force the T3 timer any more and it's
actually wrong to do so, as it causes too long of a delay.
The timer will be started if one is not running, but if
one is running, we leave it alone.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c  | 15 +++------------
 net/sctp/transport.c |  4 ++--
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4e551ba..786c4ff 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -659,14 +659,6 @@ redo:
 			if (chunk->fast_retransmit == SCTP_NEED_FRTX)
 				chunk->fast_retransmit = SCTP_DONT_FRTX;
 
-			/* Force start T3-rtx timer when fast retransmitting
-			 * the earliest outstanding TSN
-			 */
-			if (!timer && fast_rtx &&
-			    ntohl(chunk->subh.data_hdr->tsn) ==
-					     asoc->ctsn_ack_point + 1)
-				timer = 2;
-
 			q->empty = 0;
 			break;
 		}
@@ -871,7 +863,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 				 * sender MUST assure that at least one T3-rtx
 				 * timer is running.
 				 */
-				sctp_transport_reset_timers(transport, 0);
+				sctp_transport_reset_timers(transport);
 			}
 			break;
 
@@ -924,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 						    rtx_timeout, &start_timer);
 
 			if (start_timer)
-				sctp_transport_reset_timers(transport,
-							    start_timer-1);
+				sctp_transport_reset_timers(transport);
 
 			/* This can happen on COOKIE-ECHO resend.  Only
 			 * one chunk can get bundled with a COOKIE-ECHO.
@@ -1058,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport, 0);
+			sctp_transport_reset_timers(transport);
 
 			q->empty = 0;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index be4d63d..0ebb97f 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -195,7 +195,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 /* Start T3_rtx timer if it is not already running and update the heartbeat
  * timer.  This routine is called every time a DATA chunk is sent.
  */
-void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
+void sctp_transport_reset_timers(struct sctp_transport *transport)
 {
 	/* RFC 2960 6.3.2 Retransmission Timer Rules
 	 *
@@ -205,7 +205,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
 	 * address.
 	 */
 
-	if (force || !timer_pending(&transport->T3_rtx_timer))
+	if (!timer_pending(&transport->T3_rtx_timer))
 		if (!mod_timer(&transport->T3_rtx_timer,
 			       jiffies + transport->rto))
 			sctp_transport_hold(transport);
-- 
cgit v1.1


From b2cf9b6bd93af1cc047d3356f1c6cc9367fe3731 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: update transport initializations

Right now, sctp transports are not fully initialized and when
adding any new fields, they have to be explicitly initialized.
This is prone to mistakes.  So we switch to calling kzalloc()
which makes things much simpler.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c   |  3 ---
 net/sctp/endpointola.c |  2 --
 net/sctp/transport.c   | 25 -------------------------
 3 files changed, 30 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index fab9cb2..37753cd 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	/* Retrieve the SCTP per socket area.  */
 	sp = sctp_sk((struct sock *)sk);
 
-	/* Init all variables to a known value.  */
-	memset(asoc, 0, sizeof(struct sctp_association));
-
 	/* Discarding const is appropriate here.  */
 	asoc->ep = (struct sctp_endpoint *)ep;
 	sctp_endpoint_hold(asoc->ep);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 905fda5..2f8763b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	struct sctp_shared_key *null_key;
 	int err;
 
-	memset(ep, 0, sizeof(struct sctp_endpoint));
-
 	ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
 	if (!ep->digest)
 		return NULL;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 0ebb97f..854228b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	/* Copy in the address.  */
 	peer->ipaddr = *addr;
 	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
-	peer->asoc = NULL;
-
-	peer->dst = NULL;
 	memset(&peer->saddr, 0, sizeof(union sctp_addr));
 
 	/* From 6.3.1 RTO Calculation:
@@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	 * parameter 'RTO.Initial'.
 	 */
 	peer->rto = msecs_to_jiffies(sctp_rto_initial);
-	peer->rtt = 0;
-	peer->rttvar = 0;
-	peer->srtt = 0;
-	peer->rto_pending = 0;
-	peer->hb_sent = 0;
-	peer->fast_recovery = 0;
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_ecne_reduced = jiffies;
 
-	peer->init_sent_count = 0;
-
 	peer->param_flags = SPP_HB_DISABLE |
 			    SPP_PMTUD_ENABLE |
 			    SPP_SACKDELAY_ENABLE;
-	peer->hbinterval  = 0;
 
 	/* Initialize the default path max_retrans.  */
 	peer->pathmaxrxt  = sctp_max_retrans_path;
-	peer->error_count = 0;
 
 	INIT_LIST_HEAD(&peer->transmitted);
 	INIT_LIST_HEAD(&peer->send_ready);
 	INIT_LIST_HEAD(&peer->transports);
 
-	peer->T3_rtx_timer.expires = 0;
-	peer->hb_timer.expires = 0;
-
 	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
 			(unsigned long)peer);
 	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
@@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
 
 	atomic_set(&peer->refcnt, 1);
-	peer->dead = 0;
-
-	peer->malloced = 0;
-
-	/* Initialize the state information for SFR-CACC */
-	peer->cacc.changeover_active = 0;
-	peer->cacc.cycling_changeover = 0;
-	peer->cacc.next_tsn_at_change = 0;
-	peer->cacc.cacc_saw_newack = 0;
 
 	return peer;
 }
-- 
cgit v1.1


From cf9b4812e18aab6f86ff998bd7425a9e823269c3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: fast recovery algorithm is per association.

SCTP fast recovery algorithm really applies per association
and impacts all transports.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/transport.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 854228b..fccf494 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -378,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 			       __u32 sack_ctsn, __u32 bytes_acked)
 {
+	struct sctp_association *asoc = transport->asoc;
 	__u32 cwnd, ssthresh, flight_size, pba, pmtu;
 
 	cwnd = transport->cwnd;
 	flight_size = transport->flight_size;
 
 	/* See if we need to exit Fast Recovery first */
-	if (transport->fast_recovery &&
-	    TSN_lte(transport->fast_recovery_exit, sack_ctsn))
-		transport->fast_recovery = 0;
+	if (asoc->fast_recovery &&
+	    TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
+		asoc->fast_recovery = 0;
 
 	/* The appropriate cwnd increase algorithm is performed if, and only
 	 * if the cumulative TSN whould advanced and the congestion window is
@@ -415,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 		 *    2) the destination's path MTU.  This upper bound protects
 		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
 		 */
-		if (transport->fast_recovery)
+		if (asoc->fast_recovery)
 			return;
 
 		if (bytes_acked > pmtu)
@@ -466,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 			       sctp_lower_cwnd_t reason)
 {
+	struct sctp_association *asoc = transport->asoc;
+
 	switch (reason) {
 	case SCTP_LOWER_CWND_T3_RTX:
 		/* RFC 2960 Section 7.2.3, sctpimpguide
@@ -476,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      partial_bytes_acked = 0
 		 */
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pathmtu);
-		transport->cwnd = transport->asoc->pathmtu;
+					  4*asoc->pathmtu);
+		transport->cwnd = asoc->pathmtu;
 
-		/* T3-rtx also clears fast recovery on the transport */
-		transport->fast_recovery = 0;
+		/* T3-rtx also clears fast recovery */
+		asoc->fast_recovery = 0;
 		break;
 
 	case SCTP_LOWER_CWND_FAST_RTX:
@@ -496,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      cwnd = ssthresh
 		 *      partial_bytes_acked = 0
 		 */
-		if (transport->fast_recovery)
+		if (asoc->fast_recovery)
 			return;
 
 		/* Mark Fast recovery */
-		transport->fast_recovery = 1;
-		transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+		asoc->fast_recovery = 1;
+		asoc->fast_recovery_exit = asoc->next_tsn - 1;
 
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pathmtu);
+					  4*asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
 		break;
 
@@ -524,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		if (time_after(jiffies, transport->last_time_ecne_reduced +
 					transport->rtt)) {
 			transport->ssthresh = max(transport->cwnd/2,
-						  4*transport->asoc->pathmtu);
+						  4*asoc->pathmtu);
 			transport->cwnd = transport->ssthresh;
 			transport->last_time_ecne_reduced = jiffies;
 		}
@@ -540,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 * interval.
 		 */
 		transport->cwnd = max(transport->cwnd/2,
-					 4*transport->asoc->pathmtu);
+					 4*asoc->pathmtu);
 		break;
 	}
 
@@ -625,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->error_count = 0;
 	t->rto_pending = 0;
 	t->hb_sent = 0;
-	t->fast_recovery = 0;
 
 	/* Initialize the state information for SFR-CACC */
 	t->cacc.changeover_active = 0;
-- 
cgit v1.1


From 65883371894be2631603d5d412f90f8c09290fef Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: rwnd_press should be cumulative

rwnd_press tracks the pressure on the receive window.  Every
time the receive buffer overflows, we truncate the receive
window and then grow it back.  However, if we don't track
the cumulative pressure, it's possible to reach a situation
when the receive buffer is empty, but rwnd stays truncated.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sctp')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 37753cd..65f9a7c 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1482,7 +1482,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
 	if (asoc->rwnd >= len) {
 		asoc->rwnd -= len;
 		if (over) {
-			asoc->rwnd_press = asoc->rwnd;
+			asoc->rwnd_press += asoc->rwnd;
 			asoc->rwnd = 0;
 		}
 	} else {
-- 
cgit v1.1


From ea862c8d1f4a0d193979c7412c3b946f600721ce Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: correctly mark missing chunks in fast recovery

According to RFC 4960 Section 7.2.4:
 					If an endpoint is in Fast
   Recovery and a SACK arrives that advances the Cumulative TSN Ack
   Point, the miss indications are incremented for all TSNs reported
   missing in the SACK.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 786c4ff..b491a1a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1154,6 +1154,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	struct sctp_transport *primary = asoc->peer.primary_path;
 	int count_of_newacks = 0;
 	int gap_ack_blocks;
+	u8 accum_moved = 0;
 
 	/* Grab the association's destination address list. */
 	transport_list = &asoc->peer.transport_addr_list;
@@ -1232,16 +1233,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 			count_of_newacks ++;
 	}
 
+	/* Move the Cumulative TSN Ack Point if appropriate.  */
+	if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
+		asoc->ctsn_ack_point = sack_ctsn;
+		accum_moved = 1;
+	}
+
 	if (gap_ack_blocks) {
+
+		if (asoc->fast_recovery && accum_moved)
+			highest_new_tsn = highest_tsn;
+
 		list_for_each_entry(transport, transport_list, transports)
 			sctp_mark_missing(q, &transport->transmitted, transport,
 					  highest_new_tsn, count_of_newacks);
 	}
 
-	/* Move the Cumulative TSN Ack Point if appropriate.  */
-	if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
-		asoc->ctsn_ack_point = sack_ctsn;
-
 	/* Update unack_data field in the assoc. */
 	sctp_sack_update_unack_data(asoc, sack);
 
@@ -1685,7 +1692,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
 	struct sctp_chunk *chunk;
 	__u32 tsn;
 	char do_fast_retransmit = 0;
-	struct sctp_transport *primary = q->asoc->peer.primary_path;
+	struct sctp_association *asoc = q->asoc;
+	struct sctp_transport *primary = asoc->peer.primary_path;
 
 	list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
 
-- 
cgit v1.1


From bfa0d9843ac5feb9667990706b4524390fee4df9 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: Optimize computation of highest new tsn in SACK.

Right now, if the highest tsn in the SACK doesn't change, we'll
end up scanning the transmitted lists on the transports twice:
once for locating the highest _new_ tsn, and once for actually
tagging chunks as acked.  This is a waste, since we can record
the highest _new_ tsn at the same time as tagging chunks.  Long
ago this was not possible because we would try to mark chunks
as missing at the same time as tagging them acked and this approach
didn't work.  Now that the two steps are separate, we can re-use
the old approach.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 42 +++++++-----------------------------------
 1 file changed, 7 insertions(+), 35 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index b491a1a..5d05717 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				   struct list_head *transmitted_queue,
 				   struct sctp_transport *transport,
 				   struct sctp_sackhdr *sack,
-				   __u32 highest_new_tsn);
+				   __u32 *highest_new_tsn);
 
 static void sctp_mark_missing(struct sctp_outq *q,
 			      struct list_head *transmitted_queue,
@@ -1109,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
 	assoc->unack_data = unack_data;
 }
 
-/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
-static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
-				  struct sctp_association *asoc)
-{
-	struct sctp_transport *transport;
-	struct sctp_chunk *chunk;
-	__u32 highest_new_tsn, tsn;
-	struct list_head *transport_list = &asoc->peer.transport_addr_list;
-
-	highest_new_tsn = ntohl(sack->cum_tsn_ack);
-
-	list_for_each_entry(transport, transport_list, transports) {
-		list_for_each_entry(chunk, &transport->transmitted,
-				transmitted_list) {
-			tsn = ntohl(chunk->subh.data_hdr->tsn);
-
-			if (!chunk->tsn_gap_acked &&
-			    TSN_lt(highest_new_tsn, tsn) &&
-			    sctp_acked(sack, tsn))
-				highest_new_tsn = tsn;
-		}
-	}
-
-	return highest_new_tsn;
-}
-
 /* This is where we REALLY process a SACK.
  *
  * Process the SACK against the outqueue.  Mostly, this just frees
@@ -1203,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	if (gap_ack_blocks)
 		highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
 
-	if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
-		highest_new_tsn = highest_tsn;
+	if (TSN_lt(asoc->highest_sacked, highest_tsn))
 		asoc->highest_sacked = highest_tsn;
-	} else {
-		highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
-	}
 
+	highest_new_tsn = sack_ctsn;
 
 	/* Run through the retransmit queue.  Credit bytes received
 	 * and free those chunks that we can.
 	 */
-	sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
+	sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
 
 	/* Run through the transmitted queue.
 	 * Credit bytes received and free those chunks which we can.
@@ -1223,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	 */
 	list_for_each_entry(transport, transport_list, transports) {
 		sctp_check_transmitted(q, &transport->transmitted,
-				       transport, sack, highest_new_tsn);
+				       transport, sack, &highest_new_tsn);
 		/*
 		 * SFR-CACC algorithm:
 		 * C) Let count_of_newacks be the number of
@@ -1331,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				   struct list_head *transmitted_queue,
 				   struct sctp_transport *transport,
 				   struct sctp_sackhdr *sack,
-				   __u32 highest_new_tsn_in_sack)
+				   __u32 *highest_new_tsn_in_sack)
 {
 	struct list_head *lchunk;
 	struct sctp_chunk *tchunk;
@@ -1419,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			 */
 			if (!tchunk->tsn_gap_acked) {
 				tchunk->tsn_gap_acked = 1;
+				*highest_new_tsn_in_sack = tsn;
 				bytes_acked += sctp_data_size(tchunk);
 				if (!tchunk->transport)
 					migrate_bytes += sctp_data_size(tchunk);
-- 
cgit v1.1


From 0e3aef8d09a8c11e3fb83cdcb24b5bc7421b3726 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: Tag messages that can be Nagle delayed at creation.

When we create the sctp_datamsg and fragment the user data,
we know exactly if we are sending full segments or not and
how they might be bundled.  During this time, we can mark
messages as Nagle capable or not.  This makes the check at
transmit time much simpler.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/chunk.c  | 4 ++--
 net/sctp/output.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/sctp')

diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db..476caaf 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
 	msg->send_failed = 0;
 	msg->send_error = 0;
 	msg->can_abandon = 0;
+	msg->can_delay = 1;
 	msg->expires_at = 0;
 	INIT_LIST_HEAD(&msg->chunks);
-	msg->msg_size = 0;
 }
 
 /* Allocate and initialize datamsg. */
@@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
 {
 	sctp_datamsg_hold(msg);
 	chunk->msg = msg;
-	msg->msg_size += chunk->skb->len;
 }
 
 
@@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	if (msg_len >= first_len) {
 		msg_len -= first_len;
 		whole = 1;
+		msg->can_delay = 0;
 	}
 
 	/* How many full sized?  How many bytes leftover? */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 35e49b9..a646681 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -674,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 		 * Don't delay large message writes that may have been
 		 * fragmeneted into small peices.
 		 */
-		if ((len < max) && (chunk->msg->msg_size < max)) {
+		if ((len < max) && chunk->msg->can_delay) {
 			retval = SCTP_XMIT_NAGLE_DELAY;
 			goto finish;
 		}
-- 
cgit v1.1