From 14deae41566b5cdd992c01d0069518ced5227c83 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 4 Jan 2009 16:04:39 -0800 Subject: ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups. Thanks to excellent diagnosis by Eduard Guzovsky. The core problem is that on a network with lots of active multicast traffic, the neighbour cache can fill up. If we try to allocate a new route and thus neighbour cache entry, the bog-standard GC attempt the neighbour layer does in ineffective because route entries hold a reference to the existing neighbour entries and GC can only liberate entries with no references. IPV4 already has a way to handle this, by doing a route cache GC in such situations (when neigh attach returns -ENOBUFS). So simply mimick this on the ipv6 side. Tested-by: Eduard Guzovsky Signed-off-by: David S. Miller --- net/ipv6/route.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 18c486c..76f06b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad rt = ip6_rt_copy(ort); if (rt) { + struct neighbour *neigh; + int attempts = !in_softirq(); + if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) @@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad } #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + retry: + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { + struct net *net = dev_net(rt->rt6i_dev); + int saved_rt_min_interval = + net->ipv6.sysctl.ip6_rt_gc_min_interval; + int saved_rt_elasticity = + net->ipv6.sysctl.ip6_rt_gc_elasticity; + + if (attempts-- > 0) { + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; + + ip6_dst_gc(net->ipv6.ip6_dst_ops); + + net->ipv6.sysctl.ip6_rt_gc_elasticity = + saved_rt_elasticity; + net->ipv6.sysctl.ip6_rt_gc_min_interval = + saved_rt_min_interval; + goto retry; + } + + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table overflow.\n"); + dst_free(&rt->u.dst); + return NULL; + } + rt->rt6i_nexthop = neigh; } @@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dev_hold(dev); if (neigh) neigh_hold(neigh); - else + else { neigh = ndisc_get_neigh(dev, addr); + if (IS_ERR(neigh)) + neigh = NULL; + } rt->rt6i_dev = dev; rt->rt6i_idev = idev; @@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct neighbour *neigh; if (rt == NULL) return ERR_PTR(-ENOMEM); @@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (rt->rt6i_nexthop == NULL) { + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { dst_free(&rt->u.dst); - return ERR_PTR(-ENOMEM); + + /* We are casting this because that is the return + * value type. But an errno encoded pointer is the + * same regardless of the underlying pointer type, + * and that's what we are returning. So this is OK. 
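The retry logic added to rt6_alloc_cow() above boils down to: when the neighbour lookup fails, temporarily relax the route GC thresholds, force one garbage-collection pass, restore the thresholds, and retry, at most once and only when not in softirq context. The following is a minimal userspace sketch of that pattern, not kernel code; the cache, GC routine and field names are hypothetical stand-ins for ndisc_get_neigh(), ip6_dst_gc() and the ip6_rt_gc_* sysctls.

/*
 * Sketch of "allocate; on failure force one GC pass and retry once".
 * All names are illustrative, not real kernel symbols.
 */
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

struct fake_cache {
	int entries;
	int capacity;
	int gc_elasticity;	/* stands in for ip6_rt_gc_elasticity   */
	int gc_min_interval;	/* stands in for ip6_rt_gc_min_interval */
};

static int cache_alloc(struct fake_cache *c)
{
	if (c->entries >= c->capacity)
		return -ENOBUFS;	/* cache full, like the neigh lookup failing */
	c->entries++;
	return 0;
}

static void cache_gc(struct fake_cache *c)
{
	/* an aggressive GC pass frees half the entries */
	if (c->gc_elasticity == 1 && c->gc_min_interval == 0)
		c->entries /= 2;
}

static int alloc_with_retry(struct fake_cache *c, bool in_softirq)
{
	int attempts = !in_softirq;	/* one retry, only in process context */
	int err;

retry:
	err = cache_alloc(c);
	if (err == -ENOBUFS && attempts-- > 0) {
		int saved_elasticity = c->gc_elasticity;
		int saved_interval   = c->gc_min_interval;

		c->gc_elasticity = 1;	/* make GC maximally willing to reap */
		c->gc_min_interval = 0;
		cache_gc(c);
		c->gc_elasticity = saved_elasticity;
		c->gc_min_interval = saved_interval;
		goto retry;
	}
	return err;
}

int main(void)
{
	struct fake_cache c = { .entries = 8, .capacity = 8,
				.gc_elasticity = 9, .gc_min_interval = 1 };

	printf("softirq alloc: %d\n", alloc_with_retry(&c, true));	/* -ENOBUFS, no retry allowed */
	printf("process alloc: %d\n", alloc_with_retry(&c, false));	/* 0, GC freed room */
	return 0;
}

The bounded retry is the point: in softirq context the GC pass is skipped entirely, mirroring the attempts = !in_softirq() guard in the patch.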
+ */ + return (struct rt6_info *) neigh; } + rt->rt6i_nexthop = neigh; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; -- cgit v1.1 From b530256d2e0f1a75fab31f9821129fff1bb49faa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 Jan 2009 16:13:19 -0800 Subject: gro: Use gso_size to store MSS In order to allow GRO packets without frag_list at all, we need to store the MSS in the packet itself. The obvious place is gso_size. The only thing to watch out for is if the packet ends up not being GRO then we need to clear gso_size before pushing the packet into the stack. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ net/core/skbuff.c | 1 + net/ipv4/tcp.c | 5 +---- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 09c66a4..1e1a680 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2365,6 +2365,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: + skb_shinfo(skb)->gso_size = 0; __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2446,6 +2447,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) } NAPI_GRO_CB(skb)->count = 1; + skb_shinfo(skb)->gso_size = skb->len; skb->next = napi->gro_list; napi->gro_list = skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8d0abb..3aafb10 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2613,6 +2613,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; + skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; skb_header_release(p); nskb->prev = p; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f28acf1..4d655e9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2519,9 +2519,7 @@ found: flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); total = p->len; - mss = total; - if (skb_shinfo(p)->frag_list) - mss = skb_shinfo(p)->frag_list->len; + mss = skb_shinfo(p)->gso_size; flush |= skb->len > mss || skb->len <= 0; flush |= ntohl(th2->seq) + total != ntohl(th->seq); @@ -2557,7 +2555,6 @@ int tcp_gro_complete(struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) -- cgit v1.1 From 5d38a079ce3971f932bbdc0dc5b887806fabd5dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 Jan 2009 16:13:40 -0800 Subject: gro: Add page frag support This patch allows GRO to merge page frags (skb_shinfo(skb)->frags) in one skb, rather than using the less efficient frag_list. It also adds a new interface, napi_gro_frags to allow drivers to inject page frags directly into the stack without allocating an skb. This is intended to be the GRO equivalent for LRO's lro_receive_frags interface. The existing GSO interface can already handle page frags with or without an appended frag_list so nothing needs to be changed there. The merging itself is rather simple. We store any new frag entries after the last existing entry, without checking whether the first new entry can be merged with the last existing entry. Making this check would actually be easy but since no existing driver can produce contiguous frags anyway it would just be mental masturbation. If the total number of entries would exceed the capacity of a single skb, we simply resort to using frag_list as we do now. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- net/core/skbuff.c | 14 ++++++++- 2 files changed, 99 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1e1a680..382df6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,9 @@ /* Instead of increasing this, you should create a hash table. */ #define MAX_GRO_SKBS 8 +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2345,7 +2348,7 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (!skb_shinfo(skb)->frag_list) + if (NAPI_GRO_CB(skb)->count == 1) goto out; rcu_read_lock(); @@ -2384,7 +2387,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2393,6 +2396,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; + int free; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2409,6 +2413,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->free = 0; for (p = napi->gro_list; p; p = p->next) { count++; @@ -2428,6 +2433,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; + free = NAPI_GRO_CB(skb)->free; if (pp) { struct sk_buff *nskb = *pp; @@ -2452,13 +2458,86 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_list = skb; ok: - return NET_RX_SUCCESS; + return free; normal: - return netif_receive_skb(skb); + return -1; +} + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 1: + kfree_skb(skb); + break; + } + + return NET_RX_SUCCESS; } EXPORT_SYMBOL(napi_gro_receive); +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct net_device *dev = napi->dev; + struct sk_buff *skb = napi->skb; + int err = NET_RX_DROP; + + napi->skb = NULL; + + if (!skb) { + skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); + if (!skb) + goto out; + + skb_reserve(skb, NET_IP_ALIGN); + } + + BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + skb_shinfo(skb)->nr_frags = info->nr_frags; + memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); + + skb->data_len = info->len; + skb->len += info->len; + skb->truesize += info->len; + + if (!pskb_may_pull(skb, ETH_HLEN)) + goto reuse; + + err = NET_RX_SUCCESS; + + skb->protocol = eth_type_trans(skb, dev); + + skb->ip_summed = info->ip_summed; + skb->csum = info->csum; + + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 0: + goto out; + } + +reuse: + skb_shinfo(skb)->nr_frags = 0; + + skb->len -= skb->data_len; + skb->truesize -= skb->data_len; + skb->data_len = 0; + + __skb_pull(skb, skb_headlen(skb)); + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + + napi->skb = skb; + +out: + return err; +} +EXPORT_SYMBOL(napi_gro_frags); + static int 
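The merge strategy described in the changelog above (store new frag entries after the last existing entry, and fall back to frag_list when the array would overflow) can be sketched in plain C as below. MAX_FRAGS and the structures are simplified, hypothetical stand-ins for MAX_SKB_FRAGS and the skb shared info, not the kernel code itself.

/*
 * Sketch of the frag-array merge with a chained fallback.
 */
#include <stdio.h>
#include <string.h>

#define MAX_FRAGS 4

struct frag { void *page; int off, len; };

struct pkt {
	int nr_frags;
	struct frag frags[MAX_FRAGS];
	struct pkt *chain;		/* stands in for the frag_list fallback */
};

/* returns 1 if frags were merged, 0 if the caller must chain instead */
static int merge_frags(struct pkt *head, const struct pkt *p)
{
	if (head->nr_frags + p->nr_frags > MAX_FRAGS)
		return 0;

	/* new entries go after the last existing entry, no coalescing */
	memcpy(head->frags + head->nr_frags, p->frags,
	       p->nr_frags * sizeof(struct frag));
	head->nr_frags += p->nr_frags;
	return 1;
}

int main(void)
{
	struct pkt head = { .nr_frags = 2 };
	struct pkt more = { .nr_frags = 1 };
	struct pkt toomany = { .nr_frags = 3 };

	printf("merge small: %d (frags now %d)\n",
	       merge_frags(&head, &more), head.nr_frags);
	printf("merge large: %d (would overflow, chain instead)\n",
	       merge_frags(&head, &toomany));
	return 0;
}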
process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2537,11 +2616,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, { INIT_LIST_HEAD(&napi->poll_list); napi->gro_list = NULL; + napi->skb = NULL; napi->poll = poll; napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); -#ifdef CONFIG_NETPOLL napi->dev = dev; +#ifdef CONFIG_NETPOLL spin_lock_init(&napi->poll_lock); napi->poll_owner = -1; #endif @@ -2554,6 +2634,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); + kfree(napi->skb); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3aafb10..5110b35 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2594,6 +2594,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (skb_shinfo(p)->frag_list) goto merge; + else if (!skb_headlen(p) && !skb_headlen(skb) && + skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < + MAX_SKB_FRAGS) { + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, + skb_shinfo(skb)->frags, + skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); + + skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; + NAPI_GRO_CB(skb)->free = 1; + goto done; + } headroom = skb_headroom(p); nskb = netdev_alloc_skb(p->dev, headroom); @@ -2628,11 +2639,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: - NAPI_GRO_CB(p)->count++; p->prev->next = skb; p->prev = skb; skb_header_release(skb); +done: + NAPI_GRO_CB(p)->count++; p->data_len += skb->len; p->truesize += skb->len; p->len += skb->len; -- cgit v1.1 From f32f8b72e02e851972a0172603104046aa5fec96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Holm=20Th=C3=B8gersen?= Date: Sun, 4 Jan 2009 17:11:24 -0800 Subject: net/rfkill/rfkill.c: fix unused rfkill_led_trigger() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4dec9b807be757780ca3611a959ac22c28d292a7 ("rfkill: strip pointless notifier chain") removed the only user of rfkill_led_trigger() that was not guarded by #ifdef CONFIG_RFKILL_LEDS. Therefore, move rfkill_led_trigger() completely inside #ifdef CONFIG_RFKILL_LEDS and avoid the compile time warning: net/rfkill/rfkill.c:59: warning: 'rfkill_led_trigger' defined but not used Signed-off-by: Simon Holm Thøgersen Signed-off-by: David S. 
Miller --- net/rfkill/rfkill.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 3c94f76..3eaa394 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -54,10 +54,10 @@ static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; static bool rfkill_epo_lock_active; +#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger(struct rfkill *rfkill, enum rfkill_state state) { -#ifdef CONFIG_RFKILL_LEDS struct led_trigger *led = &rfkill->led_trigger; if (!led->name) @@ -66,10 +66,8 @@ static void rfkill_led_trigger(struct rfkill *rfkill, led_trigger_event(led, LED_OFF); else led_trigger_event(led, LED_FULL); -#endif /* CONFIG_RFKILL_LEDS */ } -#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger_activate(struct led_classdev *led) { struct rfkill *rfkill = container_of(led->trigger, -- cgit v1.1 From 22604c866889c4b2e12b73cbf1683bda1b72a313 Mon Sep 17 00:00:00 2001 From: Michael Marineau Date: Sun, 4 Jan 2009 17:18:51 -0800 Subject: net: Fix for initial link state in 2.6.28 From: Michael Marineau Commit b47300168e770b60ab96c8924854c3b0eb4260eb "Do not fire linkwatch events until the device is registered." was made as a workaround for drivers that call netif_carrier_off before registering the device. Unfortunately this causes these drivers to incorrectly report their link status as IF_OPER_UNKNOWN which can falsely set the IFF_RUNNING flag when the interface is first brought up. This issues was previously pointed out[1] but was dismissed saying that IFF_RUNNING is not related to the link status. From my digging IFF_RUNNING, as reported to userspace, is based on the link state. It is set based on __LINK_STATE_START and IF_OPER_UP or IF_OPER_UNKNOWN. See [2], [3], and [4]. (Whether or not the kernel has IFF_RUNNING set in flags is not reported to user space so it may well be independent of the link, I don't know if and when it may get set.) The end result depends slightly depending on the driver. The the two I tested were e1000e and b44. With e1000e if the system is booted without a network cable attached the interface will falsely report RUNNING when it is brought up causing NetworkManager to attempt to start it and eventually time out. With b44 when the system is booted with a network cable attached and brought up with dhcpcd it will time out the first time. The attached patch that will still set the operstate variable correctly to IF_OPER_UP/DOWN/etc when linkwatch_fire_event is called but then return rather than skipping the linkwatch_fire_event call entirely as the previous fix did. (sorry it isn't inline, I don't have a patch friendly email client at the moment) Signed-off-by: David S. 
Miller --- net/core/link_watch.c | 7 ++++++- net/sched/sch_generic.c | 4 ---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bf8f7af..1e401e1 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -178,7 +178,6 @@ static void __linkwatch_run_queue(int urgent_only) */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); @@ -215,6 +214,12 @@ void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); + rfc2863_policy(dev); + + /* Some drivers call netif_carrier_off early */ + if (dev->reg_state == NETREG_UNINITIALIZED) + return; + if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5f5efe4..23a8e61 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -270,8 +270,6 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -288,8 +286,6 @@ EXPORT_SYMBOL(netif_carrier_on); void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; linkwatch_fire_event(dev); } } -- cgit v1.1 From 914d11647b6d6fe81bdf0c059612ee36282b8cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 4 Jan 2009 17:27:31 -0800 Subject: ipv6: IPV6_PKTINFO relied userspace providing correct length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Reported-by: Eric Sesterhenn Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index eeeaad2..40f3246 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -404,7 +404,7 @@ sticky_done: else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) goto e_inval; - if (copy_from_user(&pkt, optval, optlen)) { + if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { retv = -EFAULT; break; } -- cgit v1.1 From 858eb711ba64f8a001d7003295b8078bcab33b6d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 4 Jan 2009 17:29:21 -0800 Subject: DCB: fix kfree(skb) Use kfree_skb instead of kfree for struct sk_buff pointers. Signed-off-by: Roel Kluin Signed-off-by: David S. 
Miller --- net/dcb/dcbnl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 5dbfe5f..8379496 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -191,7 +191,7 @@ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); return ret; } @@ -272,7 +272,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -314,7 +314,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -380,7 +380,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -458,7 +458,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return ret; } @@ -687,7 +687,7 @@ err_pg: nla_nest_cancel(dcbnl_skb, pg_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; @@ -949,7 +949,7 @@ err_bcn: nla_nest_cancel(dcbnl_skb, bcn_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; -- cgit v1.1 From 6e5c172cf7ca1ab878cc6a6a4c1d52fef60f3ee0 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 4 Jan 2009 17:31:18 -0800 Subject: can: update can-bcm for hrtimer hardirq callbacks Since commit ca109491f612aab5c8152207631c0444f63da97f ("hrtimer: removing all ur callback modes") the hrtimer callbacks are processed only in hardirq context. This patch moves some functionality into tasklets to run in softirq context. Additionally some duplicated code was removed in bcm_rx_thr_flush() and an avoidable memcpy was removed from bcm_rx_handler(). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- net/can/bcm.c | 208 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 125 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index da0d426..6248ae2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -70,7 +70,7 @@ #define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO - "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; + "can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n"; MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -90,6 +90,7 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct timeval ival1, ival2; struct hrtimer timer, thrtimer; + struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int count; @@ -341,6 +342,23 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_timeout_tsklet(unsigned long data) +{ + struct bcm_op *op = (struct bcm_op *)data; + struct bcm_msg_head msg_head; + + /* create notification to user */ + msg_head.opcode = TX_EXPIRED; + msg_head.flags = op->flags; + msg_head.count = op->count; + msg_head.ival1 = op->ival1; + msg_head.ival2 = op->ival2; + msg_head.can_id = op->can_id; + msg_head.nframes = 0; + + bcm_send_to_user(op, &msg_head, NULL, 0); +} + /* * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions */ @@ -352,20 +370,8 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) if (op->kt_ival1.tv64 && (op->count > 0)) { op->count--; - if (!op->count && (op->flags & TX_COUNTEVT)) { - struct bcm_msg_head msg_head; - - /* create notification to user */ - msg_head.opcode = TX_EXPIRED; - msg_head.flags = op->flags; - msg_head.count = op->count; - msg_head.ival1 = op->ival1; - msg_head.ival2 = op->ival2; - msg_head.can_id = op->can_id; - msg_head.nframes = 0; - - bcm_send_to_user(op, &msg_head, NULL, 0); - } + if (!op->count && (op->flags & TX_COUNTEVT)) + tasklet_schedule(&op->tsklet); } if (op->kt_ival1.tv64 && (op->count > 0)) { @@ -402,6 +408,9 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; + /* this element is not throttled anymore */ + data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV); + head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; @@ -420,37 +429,32 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct can_frame *lastdata, - struct can_frame *rxdata) + const struct can_frame *rxdata) { memcpy(lastdata, rxdata, CFSIZ); - /* mark as used */ - lastdata->can_dlc |= RX_RECV; + /* mark as used and throttled by default */ + lastdata->can_dlc |= (RX_RECV|RX_THR); - /* throtteling mode inactive OR data update already on the run ? */ - if (!op->kt_ival2.tv64 || hrtimer_callback_running(&op->thrtimer)) { + /* throtteling mode inactive ? 
*/ + if (!op->kt_ival2.tv64) { /* send RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); + bcm_rx_changed(op, lastdata); return; } - if (hrtimer_active(&op->thrtimer)) { - /* mark as 'throttled' */ - lastdata->can_dlc |= RX_THR; + /* with active throttling timer we are just done here */ + if (hrtimer_active(&op->thrtimer)) return; - } - if (!op->kt_lastmsg.tv64) { - /* send first RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); - op->kt_lastmsg = ktime_get(); - return; - } + /* first receiption with enabled throttling mode */ + if (!op->kt_lastmsg.tv64) + goto rx_changed_settime; + /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { - /* mark as 'throttled' and start timer */ - lastdata->can_dlc |= RX_THR; + /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS); @@ -458,7 +462,8 @@ static void bcm_rx_update_and_send(struct bcm_op *op, } /* the gap was that big, that throttling was not needed here */ - bcm_rx_changed(op, rxdata); +rx_changed_settime: + bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } @@ -467,7 +472,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, int index, - struct can_frame *rxdata) + const struct can_frame *rxdata) { /* * no one uses the MSBs of can_dlc for comparation, @@ -511,14 +516,12 @@ static void bcm_rx_starttimer(struct bcm_op *op) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +static void bcm_rx_timeout_tsklet(unsigned long data) { - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + struct bcm_op *op = (struct bcm_op *)data; struct bcm_msg_head msg_head; + /* create notification to user */ msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; @@ -528,6 +531,17 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); +} + +/* + * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out + */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + /* schedule before NET_RX_SOFTIRQ */ + tasklet_hi_schedule(&op->tsklet); /* no restart of the timer is done here! 
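The restructuring of bcm_rx_timeout_handler() here shows the general pattern of this patch: the hrtimer callback, now limited to hardirq context, only schedules a tasklet, and the user notification runs later in softirq context. A minimal userspace sketch of that split, with hypothetical names and a plain flag standing in for the tasklet machinery, is:

/*
 * Sketch of "timer callback schedules, deferred handler does the work".
 */
#include <stdio.h>

struct op {
	int pending;			/* set by the "hardirq" timer callback */
	void (*tasklet)(struct op *);	/* deferred work, run in "softirq"     */
};

static void notify_user(struct op *op)
{
	printf("RX_TIMEOUT notification for op %p sent to userspace\n",
	       (void *)op);
}

/* minimal work only: mark the deferred handler as scheduled */
static void timer_callback(struct op *op)
{
	op->pending = 1;
}

/* later, outside the timer callback, run whatever was scheduled */
static void run_pending(struct op *op)
{
	if (op->pending) {
		op->pending = 0;
		op->tasklet(op);
	}
}

int main(void)
{
	struct op op = { .pending = 0, .tasklet = notify_user };

	timer_callback(&op);	/* "hardirq": just schedules */
	run_pending(&op);	/* "softirq": does the real work */
	return 0;
}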
*/ @@ -541,9 +555,25 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) } /* + * bcm_rx_do_flush - helper for bcm_rx_thr_flush + */ +static inline int bcm_rx_do_flush(struct bcm_op *op, int update, int index) +{ + if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) { + if (update) + bcm_rx_changed(op, &op->last_frames[index]); + return 1; + } + return 0; +} + +/* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace + * + * update == 0 : just check if throttled data is available (any irq context) + * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op) +static int bcm_rx_thr_flush(struct bcm_op *op, int update) { int updated = 0; @@ -551,27 +581,25 @@ static int bcm_rx_thr_flush(struct bcm_op *op) int i; /* for MUX filter we start at index 1 */ - for (i = 1; i < op->nframes; i++) { - if ((op->last_frames) && - (op->last_frames[i].can_dlc & RX_THR)) { - op->last_frames[i].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[i]); - updated++; - } - } + for (i = 1; i < op->nframes; i++) + updated += bcm_rx_do_flush(op, update, i); } else { /* for RX_FILTER_ID and simple filter */ - if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) { - op->last_frames[0].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[0]); - updated++; - } + updated += bcm_rx_do_flush(op, update, 0); } return updated; } +static void bcm_rx_thr_tsklet(unsigned long data) +{ + struct bcm_op *op = (struct bcm_op *)data; + + /* push the changed data to the userspace */ + bcm_rx_thr_flush(op, 1); +} + /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -580,7 +608,9 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - if (bcm_rx_thr_flush(op)) { + tasklet_schedule(&op->thrtsklet); + + if (bcm_rx_thr_flush(op, 0)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -596,48 +626,38 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; - struct can_frame rxframe; + const struct can_frame *rxframe = (struct can_frame *)skb->data; int i; /* disable timeout */ hrtimer_cancel(&op->timer); - if (skb->len == sizeof(rxframe)) { - memcpy(&rxframe, skb->data, sizeof(rxframe)); - /* save rx timestamp */ - op->rx_stamp = skb->tstamp; - /* save originator for recvfrom() */ - op->rx_ifindex = skb->dev->ifindex; - /* update statistics */ - op->frames_abs++; - kfree_skb(skb); + if (op->can_id != rxframe->can_id) + goto rx_freeskb; - } else { - kfree_skb(skb); - return; - } - - if (op->can_id != rxframe.can_id) - return; + /* save rx timestamp */ + op->rx_stamp = skb->tstamp; + /* save originator for recvfrom() */ + op->rx_ifindex = skb->dev->ifindex; + /* update statistics */ + op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ bcm_can_tx(op); - return; + goto rx_freeskb; } if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_update_and_send(op, &op->last_frames[0], rxframe); + goto rx_freeskb_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ - bcm_rx_cmp_to_index(op, 
0, &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_cmp_to_index(op, 0, rxframe); + goto rx_freeskb_starttimer; } if (op->nframes > 1) { @@ -649,15 +669,19 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) */ for (i = 1; i < op->nframes; i++) { - if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) == + if ((GET_U64(&op->frames[0]) & GET_U64(rxframe)) == (GET_U64(&op->frames[0]) & GET_U64(&op->frames[i]))) { - bcm_rx_cmp_to_index(op, i, &rxframe); + bcm_rx_cmp_to_index(op, i, rxframe); break; } } - bcm_rx_starttimer(op); } + +rx_freeskb_starttimer: + bcm_rx_starttimer(op); +rx_freeskb: + kfree_skb(skb); } /* @@ -681,6 +705,12 @@ static void bcm_remove_op(struct bcm_op *op) hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); + if (op->tsklet.func) + tasklet_kill(&op->tsklet); + + if (op->thrtsklet.func) + tasklet_kill(&op->thrtsklet); + if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -891,6 +921,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_tx_timeout_handler; + /* initialize tasklet for tx countevent notification */ + tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, + (unsigned long) op); + /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1054,9 +1088,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_rx_timeout_handler; + /* initialize tasklet for rx timeout notification */ + tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, + (unsigned long) op); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->thrtimer.function = bcm_rx_thr_handler; + /* initialize tasklet for rx throttle handling */ + tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, + (unsigned long) op); + /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1102,7 +1144,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = ktime_set(0, 0); hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op); + bcm_rx_thr_flush(op, 1); } if ((op->flags & STARTTIMER) && op->kt_ival1.tv64) -- cgit v1.1 From ddebc973c56b51b4e5d84d606f0430d81b895d67 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:42:53 -0800 Subject: dccp: Lockless integration of CCID congestion-control plugins Based on Arnaldo's earlier patch, this patch integrates the standardised CCID congestion control plugins (CCID-2 and CCID-3) of DCCP with dccp.ko: * enables a faster connection path by eliminating the need to always go through the CCID registration lock; * updates the implementation to use only a single array whose size equals the number of configured CCIDs instead of the maximum (256); * since the CCIDs are now fixed array elements, synchronization is no longer needed, simplifying use and implementation. CCID-2 is suggested as minimum for a basic DCCP implementation (RFC 4340, 10); CCID-3 is a standards-track CCID supported by RFC 4342 and RFC 5348. Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- net/dccp/Kconfig | 4 -- net/dccp/Makefile | 9 ++- net/dccp/ackvec.h | 4 +- net/dccp/ccid.c | 156 +++++++++++++++++++++++++++++++----------------- net/dccp/ccid.h | 11 ++-- net/dccp/ccids/Kconfig | 70 +++++++--------------- net/dccp/ccids/Makefile | 8 --- net/dccp/ccids/ccid2.c | 22 +------ net/dccp/ccids/ccid3.c | 23 +------ net/dccp/dccp.h | 2 - net/dccp/proto.c | 7 +++ 11 files changed, 148 insertions(+), 168 deletions(-) (limited to 'net') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 7aa2a7a..ad6dffd 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,7 +1,6 @@ menuconfig IP_DCCP tristate "The DCCP Protocol (EXPERIMENTAL)" depends on INET && EXPERIMENTAL - select IP_DCCP_CCID2 ---help--- Datagram Congestion Control Protocol (RFC 4340) @@ -25,9 +24,6 @@ config INET_DCCP_DIAG def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m -config IP_DCCP_ACKVEC - bool - source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index f4f8793..5ff2e7b 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -2,14 +2,19 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o +# +# CCID algorithms to be used by dccp.ko +# +# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency +dccp-y += ccids/ccid2.o ackvec.o +dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o + dccp_ipv4-y := ipv4.o # build dccp_ipv6 as module whenever either IPv6 or DCCP is a module obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o -dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o - obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 4ccee03..569a33a 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -84,7 +84,7 @@ struct dccp_ackvec_record { struct sock; struct sk_buff; -#ifdef CONFIG_IP_DCCP_ACKVEC +#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___ extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); @@ -106,7 +106,7 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { return av->av_vec_len; } -#else /* CONFIG_IP_DCCP_ACKVEC */ +#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ static inline int dccp_ackvec_init(void) { return 0; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index bcc643f..538d3b1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,6 +13,70 @@ #include "ccid.h" +static struct ccid_operations *ccids[] = { + &ccid2_ops, +#ifdef CONFIG_IP_DCCP_CCID3 + &ccid3_ops, +#endif +}; + +static struct ccid_operations *ccid_by_number(const u8 id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + if (ccids[i]->ccid_id == id) + return ccids[i]; + return NULL; +} + +/* check that up to @array_len members in @ccid_array are supported */ +bool ccid_support_check(u8 const *ccid_array, u8 array_len) +{ + while (array_len > 0) + if (ccid_by_number(ccid_array[--array_len]) == NULL) + return false; + return true; +} + +/** + * ccid_get_builtin_ccids - Populate a list of built-in CCIDs + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. 
+ */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) +{ + *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + + for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1) + (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id; + return 0; +} + +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + u8 *ccid_array, array_len; + int err = 0; + + if (len < ARRAY_SIZE(ccids)) + return -EINVAL; + + if (ccid_get_builtin_ccids(&ccid_array, &array_len)) + return -ENOBUFS; + + if (put_user(array_len, optlen) || + copy_to_user(optval, ccid_array, array_len)) + err = -EFAULT; + + kfree(ccid_array); + return err; +} + +#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ static u8 builtin_ccids[] = { DCCPC_CCID2, /* CCID2 is supported by default */ #if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) @@ -62,6 +126,7 @@ static inline void ccids_read_unlock(void) #define ccids_read_lock() do { } while(0) #define ccids_read_unlock() do { } while(0) #endif +#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { @@ -93,6 +158,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } +#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ /* check that up to @array_len members in @ccid_array are supported */ bool ccid_support_check(u8 const *ccid_array, u8 array_len) { @@ -133,8 +199,9 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return -EFAULT; return 0; } +#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ -int ccid_register(struct ccid_operations *ccid_ops) +static int ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -152,79 +219,40 @@ int ccid_register(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - ccids_write_lock(); - err = -EEXIST; - if (ccids[ccid_ops->ccid_id] == NULL) { - ccids[ccid_ops->ccid_id] = ccid_ops; - err = 0; - } - ccids_write_unlock(); - if (err != 0) - goto out_free_tx_slab; - - pr_info("CCID: Registered CCID %d (%s)\n", + pr_info("CCID: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); + err = 0; out: return err; -out_free_tx_slab: - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); - ccid_ops->ccid_hc_tx_slab = NULL; - goto out; out_free_rx_slab: ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; goto out; } -EXPORT_SYMBOL_GPL(ccid_register); - -int ccid_unregister(struct ccid_operations *ccid_ops) +static void ccid_deactivate(struct ccid_operations *ccid_ops) { - ccids_write_lock(); - ccids[ccid_ops->ccid_id] = NULL; - ccids_write_unlock(); - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); ccid_ops->ccid_hc_tx_slab = NULL; ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Unregistered CCID %d (%s)\n", + pr_info("CCID: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); - return 0; } -EXPORT_SYMBOL_GPL(ccid_unregister); - struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) { - struct ccid_operations *ccid_ops; + struct ccid_operations *ccid_ops = ccid_by_number(id); struct ccid *ccid = NULL; - ccids_read_lock(); -#ifdef CONFIG_MODULES - if (ccids[id] == NULL) { - /* We only try to load if in process context */ - ccids_read_unlock(); - if (gfp & GFP_ATOMIC) - goto out; - request_module("net-dccp-ccid-%d", id); - ccids_read_lock(); - } 
-#endif - ccid_ops = ccids[id]; if (ccid_ops == NULL) - goto out_unlock; - - if (!try_module_get(ccid_ops->ccid_owner)) - goto out_unlock; - - ccids_read_unlock(); + goto out; ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab : ccid_ops->ccid_hc_tx_slab, gfp); if (ccid == NULL) - goto out_module_put; + goto out; ccid->ccid_ops = ccid_ops; if (rx) { memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size); @@ -239,15 +267,10 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) } out: return ccid; -out_unlock: - ccids_read_unlock(); - goto out; out_free_ccid: kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab : ccid_ops->ccid_hc_tx_slab, ccid); ccid = NULL; -out_module_put: - module_put(ccid_ops->ccid_owner); goto out; } @@ -270,10 +293,6 @@ static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) ccid_ops->ccid_hc_tx_exit(sk); kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid); } - ccids_read_lock(); - if (ccids[ccid_ops->ccid_id] != NULL) - module_put(ccid_ops->ccid_owner); - ccids_read_unlock(); } void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) @@ -289,3 +308,28 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) } EXPORT_SYMBOL_GPL(ccid_hc_tx_delete); + +int __init ccid_initialize_builtins(void) +{ + int i, err; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) { + err = ccid_activate(ccids[i]); + if (err) + goto unwind_registrations; + } + return 0; + +unwind_registrations: + while(--i >= 0) + ccid_deactivate(ccids[i]); + return err; +} + +void ccid_cleanup_builtins(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + ccid_deactivate(ccids[i]); +} diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 18f6942..75c21c5 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -29,7 +29,6 @@ struct tcp_info; * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) * @ccid_name: alphabetical identifier string for @ccid_id - * @ccid_owner: module which implements/owns this CCID * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket * @@ -48,7 +47,6 @@ struct ccid_operations { unsigned char ccid_id; __u32 ccid_ccmps; const char *ccid_name; - struct module *ccid_owner; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; __u32 ccid_hc_rx_obj_size, @@ -90,8 +88,13 @@ struct ccid_operations { int __user *optlen); }; -extern int ccid_register(struct ccid_operations *ccid_ops); -extern int ccid_unregister(struct ccid_operations *ccid_ops); +extern struct ccid_operations ccid2_ops; +#ifdef CONFIG_IP_DCCP_CCID3 +extern struct ccid_operations ccid3_ops; +#endif + +extern int ccid_initialize_builtins(void); +extern void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 1227594..b30f049 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,80 +1,52 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)" depends on EXPERIMENTAL -config IP_DCCP_CCID2 - tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" - def_tristate IP_DCCP - select IP_DCCP_ACKVEC - ---help--- - CCID 2, TCP-like Congestion Control, denotes Additive Increase, - Multiplicative Decrease (AIMD) congestion control with behavior - modelled directly on TCP, including congestion window, slow start, - timeouts, and so forth [RFC 2581]. 
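For reference, the AIMD behaviour this help text summarises, additive increase of the congestion window per round trip and a halving on each loss event as in RFC 2581, can be sketched as follows; the numbers are arbitrary example values, not CCID-2 code.

/*
 * Sketch of additive-increase, multiplicative-decrease window evolution.
 */
#include <stdio.h>

int main(void)
{
	double cwnd = 1.0;		/* congestion window, in segments */
	int rtt;

	for (rtt = 1; rtt <= 10; rtt++) {
		if (rtt == 6)
			cwnd /= 2.0;	/* loss event: multiplicative decrease */
		else
			cwnd += 1.0;	/* window ACKed: additive increase */
		printf("rtt %2d: cwnd %.1f\n", rtt, cwnd);
	}
	return 0;
}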
CCID 2 achieves maximum - bandwidth over the long term, consistent with the use of end-to-end - congestion control, but halves its congestion window in response to - each congestion event. This leads to the abrupt rate changes - typical of TCP. Applications should use CCID 2 if they prefer - maximum bandwidth utilization to steadiness of rate. This is often - the case for applications that are not playing their data directly - to the user. For example, a hypothetical application that - transferred files over DCCP, using application-level retransmissions - for lost packets, would prefer CCID 2 to CCID 3. On-line games may - also prefer CCID 2. See RFC 4341 for further details. - - CCID2 is the default CCID used by DCCP. - config IP_DCCP_CCID2_DEBUG - bool "CCID2 debugging messages" - depends on IP_DCCP_CCID2 - ---help--- - Enable CCID2-specific debugging messages. + bool "CCID-2 debugging messages" + ---help--- + Enable CCID-2 specific debugging messages. - When compiling CCID2 as a module, this debugging output can - additionally be toggled by setting the ccid2_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid2_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. config IP_DCCP_CCID3 - tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" - def_tristate IP_DCCP + bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + def_bool y if (IP_DCCP = y || IP_DCCP = m) select IP_DCCP_TFRC_LIB ---help--- - CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to be reasonably fair when competing for bandwidth with TCP-like flows, where a flow is "reasonably fair" if its sending rate is generally within a factor of two of the sending rate of a TCP flow under the same conditions. However, TFRC has a much lower variation of - throughput over time compared with TCP, which makes CCID 3 more - suitable than CCID 2 for applications such streaming media where a + throughput over time compared with TCP, which makes CCID-3 more + suitable than CCID-2 for applications such streaming media where a relatively smooth sending rate is of importance. - CCID 3 is further described in RFC 4342, + CCID-3 is further described in RFC 4342, http://www.ietf.org/rfc/rfc4342.txt The TFRC congestion control algorithms were initially described in - RFC 3448. + RFC 5448. This text was extracted from RFC 4340 (sec. 10.2), http://www.ietf.org/rfc/rfc4340.txt - - To compile this CCID as a module, choose M here: the module will be - called dccp_ccid3. - If in doubt, say M. + If in doubt, say N. config IP_DCCP_CCID3_DEBUG - bool "CCID3 debugging messages" - depends on IP_DCCP_CCID3 - ---help--- - Enable CCID3-specific debugging messages. + bool "CCID-3 debugging messages" + depends on IP_DCCP_CCID3 + ---help--- + Enable CCID-3 specific debugging messages. - When compiling CCID3 as a module, this debugging output can - additionally be toggled by setting the ccid3_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid3_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. 
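"Equation-based" in the CCID-3 description refers to the TCP throughput equation of RFC 3448 (updated by RFC 5348): the allowed sending rate X is computed from the segment size, the RTT and the loss event rate. The floating-point sketch below is only for illustration; the in-kernel TFRC library computes the same thing with a fixed-point lookup table (net/dccp/ccids/lib/tfrc_equation.c).

/*
 * X = s / (R*sqrt(2bp/3) + t_RTO * (3*sqrt(3bp/8)) * p * (1 + 32p^2))
 * with b = 1 and t_RTO = 4R, per RFC 3448, section 3.1.
 */
#include <math.h>
#include <stdio.h>

static double tfrc_rate(double s, double rtt, double p)
{
	double t_rto = 4.0 * rtt;
	double f = rtt * sqrt(2.0 * p / 3.0) +
		   t_rto * 3.0 * sqrt(3.0 * p / 8.0) * p * (1.0 + 32.0 * p * p);

	return s / f;		/* bytes per second */
}

int main(void)
{
	/* 1460-byte segments, 100 ms RTT, 1% loss event rate */
	printf("allowed rate: %.0f bytes/s\n", tfrc_rate(1460.0, 0.1, 0.01));
	return 0;
}

With these example inputs the equation yields roughly 160 KB/s, in the same range as a TCP flow under the same conditions, which is exactly the "reasonably fair" property the help text describes.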
config IP_DCCP_CCID3_RTO int "Use higher bound for nofeedback timer" diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 438f20b..cdaefff 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,9 +1 @@ -obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o - -dccp_ccid3-y := ccid3.o - -obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o - -dccp_ccid2-y := ccid2.o - obj-y += lib/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c9ea19a..d235294 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -768,10 +768,9 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } } -static struct ccid_operations ccid2 = { +struct ccid_operations ccid2_ops = { .ccid_id = DCCPC_CCID2, .ccid_name = "TCP-like", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), .ccid_hc_tx_init = ccid2_hc_tx_init, .ccid_hc_tx_exit = ccid2_hc_tx_exit, @@ -784,22 +783,5 @@ static struct ccid_operations ccid2 = { #ifdef CONFIG_IP_DCCP_CCID2_DEBUG module_param(ccid2_debug, bool, 0644); -MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages"); #endif - -static __init int ccid2_module_init(void) -{ - return ccid_register(&ccid2); -} -module_init(ccid2_module_init); - -static __exit void ccid2_module_exit(void) -{ - ccid_unregister(&ccid2); -} -module_exit(ccid2_module_exit); - -MODULE_AUTHOR("Andrea Bittau "); -MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-2"); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3b8bd7c..a27b7f4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -940,10 +940,9 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, return 0; } -static struct ccid_operations ccid3 = { +struct ccid_operations ccid3_ops = { .ccid_id = DCCPC_CCID3, .ccid_name = "TCP-Friendly Rate Control", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), .ccid_hc_tx_init = ccid3_hc_tx_init, .ccid_hc_tx_exit = ccid3_hc_tx_exit, @@ -964,23 +963,5 @@ static struct ccid_operations ccid3 = { #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, bool, 0644); -MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages"); #endif - -static __init int ccid3_module_init(void) -{ - return ccid_register(&ccid3); -} -module_init(ccid3_module_init); - -static __exit void ccid3_module_exit(void) -{ - ccid_unregister(&ccid3); -} -module_exit(ccid3_module_exit); - -MODULE_AUTHOR("Ian McDonald , " - "Arnaldo Carvalho de Melo "); -MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-3"); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0bc4c9a..f2230fc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -432,10 +432,8 @@ static inline int dccp_ack_pending(const struct sock *sk) { const struct dccp_sock *dp = dccp_sk(sk); return dp->dccps_timestamp_echo != 0 || -#ifdef CONFIG_IP_DCCP_ACKVEC (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || -#endif inet_csk_ack_scheduled(sk); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1747cca..945b4d5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1118,9 +1118,15 @@ static int __init dccp_init(void) if (rc) goto out_ackvec_exit; + rc = ccid_initialize_builtins(); + if (rc) + goto out_sysctl_exit; + dccp_timestamping_init(); out: return rc; 
+out_sysctl_exit: + dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: @@ -1143,6 +1149,7 @@ out_free_percpu: static void __exit dccp_fini(void) { + ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, get_order(dccp_hashinfo.bhash_size * -- cgit v1.1 From e5fd56ca4eb3a130882bbef69d6952ef6aca5c8d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:43:23 -0800 Subject: dccp: Clean up ccid.c after integration of CCID plugins This patch cleans up after integrating the CCID modules and, in addition, * moves the if/else cases from ccid_delete() into ccid_hc_{tx,rx}_delete(); * removes the 'gfp' argument to ccid_new() - since it is always gfp_any(). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ackvec.h | 49 -------------------- net/dccp/ccid.c | 136 +++++------------------------------------------------- net/dccp/ccid.h | 3 +- net/dccp/feat.c | 2 +- 4 files changed, 14 insertions(+), 176 deletions(-) (limited to 'net') diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 569a33a..45f95e5 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -84,7 +84,6 @@ struct dccp_ackvec_record { struct sock; struct sk_buff; -#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___ extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); @@ -106,52 +105,4 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { return av->av_vec_len; } -#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ -static inline int dccp_ackvec_init(void) -{ - return 0; -} - -static inline void dccp_ackvec_exit(void) -{ -} - -static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) -{ - return NULL; -} - -static inline void dccp_ackvec_free(struct dccp_ackvec *av) -{ -} - -static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) -{ - return -1; -} - -static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno) -{ -} - -static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u64 *ackno, const u8 opt, - const u8 *value, const u8 len) -{ - return -1; -} - -static inline int dccp_insert_option_ackvec(const struct sock *sk, - const struct sk_buff *skb) -{ - return -1; -} - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) -{ - return 0; -} -#endif /* CONFIG_IP_DCCP_ACKVEC */ #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 538d3b1..19b214a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -76,58 +76,6 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return err; } -#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ -static u8 builtin_ccids[] = { - DCCPC_CCID2, /* CCID2 is supported by default */ -#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) - DCCPC_CCID3, -#endif -}; - -static struct ccid_operations *ccids[CCID_MAX]; -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) -static atomic_t ccids_lockct = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(ccids_lock); - -/* - * The strategy is: modifications ccids vector are short, do not sleep and - * veeery rare, but read access should be free of any exclusive locks. 
- */ -static void ccids_write_lock(void) -{ - spin_lock(&ccids_lock); - while (atomic_read(&ccids_lockct) != 0) { - spin_unlock(&ccids_lock); - yield(); - spin_lock(&ccids_lock); - } -} - -static inline void ccids_write_unlock(void) -{ - spin_unlock(&ccids_lock); -} - -static inline void ccids_read_lock(void) -{ - atomic_inc(&ccids_lockct); - smp_mb__after_atomic_inc(); - spin_unlock_wait(&ccids_lock); -} - -static inline void ccids_read_unlock(void) -{ - atomic_dec(&ccids_lockct); -} - -#else -#define ccids_write_lock() do { } while(0) -#define ccids_write_unlock() do { } while(0) -#define ccids_read_lock() do { } while(0) -#define ccids_read_unlock() do { } while(0) -#endif -#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ - static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { struct kmem_cache *slab; @@ -158,49 +106,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } -#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ -/* check that up to @array_len members in @ccid_array are supported */ -bool ccid_support_check(u8 const *ccid_array, u8 array_len) -{ - u8 i, j, found; - - for (i = 0, found = 0; i < array_len; i++, found = 0) { - for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) - found = (ccid_array[i] == builtin_ccids[j]); - if (!found) - return false; - } - return true; -} - -/** - * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array - * @ccid_array: pointer to copy into - * @array_len: value to return length into - * This function allocates memory - caller must see that it is freed after use. - */ -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) -{ - *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); - if (*ccid_array == NULL) - return -ENOBUFS; - *array_len = ARRAY_SIZE(builtin_ccids); - return 0; -} - -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - if (len < sizeof(builtin_ccids)) - return -EINVAL; - - if (put_user(sizeof(builtin_ccids), optlen) || - copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) - return -EFAULT; - return 0; -} -#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ - static int ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -241,7 +146,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops) ccid_ops->ccid_id, ccid_ops->ccid_name); } -struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) +struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx) { struct ccid_operations *ccid_ops = ccid_by_number(id); struct ccid *ccid = NULL; @@ -250,7 +155,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) goto out; ccid = kmem_cache_alloc(rx ? 
ccid_ops->ccid_hc_rx_slab : - ccid_ops->ccid_hc_tx_slab, gfp); + ccid_ops->ccid_hc_tx_slab, gfp_any()); if (ccid == NULL) goto out; ccid->ccid_ops = ccid_ops; @@ -274,41 +179,24 @@ out_free_ccid: goto out; } -EXPORT_SYMBOL_GPL(ccid_new); - -static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) -{ - struct ccid_operations *ccid_ops; - - if (ccid == NULL) - return; - - ccid_ops = ccid->ccid_ops; - if (rx) { - if (ccid_ops->ccid_hc_rx_exit != NULL) - ccid_ops->ccid_hc_rx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_rx_slab, ccid); - } else { - if (ccid_ops->ccid_hc_tx_exit != NULL) - ccid_ops->ccid_hc_tx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid); - } -} - void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) { - ccid_delete(ccid, sk, 1); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_rx_exit != NULL) + ccid->ccid_ops->ccid_hc_rx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid); + } } -EXPORT_SYMBOL_GPL(ccid_hc_rx_delete); - void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) { - ccid_delete(ccid, sk, 0); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_tx_exit != NULL) + ccid->ccid_ops->ccid_hc_tx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid); + } } -EXPORT_SYMBOL_GPL(ccid_hc_tx_delete); - int __init ccid_initialize_builtins(void) { int i, err; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c21c5..facedd2 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -111,8 +111,7 @@ extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, char __user *, int __user *); -extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, - gfp_t gfp); +extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 30f9fb7..741b2db 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -34,7 +34,7 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any()); + struct ccid *new_ccid = ccid_new(ccid, sk, rx); if (new_ccid == NULL) return -ENOMEM; -- cgit v1.1 From 129fa44785a399248ae2466b6cb5c655e96668f7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:45:33 -0800 Subject: dccp: Integrate the TFRC library with DCCP This patch integrates the TFRC library, which is a dependency of CCID-3 (and CCID-4), with the new use of CCIDs in the DCCP module. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/Makefile | 6 ++++-- net/dccp/ccid.c | 8 +++++++- net/dccp/ccids/Kconfig | 9 ++------- net/dccp/ccids/Makefile | 1 - net/dccp/ccids/lib/Makefile | 3 --- net/dccp/ccids/lib/loss_interval.c | 3 --- net/dccp/ccids/lib/packet_history.c | 9 --------- net/dccp/ccids/lib/tfrc.c | 19 ++++--------------- net/dccp/ccids/lib/tfrc.h | 11 ++++++++++- net/dccp/ccids/lib/tfrc_equation.c | 4 ---- net/dccp/feat.c | 4 ---- net/dccp/input.c | 2 -- 12 files changed, 27 insertions(+), 52 deletions(-) delete mode 100644 net/dccp/ccids/Makefile delete mode 100644 net/dccp/ccids/lib/Makefile (limited to 'net') diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 5ff2e7b..2991efc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -8,6 +8,10 @@ dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o # CCID-2 is default (RFC 4340, p. 
77) and has Ack Vectors as dependency dccp-y += ccids/ccid2.o ackvec.o dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o +dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \ + ccids/lib/tfrc_equation.o \ + ccids/lib/packet_history.o \ + ccids/lib/loss_interval.o dccp_ipv4-y := ipv4.o @@ -22,5 +26,3 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o dccp_probe-y := probe.o - -obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 19b214a..f3e9ba1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -12,6 +12,7 @@ */ #include "ccid.h" +#include "ccids/lib/tfrc.h" static struct ccid_operations *ccids[] = { &ccid2_ops, @@ -199,7 +200,10 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) int __init ccid_initialize_builtins(void) { - int i, err; + int i, err = tfrc_lib_init(); + + if (err) + return err; for (i = 0; i < ARRAY_SIZE(ccids); i++) { err = ccid_activate(ccids[i]); @@ -211,6 +215,7 @@ int __init ccid_initialize_builtins(void) unwind_registrations: while(--i >= 0) ccid_deactivate(ccids[i]); + tfrc_lib_exit(); return err; } @@ -220,4 +225,5 @@ void ccid_cleanup_builtins(void) for (i = 0; i < ARRAY_SIZE(ccids); i++) ccid_deactivate(ccids[i]); + tfrc_lib_exit(); } diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index b30f049..b28bf96 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -14,7 +14,6 @@ config IP_DCCP_CCID2_DEBUG config IP_DCCP_CCID3 bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" def_bool y if (IP_DCCP = y || IP_DCCP = m) - select IP_DCCP_TFRC_LIB ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to @@ -80,12 +79,8 @@ config IP_DCCP_CCID3_RTO therefore not be performed on WANs. config IP_DCCP_TFRC_LIB - tristate - default n + def_bool y if IP_DCCP_CCID3 config IP_DCCP_TFRC_DEBUG - bool - depends on IP_DCCP_TFRC_LIB - default y if IP_DCCP_CCID3_DEBUG - + def_bool y if IP_DCCP_CCID3_DEBUG endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile deleted file mode 100644 index cdaefff..0000000 --- a/net/dccp/ccids/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y += lib/ diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile deleted file mode 100644 index 68c93e3..0000000 --- a/net/dccp/ccids/lib/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o - -dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5b3ce06..4d1e401 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -60,7 +60,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) lh->ring[LIH_INDEX(lh->counter)] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) { @@ -121,7 +120,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) return (lh->i_mean < old_i_mean); } -EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, @@ -169,7 +167,6 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, } return 1; } -EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); int __init tfrc_li_init(void) { diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6cc108a..b7785b3 100644 --- a/net/dccp/ccids/lib/packet_history.c 
+++ b/net/dccp/ccids/lib/packet_history.c @@ -94,7 +94,6 @@ int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) *headp = entry; return 0; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_add); void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) { @@ -109,7 +108,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) *headp = NULL; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, const ktime_t now) @@ -127,7 +125,6 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, return rtt; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); /* @@ -172,7 +169,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, tfrc_rx_hist_entry_from_skb(entry, skb, ndp); } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) @@ -189,7 +185,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { @@ -390,7 +385,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } return is_new_loss; } -EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { @@ -412,7 +406,6 @@ out_free: } return -ENOBUFS; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { @@ -424,7 +417,6 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) h->ring[i] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against @@ -495,4 +487,3 @@ keep_ref_for_next_time: return sample; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 1859162..60c412c 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -1,20 +1,18 @@ /* - * TFRC: main module holding the pieces of the TFRC library together + * TFRC library initialisation * * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo */ -#include -#include #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; module_param(tfrc_debug, bool, 0644); -MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); +MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif -static int __init tfrc_module_init(void) +int __init tfrc_lib_init(void) { int rc = tfrc_li_init(); @@ -38,18 +36,9 @@ out: return rc; } -static void __exit tfrc_module_exit(void) +void __exit tfrc_lib_exit(void) { tfrc_rx_packet_history_exit(); tfrc_tx_packet_history_exit(); tfrc_li_exit(); } - -module_init(tfrc_module_init); -module_exit(tfrc_module_exit); - -MODULE_AUTHOR("Gerrit Renker , " - "Ian McDonald , " - "Arnaldo Carvalho de Melo "); -MODULE_DESCRIPTION("DCCP TFRC library"); -MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed98575..e9720b1 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -17,7 +17,8 @@ #include #include #include "../../dccp.h" -/* internal includes that this module exports: */ + +/* internal includes that this library exports: */ #include "loss_interval.h" #include "packet_history.h" @@ -66,4 +67,12 @@ extern void tfrc_rx_packet_history_exit(void); extern int tfrc_li_init(void); extern void tfrc_li_exit(void); + +#ifdef CONFIG_IP_DCCP_TFRC_LIB +extern int tfrc_lib_init(void); +extern void 
tfrc_lib_exit(void); +#else +#define tfrc_lib_init() (0) +#define tfrc_lib_exit() +#endif #endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 2f20a29..c5d3a9e 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -659,8 +659,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) return scaled_div32(result, f); } -EXPORT_SYMBOL_GPL(tfrc_calc_x); - /** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * @@ -693,5 +691,3 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } - -EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 741b2db..4152308 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1214,8 +1214,6 @@ const char *dccp_feat_typename(const u8 type) return NULL; } -EXPORT_SYMBOL_GPL(dccp_feat_typename); - const char *dccp_feat_name(const u8 feat) { static const char *feature_names[] = { @@ -1240,6 +1238,4 @@ const char *dccp_feat_name(const u8 feat) return feature_names[feat]; } - -EXPORT_SYMBOL_GPL(dccp_feat_name); #endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 5eb443f..7648f31 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -741,5 +741,3 @@ u32 dccp_sample_rtt(struct sock *sk, long delta) return delta; } - -EXPORT_SYMBOL_GPL(dccp_sample_rtt); -- cgit v1.1 From 4f7d54f59bc470f0aaa932f747a95232d7ebf8b1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 5 Jan 2009 00:00:12 -0800 Subject: tcp: don't mask EOF and socket errors on nonblocking splice receive Currently, setting SPLICE_F_NONBLOCK on splice from a TCP socket results in masking of EOF (RDHUP) and error conditions on the socket by an -EAGAIN return. Move the NONBLOCK check in tcp_splice_read() to be after the EOF and error checks to fix this. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d655e9..bce1b06 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -580,10 +580,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, else if (!ret) { if (spliced) break; - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { @@ -601,6 +597,10 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -ENOTCONN; break; } + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } if (!timeo) { ret = -EAGAIN; break; -- cgit v1.1 From 7945cc6464a4db0caf6dfacdfe05806051c4cb7b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Jan 2009 00:59:00 -0800 Subject: tcp: Kill extraneous SPLICE_F_NONBLOCK checks. In splice TCP receive, the SPLICE_F_NONBLOCK flag is used to compute the "timeo" value. So checking it again inside of the main receive loop to trigger -EAGAIN processing is entirely unnecessary. Noticed by Jarek P. and Lennert Buytenhek. Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bce1b06..35bcddf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -597,10 +597,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -ENOTCONN; break; } - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } if (!timeo) { ret = -EAGAIN; break; -- cgit v1.1 From c276e098d3ee33059b4a1c747354226cec58487c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Jan 2009 16:01:51 -0800 Subject: Revert "net: Fix for initial link state in 2.6.28" This reverts commit 22604c866889c4b2e12b73cbf1683bda1b72a313. We can't fix this issue in this way, because we now can try to take the dev_base_lock rwlock as a writer in software interrupt context and that is not allowed without major surgery elsewhere. This initial link state problem needs to be solved in some other way. Signed-off-by: David S. Miller --- net/core/link_watch.c | 7 +------ net/sched/sch_generic.c | 4 ++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 1e401e1..bf8f7af 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -178,6 +178,7 @@ static void __linkwatch_run_queue(int urgent_only) */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); + rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); @@ -214,12 +215,6 @@ void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); - rfc2863_policy(dev); - - /* Some drivers call netif_carrier_off early */ - if (dev->reg_state == NETREG_UNINITIALIZED) - return; - if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 23a8e61..5f5efe4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -286,6 +288,8 @@ EXPORT_SYMBOL(netif_carrier_on); void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); } } -- cgit v1.1 From 55cdea9ed9cf2d76993e40ed7a1fc649a14db07c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 5 Jan 2009 18:07:07 -0800 Subject: af_iucv: New error return codes for connect() If the iucv_path_connect() call fails then return an error code that corresponds to the iucv_path_connect() failure condition; instead of returning -ECONNREFUSED for any failure. This helps to improve error handling for user space applications (e.g. inform the user that the z/VM guest is not authorized to connect to other guest virtual machines). The error return codes are based on those described in connect(2). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index af3192d..1077bc4 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -494,7 +494,21 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, if (err) { iucv_path_free(iucv->path); iucv->path = NULL; - err = -ECONNREFUSED; + switch (err) { + case 0x0b: /* Target communicator is not logged on */ + err = -ENETUNREACH; + break; + case 0x0d: /* Max connections for this guest exceeded */ + case 0x0e: /* Max connections for target guest exceeded */ + err = -EAGAIN; + break; + case 0x0f: /* Missing IUCV authorization */ + err = -EACCES; + break; + default: + err = -ECONNREFUSED; + break; + } goto done; } -- cgit v1.1 From 18becbc5479f88d5adc218374ca62b8b93ec2545 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 5 Jan 2009 18:07:46 -0800 Subject: af_iucv: avoid left over IUCV connections from failing connects For certain types of AFIUCV socket connect failures IUCV connections are left over. Add some cleanup-statements to avoid cluttered IUCV connections. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1077bc4..6b5f193 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -521,6 +521,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, release_sock(sk); return -ECONNREFUSED; } + + if (err) { + iucv_path_sever(iucv->path, NULL); + iucv_path_free(iucv->path); + iucv->path = NULL; + } + done: release_sock(sk); return err; -- cgit v1.1 From 65dbd7c2778f1921ef1ee2a73e47a2a126fed30f Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 5 Jan 2009 18:08:23 -0800 Subject: af_iucv: Free iucv path/socket in path_pending callback Free iucv path after iucv_path_sever() calls in iucv_callback_connreq() (path_pending() iucv callback). If iucv_path_accept() fails, free path and free/kill newly created socket. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6b5f193..eb8a2a0 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1042,12 +1042,14 @@ static int iucv_callback_connreq(struct iucv_path *path, ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1055,6 +1057,7 @@ static int iucv_callback_connreq(struct iucv_path *path, nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); if (!nsk) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1078,6 +1081,8 @@ static int iucv_callback_connreq(struct iucv_path *path, err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); + iucv_sock_kill(nsk); goto fail; } -- cgit v1.1 From f1d3e4dca3f8d4f55656477e83d0afe0ea7cbaed Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Jan 2009 18:09:02 -0800 Subject: iucv: fix cpu hotplug If the iucv module is compiled in/loaded but no user is registered cpu hot remove doesn't work. 
Reason for that is that the iucv cpu hotplug notifier on CPU_DOWN_PREPARE checks if the iucv_buffer_cpumask would be empty after the corresponding bit would be cleared. However the bit was never set since iucv wasn't enable. That causes all cpu hot unplug operations to fail in this scenario. To fix this use iucv_path_table as an indicator wether iucv is enabled or not. Signed-off-by: Heiko Carstens Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/iucv.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 8f57d4f..032f61e 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -517,6 +517,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; + get_online_cpus(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -524,19 +525,17 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ - goto out_path; + goto out; put_online_cpus(); return 0; - -out_path: - put_online_cpus(); - kfree(iucv_path_table); out: + kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); return rc; } @@ -551,8 +550,9 @@ static void iucv_disable(void) { get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); - put_online_cpus(); kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); } static int __cpuinit iucv_cpu_notify(struct notifier_block *self, @@ -589,10 +589,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: + if (!iucv_path_table) + break; smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: + if (!iucv_path_table) + break; cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) -- cgit v1.1 From 6f57321422e0d359e83c978c2b03db77b967b7d5 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 5 Jan 2009 18:14:19 -0800 Subject: pkt_sched: cls_u32: Fix locking in u32_change() New nodes are inserted in u32_change() under rtnl_lock() with wmb(), so without tcf_tree_lock() like in other classifiers (e.g. cls_fw). This isn't enough without rmb() on the read side, but on the other hand adding such barriers doesn't give any savings, so the lock is added instead. Reported-by: m0sia Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 05d1780..07372f6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -638,8 +638,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, break; n->next = *ins; - wmb(); + tcf_tree_lock(tp); *ins = n; + tcf_tree_unlock(tp); *arg = (unsigned long)n; return 0; -- cgit v1.1
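
The cls_u32 patch above replaces a bare wmb() publish with an insertion done under tcf_tree_lock(), relying on the fact that the lookup path already runs under the same qdisc tree lock, so no read-side barrier is needed. The following is a minimal userspace sketch (not kernel code) of that pattern; the names node, head, list_lock, list_insert and list_find are invented for illustration, and pthreads stands in for the kernel locking primitives.

```c
/*
 * Sketch of the pattern adopted by the cls_u32 fix: publish a new list
 * node under the same lock the lookup side holds, instead of relying on
 * a lone write barrier that would also need a paired read barrier.
 */
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

struct node {
	unsigned int handle;
	struct node *next;
};

static struct node *head;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writer: fully initialise the node, then link it in under the lock. */
static int list_insert(unsigned int handle)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		return -1;
	n->handle = handle;

	pthread_mutex_lock(&list_lock);
	n->next = head;		/* analogous to n->next = *ins            */
	head = n;		/* analogous to *ins = n, with no wmb()   */
	pthread_mutex_unlock(&list_lock);
	return 0;
}

/* Reader: traverse under the same lock, so no memory barriers are needed. */
static struct node *list_find(unsigned int handle)
{
	struct node *n;

	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next)
		if (n->handle == handle)
			break;
	pthread_mutex_unlock(&list_lock);
	return n;
}

int main(void)
{
	list_insert(0x100);
	list_insert(0x200);
	printf("found: %s\n", list_find(0x200) ? "yes" : "no");
	return 0;
}
```

The design point mirrors the commit message: barriers on the write side alone do not order the reader's loads, and since the lookup path in this code already serialises on the tree lock, taking that lock for the insertion is both simpler and sufficient.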