From c4266263249f22479eb1abb1a1709c38240b1597 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 15 Dec 2010 08:18:36 +0000
Subject: net_sched: sch_sfq: add backlog info in sfq_dump_class_stats()

We currently return for each active SFQ slot the number of packets in
queue. We can also give number of bytes accounted for these packets.

tc -s class show dev ifb0

Before patch :

class sfq 11:3d9 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 3p requeues 0
 allot 1266

After patch :

class sfq 11:3e4 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 4380b 3p requeues 0
 allot 1212

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d..cb331de 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -548,8 +548,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	sfq_index idx = q->ht[cl-1];
-	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
+	struct sk_buff_head *list = &q->qs[idx];
+	struct gnet_stats_queue qs = { .qlen = list->qlen };
 	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	struct sk_buff *skb;
+
+	skb_queue_walk(list, skb)
+		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
-- 
cgit v1.1


From eda83e3b63e88351310c13c99178eb4634f137b2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Dec 2010 12:54:58 +0000
Subject: net_sched: sch_sfq: better struct layouts

Here is a respin of patch.

I'll send a short patch to make SFQ more fair in presence of large
packets as well.

Thanks

[PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts

This patch shrinks sizeof(struct sfq_sched_data)
from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and
reduce text size as well.

   text    data     bss     dec     hex filename
   4821     152       0    4973    136d old/net/sched/sch_sfq.o
   4627     136       0    4763    129b new/net/sched/sch_sfq.o

All data for a slot/flow is now grouped in a compact and cache friendly
structure, instead of being spreaded in many different points.

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */
};

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 260 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 162 insertions(+), 98 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c9..13322e8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	struct sfq_head dep; /* anchor in dep[] chains */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
 
+	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
+
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)			\
+	n = q->slots[x].dep.next;		\
+	p = q->slots[x].dep.prev;		\
+	sfq_dep_head(q, p)->next = n;		\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
 
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
 
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
 
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define	slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot_queue_init(slot);
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->next[x] = x;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
-		q->tail = x;
-		q->allot[x] = q->quantum;
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -342,14 +410,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	a = q->tail->next;
+	slot = &q->slots[a];
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		next_a = q->next[a];
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
 		if (a == next_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = next_a;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->allot[a] += q->quantum;
-		q->tail = a;
+		q->tail->next = next_a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = SFQ_EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct sk_buff_head *list = &q->qs[idx];
-	struct gnet_stats_queue qs = { .qlen = list->qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 	struct sk_buff *skb;
 
-	skb_queue_walk(list, skb)
+	slot_queue_walk(slot, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;
-- 
cgit v1.1


From ee09b3c1cff0335137dc1b146488e4352f640f13 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 22 Dec 2010 11:39:59 -0800
Subject: sfq: fix sfq class stats handling

sfq_walk() runs without qdisc lock. By the time it selects a non empty
hash slot and sfq_dump_class_stats() is run (with lock held), slot might
have been freed : We then access q->slots[SFQ_EMPTY_SLOT], out of
bounds, and crash in slot_queue_walk()

On previous kernels, bug is here but out of bounds qs[SFQ_DEPTH] and
allot[SFQ_DEPTH] are located in struct sfq_sched_data, so no illegal
memory access happens, only possibly wrong data reported to user.

Also, slot_dequeue_tail() should make sure slot skb chain is correctly
terminated, or sfq_dump_class_stats() can access freed skbs.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 13322e8..6a2f88f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,6 +281,7 @@ static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_prev;
 
 	slot->skblist_prev = skb->prev;
+	skb->prev->next = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -608,14 +609,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
-	struct gnet_stats_queue qs = { .qlen = slot->qlen };
-	struct tc_sfq_xstats xstats = { .allot = slot->allot };
+	sfq_index idx = q->ht[cl - 1];
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_sfq_xstats xstats = { 0 };
 	struct sk_buff *skb;
 
-	slot_queue_walk(slot, skb)
-		qs.backlog += qdisc_pkt_len(skb);
+	if (idx != SFQ_EMPTY_SLOT) {
+		const struct sfq_slot *slot = &q->slots[idx];
 
+		xstats.allot = slot->allot;
+		qs.qlen = slot->qlen;
+		slot_queue_walk(slot, skb)
+			qs.backlog += qdisc_pkt_len(skb);
+	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
-- 
cgit v1.1


From eeaeb068f1393b4db4861481bf594bcd1c3eda7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 28 Dec 2010 21:53:33 +0000
Subject: sch_sfq: allow big packets and be fair

SFQ is currently 'limited' to small packets, because it uses a 15bit
allotment number per flow. Introduce a scale by 8, so that we can handle
full size TSO/GRO packets.

Use appropriate handling to make sure allot is positive before a new
packet is dequeued, so that fairness is respected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6a2f88f..b76d46b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,7 +67,7 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
+	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
 	fit into one 4K page on 32bit arches.
 
@@ -77,6 +77,11 @@
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
+/* We use 16 bits to store allot, and want to handle packets up to 64K
+ * Scale allot by 8 (1<<3) so that no overflow occurs.
+ */
+#define SFQ_ALLOT_SHIFT		3
+#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
@@ -115,7 +120,7 @@ struct sfq_sched_data
 	struct timer_list perturb_timer;
 	u32		perturbation;
 	sfq_index	cur_depth;	/* depth of longest slot */
-
+	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
 	struct sfq_slot	slots[SFQ_SLOTS];
@@ -395,7 +400,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			q->tail->next = x;
 		}
 		q->tail = slot;
-		slot->allot = q->quantum;
+		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -431,8 +436,14 @@ sfq_dequeue(struct Qdisc *sch)
 	if (q->tail == NULL)
 		return NULL;
 
+next_slot:
 	a = q->tail->next;
 	slot = &q->slots[a];
+	if (slot->allot <= 0) {
+		q->tail = slot;
+		slot->allot += q->scaled_quantum;
+		goto next_slot;
+	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
@@ -447,9 +458,8 @@ sfq_dequeue(struct Qdisc *sch)
 			return skb;
 		}
 		q->tail->next = next_a;
-	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = slot;
-		slot->allot += q->quantum;
+	} else {
+		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
 	return skb;
 }
@@ -485,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -525,6 +536,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 	} else {
@@ -617,7 +629,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
-		xstats.allot = slot->allot;
+		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
 		slot_queue_walk(slot, skb)
 			qs.backlog += qdisc_pkt_len(skb);
-- 
cgit v1.1


From 18c8d82ae5b802c5d82e0dfbcc08b1b568955f46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 31 Dec 2010 12:48:55 -0800
Subject: sfq: fix slot_dequeue_head()

slot_dequeue_head() should make sure slot skb chain is correct in both
ways, or we can crash if all possible flows are in use.

Jarek pointed out slot_queue_init() can now be done in sfq_init() once,
instead each time a flow is setup.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b76d46b..d54ac94 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -297,6 +297,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_next;
 
 	slot->skblist_next = skb->next;
+	skb->next->prev = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -380,7 +381,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
-		slot_queue_init(slot);
 	}
 
 	/* If selected queue has length q->limit, do simple tail drop,
@@ -545,8 +545,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_SLOTS; i++)
+	for (i = 0; i < SFQ_SLOTS; i++) {
+		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
+	}
 	return 0;
 }
 
-- 
cgit v1.1


From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 9 Jan 2011 08:30:54 +0000
Subject: net_sched: factorize qdisc stats handling

HTB takes into account skb is segmented in stats updates.
Generalize this to all schedulers.

They should use qdisc_bstats_update() helper instead of manipulating
bstats.bytes and bstats.packets

Add bstats_update() helper too for classes that use
gnet_stats_basic_packed fields.

Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no
stab is setup on qdisc.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d54ac94..239ec53 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -403,8 +403,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		return NET_XMIT_SUCCESS;
 	}
 
-- 
cgit v1.1


From 9190b3b3208d052d98cb601fcc192f3f71a5658b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Jan 2011 23:31:33 -0800
Subject: net_sched: accurate bytes/packets stats/rates

In commit 44b8288308ac9d (net_sched: pfifo_head_drop problem), we fixed
a problem with pfifo_head drops that incorrectly decreased
sch->bstats.bytes and sch->bstats.packets

Several qdiscs (CHOKe, SFQ, pfifo_head, ...) are able to drop a
previously enqueued packet, and bstats cannot be changed, so
bstats/rates are not accurate (over estimated)

This patch changes the qdisc_bstats updates to be done at dequeue() time
instead of enqueue() time. bstats counters no longer account for dropped
frames, and rates are more correct, since enqueue() bursts dont have
effect on dequeue() rate.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 239ec53..edea8ce 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -402,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->tail = slot;
 		slot->allot = q->scaled_quantum;
 	}
-	if (++sch->q.qlen <= q->limit) {
-		qdisc_bstats_update(sch, skb);
+	if (++sch->q.qlen <= q->limit)
 		return NET_XMIT_SUCCESS;
-	}
 
 	sfq_drop(sch);
 	return NET_XMIT_CN;
@@ -445,6 +443,7 @@ next_slot:
 	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
-- 
cgit v1.1