From c4266263249f22479eb1abb1a1709c38240b1597 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Dec 2010 08:18:36 +0000 Subject: net_sched: sch_sfq: add backlog info in sfq_dump_class_stats() We currently return for each active SFQ slot the number of packets in queue. We can also give number of bytes accounted for these packets. tc -s class show dev ifb0 Before patch : class sfq 11:3d9 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 3p requeues 0 allot 1266 After patch : class sfq 11:3e4 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 4380b 3p requeues 0 allot 1212 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3cf478d..cb331de 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -548,8 +548,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct sfq_sched_data *q = qdisc_priv(sch); sfq_index idx = q->ht[cl-1]; - struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; + struct sk_buff_head *list = &q->qs[idx]; + struct gnet_stats_queue qs = { .qlen = list->qlen }; struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; + struct sk_buff *skb; + + skb_queue_walk(list, skb) + qs.backlog += qdisc_pkt_len(skb); if (gnet_stats_copy_queue(d, &qs) < 0) return -1; -- cgit v1.1 From eda83e3b63e88351310c13c99178eb4634f137b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2010 12:54:58 +0000 Subject: net_sched: sch_sfq: better struct layouts Here is a respin of patch. I'll send a short patch to make SFQ more fair in presence of large packets as well. Thanks [PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts This patch shrinks sizeof(struct sfq_sched_data) from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and reduce text size as well. text data bss dec hex filename 4821 152 0 4973 136d old/net/sched/sch_sfq.o 4627 136 0 4763 129b new/net/sched/sch_sfq.o All data for a slot/flow is now grouped in a compact and cache friendly structure, instead of being spreaded in many different points. struct sfq_slot { struct sk_buff *skblist_next; struct sk_buff *skblist_prev; sfq_index qlen; /* number of skbs in skblist */ sfq_index next; /* next slot in sfq chain */ struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ short allot; /* credit for this slot */ }; Signed-off-by: Eric Dumazet Cc: Jarek Poplawski Cc: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 260 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 162 insertions(+), 98 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 42396c9..13322e8 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -67,27 +67,42 @@ IMPLEMENTATION: This implementation limits maximal queue length to 128; - maximal mtu to 2^15-1; number of hash buckets to 1024. + maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024. The only goal of this restrictions was that all data - fit into one 4K page :-). Struct sfq_sched_data is - organized in anti-cache manner: all the data for a bucket - are scattered over different locations. This is not good, - but it allowed me to put it into 4K. + fit into one 4K page on 32bit arches. It is easy to increase these values, but not in flight. */ -#define SFQ_DEPTH 128 +#define SFQ_DEPTH 128 /* max number of packets per flow */ +#define SFQ_SLOTS 128 /* max number of flows */ +#define SFQ_EMPTY_SLOT 255 #define SFQ_HASH_DIVISOR 1024 -/* This type should contain at least SFQ_DEPTH*2 values */ +/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ typedef unsigned char sfq_index; +/* + * We dont use pointers to save space. + * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array + * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] + * are 'pointers' to dep[] array + */ struct sfq_head { sfq_index next; sfq_index prev; }; +struct sfq_slot { + struct sk_buff *skblist_next; + struct sk_buff *skblist_prev; + sfq_index qlen; /* number of skbs in skblist */ + sfq_index next; /* next slot in sfq chain */ + struct sfq_head dep; /* anchor in dep[] chains */ + unsigned short hash; /* hash value (index in ht[]) */ + short allot; /* credit for this slot */ +}; + struct sfq_sched_data { /* Parameters */ @@ -99,17 +114,24 @@ struct sfq_sched_data struct tcf_proto *filter_list; struct timer_list perturb_timer; u32 perturbation; - sfq_index tail; /* Index of current slot in round */ - sfq_index max_depth; /* Maximal depth */ + sfq_index cur_depth; /* depth of longest slot */ + struct sfq_slot *tail; /* current slot in round */ sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ - sfq_index next[SFQ_DEPTH]; /* Active slots link */ - short allot[SFQ_DEPTH]; /* Current allotment per slot */ - unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */ - struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */ - struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */ + struct sfq_slot slots[SFQ_SLOTS]; + struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ }; +/* + * sfq_head are either in a sfq_slot or in dep[] array + */ +static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) +{ + if (val < SFQ_SLOTS) + return &q->slots[val].dep; + return &q->dep[val - SFQ_SLOTS]; +} + static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); @@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return 0; } +/* + * x : slot number [0 .. SFQ_SLOTS - 1] + */ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; - int d = q->qs[x].qlen + SFQ_DEPTH; + int qlen = q->slots[x].qlen; + + p = qlen + SFQ_SLOTS; + n = q->dep[qlen].next; - p = d; - n = q->dep[d].next; - q->dep[x].next = n; - q->dep[x].prev = p; - q->dep[p].next = q->dep[n].prev = x; + q->slots[x].dep.next = n; + q->slots[x].dep.prev = p; + + q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ + sfq_dep_head(q, n)->prev = x; } +#define sfq_unlink(q, x, n, p) \ + n = q->slots[x].dep.next; \ + p = q->slots[x].dep.prev; \ + sfq_dep_head(q, p)->next = n; \ + sfq_dep_head(q, n)->prev = p + + static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; + int d; - n = q->dep[x].next; - p = q->dep[x].prev; - q->dep[p].next = n; - q->dep[n].prev = p; - - if (n == p && q->max_depth == q->qs[x].qlen + 1) - q->max_depth--; + sfq_unlink(q, x, n, p); + d = q->slots[x].qlen--; + if (n == p && q->cur_depth == d) + q->cur_depth--; sfq_link(q, x); } @@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) sfq_index p, n; int d; - n = q->dep[x].next; - p = q->dep[x].prev; - q->dep[p].next = n; - q->dep[n].prev = p; - d = q->qs[x].qlen; - if (q->max_depth < d) - q->max_depth = d; + sfq_unlink(q, x, n, p); + d = ++q->slots[x].qlen; + if (q->cur_depth < d) + q->cur_depth = d; sfq_link(q, x); } +/* helper functions : might be changed when/if skb use a standard list_head */ + +/* remove one skb from tail of slot queue */ +static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) +{ + struct sk_buff *skb = slot->skblist_prev; + + slot->skblist_prev = skb->prev; + skb->next = skb->prev = NULL; + return skb; +} + +/* remove one skb from head of slot queue */ +static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) +{ + struct sk_buff *skb = slot->skblist_next; + + slot->skblist_next = skb->next; + skb->next = skb->prev = NULL; + return skb; +} + +static inline void slot_queue_init(struct sfq_slot *slot) +{ + slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; +} + +/* add skb to slot queue (tail add) */ +static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) +{ + skb->prev = slot->skblist_prev; + skb->next = (struct sk_buff *)slot; + slot->skblist_prev->next = skb; + slot->skblist_prev = skb; +} + +#define slot_queue_walk(slot, skb) \ + for (skb = slot->skblist_next; \ + skb != (struct sk_buff *)slot; \ + skb = skb->next) + static unsigned int sfq_drop(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index d = q->max_depth; + sfq_index x, d = q->cur_depth; struct sk_buff *skb; unsigned int len; + struct sfq_slot *slot; - /* Queue is full! Find the longest slot and - drop a packet from it */ - + /* Queue is full! Find the longest slot and drop tail packet from it */ if (d > 1) { - sfq_index x = q->dep[d + SFQ_DEPTH].next; - skb = q->qs[x].prev; + x = q->dep[d].next; + slot = &q->slots[x]; +drop: + skb = slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - __skb_unlink(skb, &q->qs[x]); - kfree_skb(skb); sfq_dec(q, x); + kfree_skb(skb); sch->q.qlen--; sch->qstats.drops++; sch->qstats.backlog -= len; @@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch) if (d == 1) { /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ - d = q->next[q->tail]; - q->next[q->tail] = q->next[d]; - skb = q->qs[d].prev; - len = qdisc_pkt_len(skb); - __skb_unlink(skb, &q->qs[d]); - kfree_skb(skb); - sfq_dec(q, d); - sch->q.qlen--; - q->ht[q->hash[d]] = SFQ_DEPTH; - sch->qstats.drops++; - sch->qstats.backlog -= len; - return len; + x = q->tail->next; + slot = &q->slots[x]; + q->tail->next = slot->next; + q->ht[slot->hash] = SFQ_EMPTY_SLOT; + goto drop; } return 0; @@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash; sfq_index x; + struct sfq_slot *slot; int uninitialized_var(ret); hash = sfq_classify(skb, sch, &ret); @@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) hash--; x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; + slot = &q->slots[x]; + if (x == SFQ_EMPTY_SLOT) { + x = q->dep[0].next; /* get a free slot */ + q->ht[hash] = x; + slot = &q->slots[x]; + slot->hash = hash; + slot_queue_init(slot); } - /* If selected queue has length q->limit, this means that - * all another queues are empty and that we do simple tail drop, + /* If selected queue has length q->limit, do simple tail drop, * i.e. drop _this_ packet. */ - if (q->qs[x].qlen >= q->limit) + if (slot->qlen >= q->limit) return qdisc_drop(skb, sch); sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_tail(&q->qs[x], skb); + slot_queue_add(slot, skb); sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->next[x] = x; + if (slot->qlen == 1) { /* The flow is new */ + if (q->tail == NULL) { /* It is the first flow */ + slot->next = x; } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; + slot->next = q->tail->next; + q->tail->next = x; } - q->tail = x; - q->allot[x] = q->quantum; + q->tail = slot; + slot->allot = q->quantum; } if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += qdisc_pkt_len(skb); @@ -342,14 +410,12 @@ static struct sk_buff * sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index a; /* No active slots */ - if (q->tail == SFQ_DEPTH) + if (q->tail == NULL) return NULL; - a = q->next[q->tail]; - return skb_peek(&q->qs[a]); + return q->slots[q->tail->next].skblist_next; } static struct sk_buff * @@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; sfq_index a, next_a; + struct sfq_slot *slot; /* No active slots */ - if (q->tail == SFQ_DEPTH) + if (q->tail == NULL) return NULL; - a = q->next[q->tail]; - - /* Grab packet */ - skb = __skb_dequeue(&q->qs[a]); + a = q->tail->next; + slot = &q->slots[a]; + skb = slot_dequeue_head(slot); sfq_dec(q, a); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ - if (q->qs[a].qlen == 0) { - q->ht[q->hash[a]] = SFQ_DEPTH; - next_a = q->next[a]; + if (slot->qlen == 0) { + q->ht[slot->hash] = SFQ_EMPTY_SLOT; + next_a = slot->next; if (a == next_a) { - q->tail = SFQ_DEPTH; + q->tail = NULL; /* no more active slots */ return skb; } - q->next[q->tail] = next_a; - } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { - q->allot[a] += q->quantum; - q->tail = a; + q->tail->next = next_a; + } else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) { + q->tail = slot; + slot->allot += q->quantum; } return skb; } @@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) init_timer_deferrable(&q->perturb_timer); for (i = 0; i < SFQ_HASH_DIVISOR; i++) - q->ht[i] = SFQ_DEPTH; + q->ht[i] = SFQ_EMPTY_SLOT; for (i = 0; i < SFQ_DEPTH; i++) { - skb_queue_head_init(&q->qs[i]); - q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; - q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH; + q->dep[i].next = i + SFQ_SLOTS; + q->dep[i].prev = i + SFQ_SLOTS; } q->limit = SFQ_DEPTH - 1; - q->max_depth = 0; - q->tail = SFQ_DEPTH; + q->cur_depth = 0; + q->tail = NULL; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; @@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } - for (i = 0; i < SFQ_DEPTH; i++) + for (i = 0; i < SFQ_SLOTS; i++) sfq_link(q, i); return 0; } @@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl-1]; - struct sk_buff_head *list = &q->qs[idx]; - struct gnet_stats_queue qs = { .qlen = list->qlen }; - struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; + const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]]; + struct gnet_stats_queue qs = { .qlen = slot->qlen }; + struct tc_sfq_xstats xstats = { .allot = slot->allot }; struct sk_buff *skb; - skb_queue_walk(list, skb) + slot_queue_walk(slot, skb) qs.backlog += qdisc_pkt_len(skb); if (gnet_stats_copy_queue(d, &qs) < 0) @@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < SFQ_HASH_DIVISOR; i++) { - if (q->ht[i] == SFQ_DEPTH || + if (q->ht[i] == SFQ_EMPTY_SLOT || arg->count < arg->skip) { arg->count++; continue; -- cgit v1.1 From ee09b3c1cff0335137dc1b146488e4352f640f13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Dec 2010 11:39:59 -0800 Subject: sfq: fix sfq class stats handling sfq_walk() runs without qdisc lock. By the time it selects a non empty hash slot and sfq_dump_class_stats() is run (with lock held), slot might have been freed : We then access q->slots[SFQ_EMPTY_SLOT], out of bounds, and crash in slot_queue_walk() On previous kernels, bug is here but out of bounds qs[SFQ_DEPTH] and allot[SFQ_DEPTH] are located in struct sfq_sched_data, so no illegal memory access happens, only possibly wrong data reported to user. Also, slot_dequeue_tail() should make sure slot skb chain is correctly terminated, or sfq_dump_class_stats() can access freed skbs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 13322e8..6a2f88f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -281,6 +281,7 @@ static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) struct sk_buff *skb = slot->skblist_prev; slot->skblist_prev = skb->prev; + skb->prev->next = (struct sk_buff *)slot; skb->next = skb->prev = NULL; return skb; } @@ -608,14 +609,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]]; - struct gnet_stats_queue qs = { .qlen = slot->qlen }; - struct tc_sfq_xstats xstats = { .allot = slot->allot }; + sfq_index idx = q->ht[cl - 1]; + struct gnet_stats_queue qs = { 0 }; + struct tc_sfq_xstats xstats = { 0 }; struct sk_buff *skb; - slot_queue_walk(slot, skb) - qs.backlog += qdisc_pkt_len(skb); + if (idx != SFQ_EMPTY_SLOT) { + const struct sfq_slot *slot = &q->slots[idx]; + xstats.allot = slot->allot; + qs.qlen = slot->qlen; + slot_queue_walk(slot, skb) + qs.backlog += qdisc_pkt_len(skb); + } if (gnet_stats_copy_queue(d, &qs) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); -- cgit v1.1 From eeaeb068f1393b4db4861481bf594bcd1c3eda7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Dec 2010 21:53:33 +0000 Subject: sch_sfq: allow big packets and be fair SFQ is currently 'limited' to small packets, because it uses a 15bit allotment number per flow. Introduce a scale by 8, so that we can handle full size TSO/GRO packets. Use appropriate handling to make sure allot is positive before a new packet is dequeued, so that fairness is respected. Signed-off-by: Eric Dumazet Acked-by: Jarek Poplawski Cc: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6a2f88f..b76d46b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -67,7 +67,7 @@ IMPLEMENTATION: This implementation limits maximal queue length to 128; - maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024. + max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. The only goal of this restrictions was that all data fit into one 4K page on 32bit arches. @@ -77,6 +77,11 @@ #define SFQ_SLOTS 128 /* max number of flows */ #define SFQ_EMPTY_SLOT 255 #define SFQ_HASH_DIVISOR 1024 +/* We use 16 bits to store allot, and want to handle packets up to 64K + * Scale allot by 8 (1<<3) so that no overflow occurs. + */ +#define SFQ_ALLOT_SHIFT 3 +#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ typedef unsigned char sfq_index; @@ -115,7 +120,7 @@ struct sfq_sched_data struct timer_list perturb_timer; u32 perturbation; sfq_index cur_depth; /* depth of longest slot */ - + unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct sfq_slot *tail; /* current slot in round */ sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ struct sfq_slot slots[SFQ_SLOTS]; @@ -395,7 +400,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->tail->next = x; } q->tail = slot; - slot->allot = q->quantum; + slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += qdisc_pkt_len(skb); @@ -431,8 +436,14 @@ sfq_dequeue(struct Qdisc *sch) if (q->tail == NULL) return NULL; +next_slot: a = q->tail->next; slot = &q->slots[a]; + if (slot->allot <= 0) { + q->tail = slot; + slot->allot += q->scaled_quantum; + goto next_slot; + } skb = slot_dequeue_head(slot); sfq_dec(q, a); sch->q.qlen--; @@ -447,9 +458,8 @@ sfq_dequeue(struct Qdisc *sch) return skb; } q->tail->next = next_a; - } else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) { - q->tail = slot; - slot->allot += q->quantum; + } else { + slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); } return skb; } @@ -485,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -525,6 +536,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->tail = NULL; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; q->perturbation = net_random(); } else { @@ -617,7 +629,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot; + xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; qs.qlen = slot->qlen; slot_queue_walk(slot, skb) qs.backlog += qdisc_pkt_len(skb); -- cgit v1.1 From 18c8d82ae5b802c5d82e0dfbcc08b1b568955f46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Dec 2010 12:48:55 -0800 Subject: sfq: fix slot_dequeue_head() slot_dequeue_head() should make sure slot skb chain is correct in both ways, or we can crash if all possible flows are in use. Jarek pointed out slot_queue_init() can now be done in sfq_init() once, instead each time a flow is setup. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b76d46b..d54ac94 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -297,6 +297,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) struct sk_buff *skb = slot->skblist_next; slot->skblist_next = skb->next; + skb->next->prev = (struct sk_buff *)slot; skb->next = skb->prev = NULL; return skb; } @@ -380,7 +381,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; - slot_queue_init(slot); } /* If selected queue has length q->limit, do simple tail drop, @@ -545,8 +545,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } - for (i = 0; i < SFQ_SLOTS; i++) + for (i = 0; i < SFQ_SLOTS; i++) { + slot_queue_init(&q->slots[i]); sfq_link(q, i); + } return 0; } -- cgit v1.1 From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Jan 2011 08:30:54 +0000 Subject: net_sched: factorize qdisc stats handling HTB takes into account skb is segmented in stats updates. Generalize this to all schedulers. They should use qdisc_bstats_update() helper instead of manipulating bstats.bytes and bstats.packets Add bstats_update() helper too for classes that use gnet_stats_basic_packed fields. Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no stab is setup on qdisc. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d54ac94..239ec53 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -403,8 +403,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) { - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } -- cgit v1.1 From 9190b3b3208d052d98cb601fcc192f3f71a5658b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 23:31:33 -0800 Subject: net_sched: accurate bytes/packets stats/rates In commit 44b8288308ac9d (net_sched: pfifo_head_drop problem), we fixed a problem with pfifo_head drops that incorrectly decreased sch->bstats.bytes and sch->bstats.packets Several qdiscs (CHOKe, SFQ, pfifo_head, ...) are able to drop a previously enqueued packet, and bstats cannot be changed, so bstats/rates are not accurate (over estimated) This patch changes the qdisc_bstats updates to be done at dequeue() time instead of enqueue() time. bstats counters no longer account for dropped frames, and rates are more correct, since enqueue() bursts dont have effect on dequeue() rate. Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/sched/sch_sfq.c') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 239ec53..edea8ce 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -402,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->tail = slot; slot->allot = q->scaled_quantum; } - if (++sch->q.qlen <= q->limit) { - qdisc_bstats_update(sch, skb); + if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; - } sfq_drop(sch); return NET_XMIT_CN; @@ -445,6 +443,7 @@ next_slot: } skb = slot_dequeue_head(slot); sfq_dec(q, a); + qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); -- cgit v1.1