From 432649916b0435b608fb3e1fcb97347ac294d38d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 23 Aug 2008 13:28:27 +0200 Subject: dccp: Toggle debug output without module unloading This sets the sysfs permissions so that root can toggle the `debug' parameter available for nearly every DCCP module. This is useful since there are various module inter-dependencies. The debug flag can now be toggled at runtime using echo 1 > /sys/module/dccp/parameters/dccp_debug echo 1 > /sys/module/dccp_ccid2/parameters/ccid2_debug echo 1 > /sys/module/dccp_ccid3/parameters/ccid3_debug echo 1 > /sys/module/dccp_tfrc_lib/parameters/tfrc_debug The last is not very useful yet, since no code at the moment calls the tfrc_debug() macro. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 97ecec0..1859162 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -10,7 +10,7 @@ #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; -module_param(tfrc_debug, bool, 0444); +module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif -- cgit v1.1 From 959fd992f05b7468bf30d759ac0c9fd0ef0fa80b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 23 Aug 2008 13:28:27 +0200 Subject: dccp ccid-3: Replace lazy BUG_ON with condition The BUG_ON(w_tot == 0) only holds if there is no more than 1 loss interval in the loss history. If there is only a single loss interval, the calc_i_mean() routine need in fact not be called (RFC 3448, 6.3.1). 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/loss_interval.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index bcd6ac4..5b3ce06 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ - for (i=0; i <= k; i++) { + if (k <= 0) + return; + + for (i = 0; i <= k; i++) { i_i = tfrc_lh_get_interval(lh, i); if (i < k) { @@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) i_tot1 += i_i * tfrc_lh_weights[i-1]; } - BUG_ON(w_tot == 0); lh->i_mean = max(i_tot0, i_tot1) / w_tot; } -- cgit v1.1 From 63b3a73bb85daf441f964aaf9b3fc89be4209c23 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Remove ugly RTT-sampling history lookup This removes the RTT-sampling function tfrc_tx_hist_rtt(), since 1. it suffered from complex passing of return values (the return value both indicated successful lookup while the value doubled as RTT sample); 2. when for some odd reason the sample value equalled 0, this triggered a bug warning about "bogus Ack", due to the ambiguity of the return value; 3. on a passive host which has not sent anything the TX history is empty and thus will lead to unwanted "bogus Ack" warnings such as ccid3_hc_tx_packet_recv: server(e7b7d518): DATAACK with bogus ACK-28197148 ccid3_hc_tx_packet_recv: server(e7b7d518): DATAACK with bogus ACK-26641606. The fix is to replace the implicit encoding by performing the steps manually. Furthermore, the "bogus Ack" warning has been removed, since it can actually be triggered due to several reasons (network reordering, old packet, (3) above), hence it is not very useful. 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 40 ------------------------------------- net/dccp/ccids/lib/packet_history.h | 22 +++++++++++++++++--- 2 files changed, 19 insertions(+), 43 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6cc108a..5c44508 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -40,18 +40,6 @@ #include "packet_history.h" #include "../../dccp.h" -/** - * tfrc_tx_hist_entry - Simple singly-linked TX history list - * @next: next oldest entry (LIFO order) - * @seqno: sequence number of this entry - * @stamp: send time of packet with sequence number @seqno - */ -struct tfrc_tx_hist_entry { - struct tfrc_tx_hist_entry *next; - u64 seqno; - ktime_t stamp; -}; - /* * Transmitter History Routines */ @@ -73,15 +61,6 @@ void tfrc_tx_packet_history_exit(void) } } -static struct tfrc_tx_hist_entry * - tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) -{ - while (head != NULL && head->seqno != seqno) - head = head->next; - - return head; -} - int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) { struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); @@ -111,25 +90,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) } EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); -u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, - const ktime_t now) -{ - u32 rtt = 0; - struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); - - if (packet != NULL) { - rtt = ktime_us_delta(now, packet->stamp); - /* - * Garbage-collect older (irrelevant) entries: - */ - tfrc_tx_hist_purge(&packet->next); - } - - return rtt; -} -EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); - - /* * Receiver History Routines */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 461cc91..221d810 100644 --- 
a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -40,12 +40,28 @@ #include #include "tfrc.h" -struct tfrc_tx_hist_entry; +/** + * tfrc_tx_hist_entry - Simple singly-linked TX history list + * @next: next oldest entry (LIFO order) + * @seqno: sequence number of this entry + * @stamp: send time of packet with sequence number @seqno + */ +struct tfrc_tx_hist_entry { + struct tfrc_tx_hist_entry *next; + u64 seqno; + ktime_t stamp; +}; + +static inline struct tfrc_tx_hist_entry * + tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) +{ + while (head != NULL && head->seqno != seqno) + head = head->next; + return head; +} extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); -extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, - const u64 seqno, const ktime_t now); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) -- cgit v1.1 From 535c55df136ad2783d444e54d518a8fae8bdbf79 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc/ccid-3: Computing Loss Rate from Loss Event Rate This adds a function to take care of the following cases occurring in the computation of the Loss Rate p: * 1/(2^32-1) is mapped into 0% as per RFC 4342, 8.5; * 1/0 is mapped into the maximum of 100%; * we want to avoid that p = 1/x is rounded down to 0 when x is very large, since this means accidentally re-entering slow-start (indicated by p==0). In the last case, the minimum-resolution value of p is returned. 
Furthermore, a bug in ccid3_hc_rx_getsockopt is fixed (1/0 was mapped into ~0U), which now allows the scaled p-values to be printed consistently as printf("Loss Event Rate = %u.%04u %%\n", rx_info.tfrcrx_p / 10000, rx_info.tfrcrx_p % 10000); Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc.h | 1 + net/dccp/ccids/lib/tfrc_equation.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed98575..bb47146 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -58,6 +58,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); extern int tfrc_tx_packet_history_init(void); extern void tfrc_tx_packet_history_exit(void); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 2f20a29..bc3dc2b 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -658,7 +658,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) result = scaled_div(s, R); return scaled_div32(result, f); } - EXPORT_SYMBOL_GPL(tfrc_calc_x); /** @@ -693,5 +692,19 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } - EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); + +/** + * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% + * When @loss_event_rate is large, there is a chance that p is truncated to 0. + * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. 
+ */ +u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) +{ + if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ + return 0; + if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ + return 1000000; + return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); +} +EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate); -- cgit v1.1 From 8b67ad12b04ef7bdf5d2b4de24fe5a609b26cf12 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Suppress unavoidable "below resolution" warning In the congestion-avoidance phase a decay of p towards 0 is natural once fewer losses are encountered. Hence the warning message "p is below resolution" is not necessary, and thus turned into a debug message by this patch. The TFRC_SMALLEST_P is needed since in theory p never actually reaches 0. When no further losses are encountered, the loss interval I_0 grows in length, causing p to decrease towards 0, causing X_calc = s/(RTT * f(p)) to increase. With the given minimum-resolution this congestion avoidance phase stops at some fixed value, an approximation formula has been added to the documentation. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc_equation.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index bc3dc2b..38239c4 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -632,8 +632,16 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ - DCCP_WARN("Value of p (%d) below resolution. " - "Substituting %d\n", p, TFRC_SMALLEST_P); + /* + * In the congestion-avoidance phase p decays towards 0 + * when there are no further losses, so this case is + * natural. 
Truncating to p_min = 0.01% means that the + * maximum achievable throughput is limited to about + * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g. + * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps. + */ + tfrc_pr_debug("Value of p (%d) below resolution. " + "Substituting %d\n", p, TFRC_SMALLEST_P); index = 0; } else /* 0.0001 <= p <= 0.05 */ index = p/TFRC_SMALLEST_P - 1; -- cgit v1.1 From 24b8d343215919c7a2ba18b9f89a0961e1459cad Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Receiver history initialisation routine This patch 1) separates history allocation and initialisation, to facilitate early loss detection (implemented by a subsequent patch); 2) removes duplication by using the existing tfrc_rx_hist_purge() if the allocation fails. This is now possible, since the initialisation routine 3) zeroes out the entire history before using it. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 52 +++++++++++++++++++++---------------- net/dccp/ccids/lib/packet_history.h | 2 +- 2 files changed, 31 insertions(+), 23 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 5c44508..5b4e1cf 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -352,28 +352,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); -int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) -{ - int i; - - for (i = 0; i <= TFRC_NDUPACK; i++) { - h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); - if (h->ring[i] == NULL) - goto out_free; - } - - h->loss_count = h->loss_start = 0; - return 0; - -out_free: - while (i-- != 0) { - kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); - h->ring[i] = NULL; - } - return -ENOBUFS; -} -EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); - void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { int i; @@ -386,6 +364,36 @@ void 
tfrc_rx_hist_purge(struct tfrc_rx_hist *h) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); +static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) +{ + int i; + + memset(h, 0, sizeof(*h)); + + for (i = 0; i <= TFRC_NDUPACK; i++) { + h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); + if (h->ring[i] == NULL) { + tfrc_rx_hist_purge(h); + return -ENOBUFS; + } + } + return 0; +} + +int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk) +{ + if (tfrc_rx_hist_alloc(h)) + return -ENOBUFS; + /* + * Initialise first entry with GSR to start loss detection as early as + * possible. Code using this must not use any other fields. The entry + * will be overwritten once the CCID updates its received packets. + */ + tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr; + return 0; +} +EXPORT_SYMBOL_GPL(tfrc_rx_hist_init); + /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against */ diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 221d810..e9d8097 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -153,7 +153,7 @@ extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct sock *sk); extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); -extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); +extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ -- cgit v1.1 From d20ed95f8bf3d98d31dbbab8b00bb4c1a4a140f3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Perform early loss detection This enables the TFRC code to begin loss detection (as soon as the module is loaded), using the latest updates from rfc3448bis-06, 6.3.1: * when the first data packet(s) are lost or marked, set * X_target = s/(2*R) => f(p) = s/(R * X_target) = 2, * corresponding to a loss rate of ~ 
20.64%. The handle_loss() function is now called right at the beginning of rx_packet_recv() and is thus no longer protected against duplicates: hence a call to rx_duplicate() has been added. Such a call makes sense now, as the previous patch initialises the first entry with a sequence number of GSR. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 5b4e1cf..8db3422 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -335,6 +335,9 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, { int is_new_loss = 0; + if (tfrc_rx_hist_duplicate(h, skb)) + return 0; + if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); } else if (h->loss_count == 1) { -- cgit v1.1 From 3ca7aea04152255bb65275b0018d3c673bc1f4e7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Return type of update_i_mean is void This changes the return type of tfrc_lh_update_i_mean() to void, since that function always returns `false'. This is due to len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ return 0; which means that update_i_mean can only increase the length of the open loss interval I_0, and hence the value of I_tot0 (RFC 3448, 5.4). Consequently the test `i_mean < old_i_mean' at the end of the function always evaluates to false. There is no known way by which a loss interval can suddenly become shorter, therefore the return type of the function is changed to void. (That is, under the given circumstances step (3) in RFC 3448, 6.1 will not occur.) 
Further changes: ---------------- * the function is now called from tfrc_rx_handle_loss, which is equivalent to the previous way of calling from rx_packet_recv (it was called whenever there was no new or pending loss, now it is also updated when there is a pending loss - this increases the accuracy a bit); * added a FIXME to possibly consider NDP counting as per RFC 4342 (this is not implemented yet). Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/loss_interval.c | 20 +++++++++++--------- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.c | 5 +++++ 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5b3ce06..fe5c2a3 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -86,21 +86,26 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) /** * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 - * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev + * This updates I_mean as the sequence numbers increase. As a consequence, the + * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1) + * decreases, and thus there is no need to send renewed feedback. 
*/ -u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) +void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); - u32 old_i_mean = lh->i_mean; s64 len; if (cur == NULL) /* not initialised */ - return 0; + return; + + /* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */ + if (!dccp_data_packet(skb)) + return; len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ - return 0; + return; if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) /* @@ -114,14 +119,11 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) cur->li_is_closed = 1; if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ - return 0; + return; cur->li_length = len; tfrc_lh_calc_i_mean(lh); - - return (lh->i_mean < old_i_mean); } -EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 246018a..f101ae2 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -69,7 +69,7 @@ struct tfrc_rx_hist; extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, u32 (*first_li)(struct sock *), struct sock *); -extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 8db3422..8ea9690 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -351,6 +351,11 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, 
is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); __three_after_loss(h); } + + /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ + if (!is_new_loss) + tfrc_lh_update_i_mean(lh, skb); + return is_new_loss; } EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); -- cgit v1.1 From 34a081be8e14b7ada70e069b65b05d54db4af497 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Let dccp_tfrc_lib do the sampling work This migrates more TFRC-related code into the dccp_tfrc_lib: * sampling of the packet size `s' (which is only needed until the first loss interval is computed (ccid3_first_li)); * updating the byte-counter `bytes_recvd' in between sending feedbacks. The result is a better separation of CCID-3 specific and TFRC specific code, which aids future integration with ECN and e.g. CCID-4. Further changes: ---------------- * replaced magic number of 536 with equivalent constant TCP_MIN_RCVMSS; (this constant is also used when no estimate for `s' is available). Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 10 ++++++++++ net/dccp/ccids/lib/packet_history.h | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 8ea9690..ee34b45 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -352,6 +352,16 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, __three_after_loss(h); } + /* + * Update moving-average of `s' and the sum of received payload bytes. 
+ */ + if (dccp_data_packet(skb)) { + const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; + + h->packet_size = tfrc_ewma(h->packet_size, payload, 9); + h->bytes_recvd += payload; + } + /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ if (!is_new_loss) tfrc_lh_update_i_mean(lh, skb); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index e9d8097..b7c87a1 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -91,12 +91,16 @@ struct tfrc_rx_hist_entry { * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry + * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) + * @bytes_recvd: Number of bytes received since last sending feedback */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; #define rtt_sample_prev loss_start + u32 packet_size, + bytes_recvd; }; /** @@ -140,6 +144,18 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } +/* + * Accessor functions to retrieve parameters sampled by the RX history + */ +static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h) +{ + if (h->packet_size == 0) { + DCCP_WARN("No sample for s, using fallback\n"); + return TCP_MIN_RCVMSS; + } + return h->packet_size; +} + extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); -- cgit v1.1 From 2b81143aa3505e2460b24b357996c2f21840ea58 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Always perform receiver RTT sampling This updates the CCID-3 receiver in part with regard to errata 610 and 611 (http://www.rfc-editor.org/errata_list.php), which change RFC 4342 to use the Receive Rate as specified in rfc3448bis, requiring to constantly sample 
the RTT (or use a sender RTT). Doing this requires reusing the RX history structure after dealing with a loss. The patch does not resolve how to compute X_recv if the interval is less than 1 RTT. A FIXME has been added (and is resolved in subsequent patch). Furthermore, since this is all TFRC-based functionality, the RTT estimation is now also performed by the dccp_tfrc_lib module. This further simplifies the CCID-3 code. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 60 +++++++++++++++++++++++++++---------- net/dccp/ccids/lib/packet_history.h | 17 +++++++++-- 2 files changed, 59 insertions(+), 18 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index ee34b45..e2e250a 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -151,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); + +static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) +{ + struct tfrc_rx_hist_entry *tmp = h->ring[a]; + + h->ring[a] = h->ring[b]; + h->ring[b] = tmp; +} + static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { - const u8 idx_a = tfrc_rx_hist_index(h, a), - idx_b = tfrc_rx_hist_index(h, b); - struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; + __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a), + tfrc_rx_hist_index(h, b)); +} - h->ring[idx_a] = h->ring[idx_b]; - h->ring[idx_b] = tmp; +/** + * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling + * This is called after loss detection has finished, when the history entry + * with the index of `loss_count' holds the highest-received sequence number. + * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt). 
+ */ +static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h) +{ + __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count)); + h->loss_count = h->loss_start = 0; } /* @@ -200,8 +217,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ - h->loss_count = 0; - h->loss_start = tfrc_rx_hist_index(h, 1); + tfrc_rx_hist_resume_rtt_sampling(h); } else /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); @@ -254,8 +270,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ - h->loss_start = tfrc_rx_hist_index(h, 2); - h->loss_count = 0; + tfrc_rx_hist_resume_rtt_sampling(h); } else { /* gap remains between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); @@ -299,8 +314,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) if (dccp_loss_free(s2, s3, n3)) { /* no gap between S2 and S3: entire hole is filled */ - h->loss_start = tfrc_rx_hist_index(h, 3); - h->loss_count = 0; + tfrc_rx_hist_resume_rtt_sampling(h); } else { /* gap between S2 and S3 */ h->loss_start = tfrc_rx_hist_index(h, 2); @@ -340,6 +354,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); + tfrc_rx_hist_sample_rtt(h, skb); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { @@ -435,11 +450,24 @@ static inline struct tfrc_rx_hist_entry * * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able * to compute a sample with given data - calling function should check this. 
*/ -u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) +void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { - u32 sample = 0, - delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); + u32 sample = 0, delta_v; + + /* + * When not to sample: + * - on non-data packets + * (RFC 4342, 8.1: CCVal only fully defined for data packets); + * - when no data packets have been received yet + * (FIXME: using sampled packet size as indicator here); + * - as long as there are gaps in the sequence space (pending loss). + */ + if (!dccp_data_packet(skb) || h->packet_size == 0 || + tfrc_rx_hist_loss_pending(h)) + return; + + delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, + tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ if (h->rtt_sample_prev == 2) { /* previous candidate stored */ @@ -479,6 +507,6 @@ u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) h->rtt_sample_prev = 0; /* use current entry as next reference */ keep_ref_for_next_time: - return sample; + h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 9); } EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index b7c87a1..ba5832b 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -91,6 +91,7 @@ struct tfrc_rx_hist_entry { * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry + * @rtt_estimate: Receiver RTT estimate * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) * @bytes_recvd: Number of bytes received since last sending feedback */ @@ -98,7 +99,10 @@ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; + /* Receiver RTT sampling */ #define 
rtt_sample_prev loss_start + u32 rtt_estimate; + /* Receiver sampling of application payload lengths */ u32 packet_size, bytes_recvd; }; @@ -154,6 +158,15 @@ static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h) return TCP_MIN_RCVMSS; } return h->packet_size; + +} +static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h) +{ + if (h->rtt_estimate == 0) { + DCCP_WARN("No RTT estimate available, using fallback RTT\n"); + return DCCP_FALLBACK_RTT; + } + return h->rtt_estimate; } extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, @@ -167,8 +180,8 @@ extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, const u64 ndp, u32 (*first_li)(struct sock *sk), struct sock *sk); -extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); +extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, + const struct sk_buff *skb); extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); -- cgit v1.1 From 22338f09bd60434a3f1d6608f0fa55972067985f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp tfrc: Increase number of RTT samples This improves the receiver RTT sampling algorithm so that it tries harder to get as many RTT samples as possible. The algorithm is based on the concepts presented in RFC 4342, 8.1, using timestamps and the CCVal window counter. There exist 4 cases for the CCVal difference: * == 0: less than RTT/4 passed since last packet -- unusable; * > 4: (much) more than 1 RTT has passed since last packet -- also unusable; * == 4: perfect sample (exactly one RTT has passed since last packet); * 1..3: sub-optimal sample (between RTT/4 and 3*RTT/4 has passed). In the last case the algorithm tried to optimise by storing away the candidate and then re-trying next time. 
The problem is that * a large number of samples is needed to smooth out the inaccuracies of the algorithm; * the sender may not be sending enough packets to warrant a "next time"; * hence it is better to use suboptimal samples whenever possible. The algorithm now stores away the current sample only if the difference is 0. Applicability and background ---------------------------- A realistic example is MP3 streaming where packets are sent at a rate of less than one packet per RTT, which means that suitable samples are absent for a very long time. The effectiveness of using suboptimal samples (with a delta between 1 and 4) was confirmed by instrumenting the algorithm with counters. The results of two 20-second test runs were: * With the old algorithm and a total of 38442 function calls, only 394 of these calls resulted in usable RTT samples (about 1%); 378 of these were "perfect" samples, while 28013 (unused) samples had a delta of 1..3. * With the new algorithm and a total of 37057 function calls, 1702 usable RTT samples were retrieved (about 4.6%); 5 of these were "perfect" samples. 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 83 +++++++++++-------------------------- 1 file changed, 24 insertions(+), 59 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index e2e250a..5c4ded1 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -428,31 +428,16 @@ int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk) EXPORT_SYMBOL_GPL(tfrc_rx_hist_init); /** - * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) -{ - return h->ring[0]; -} - -/** - * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) -{ - return h->ring[h->rtt_sample_prev]; -} - -/** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal - * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able - * to compute a sample with given data - calling function should check this. + * Based on ideas presented in RFC 4342, 8.1. 
This function expects that no loss + * is pending and uses the following history entries (via rtt_sample_prev): + * - h->ring[0] contains the most recent history entry prior to @skb; + * - h->ring[1] is an unused `dummy' entry when the current difference is 0; */ void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { - u32 sample = 0, delta_v; + struct tfrc_rx_hist_entry *last = h->ring[0]; + u32 sample, delta_v; /* * When not to sample: @@ -466,47 +451,27 @@ void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) tfrc_rx_hist_loss_pending(h)) return; - delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - - if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ - if (h->rtt_sample_prev == 2) { /* previous candidate stored */ - sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - if (sample) - sample = 4 / sample * - ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); - else /* - * FIXME: This condition is in principle not - * possible but occurs when CCID is used for - * two-way data traffic. I have tried to trace - * it, but the cause does not seem to be here. 
- */ - DCCP_BUG("please report to dccp@vger.kernel.org" - " => prev = %u, last = %u", - tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - } else if (delta_v < 1) { - h->rtt_sample_prev = 1; - goto keep_ref_for_next_time; - } - - } else if (delta_v == 4) /* optimal match */ - sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); - else { /* suboptimal match */ - h->rtt_sample_prev = 2; - goto keep_ref_for_next_time; - } + h->rtt_sample_prev = 0; /* reset previous candidate */ - if (unlikely(sample > DCCP_SANE_RTT_MAX)) { - DCCP_WARN("RTT sample %u too large, using max\n", sample); - sample = DCCP_SANE_RTT_MAX; + delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval); + if (delta_v == 0) { /* less than RTT/4 difference */ + h->rtt_sample_prev = 1; + return; } + sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp))); - h->rtt_sample_prev = 0; /* use current entry as next reference */ -keep_ref_for_next_time: + if (delta_v <= 4) /* between RTT/4 and RTT */ + sample *= 4 / delta_v; + else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2)) + /* + * Optimisation: CCVal difference is greater than 1 RTT, yet the + * sample is less than the local RTT estimate; which means that + * the RTT estimate is too high. + * To avoid noise, it is not done if the sample is below RTT/2. 
+ */ + return; - h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 9); + /* Use a lower weight than usual to increase responsiveness */ + h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5); } EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); -- cgit v1.1 From 68c89ee53571a441799c03d5e240c6441bced620 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Update the computation of X_recv This updates the computation of X_recv with regard to Errata 610/611 for RFC 4342 and draft rfc3448bis-06, ensuring that at least an interval of 1 RTT is used to compute X_recv. The change is wrapped into a new function ccid3_hc_rx_x_recv(). Further changes: ---------------- * feedback is not sent when no data packets arrived (bytes_recv == 0), as per rfc3448bis-06, 6.2; * take the timestamp for the feedback /after/ dccp_send_ack() returns, to avoid taking the transmission time into account (in case layer-2 is busy); * clearer handling of failure in ccid3_first_li(). 
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/packet_history.c | 30 ++++++++++++++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 13 ++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 5c4ded1..547ad09 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -385,6 +385,36 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); +/* Compute the sending rate X_recv measured between feedback intervals */ +u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv) +{ + u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate; + s64 delta = ktime_to_us(net_timedelta(h->bytes_start)); + + WARN_ON(delta <= 0); + /* + * Ensure that the sampling interval for X_recv is at least one RTT, + * by extending the sampling interval backwards in time, over the last + * R_(m-1) seconds, as per rfc3448bis-06, 6.2. + * To reduce noise (e.g. when the RTT changes often), this is only + * done when delta is smaller than RTT/2. + */ + if (last_x_recv > 0 && delta < last_rtt/2) { + tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n", + (long)delta, (unsigned)last_rtt); + + delta = (bytes ? 
delta : 0) + last_rtt; + bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC); + } + + if (unlikely(bytes == 0)) { + DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv); + return last_x_recv; + } + return scaled_div32(bytes, delta); +} +EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv); + void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { int i; diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index ba5832b..6552be6 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -93,7 +93,8 @@ struct tfrc_rx_hist_entry { * @rtt_sample_prev: Used during RTT sampling, points to candidate entry * @rtt_estimate: Receiver RTT estimate * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) - * @bytes_recvd: Number of bytes received since last sending feedback + * @bytes_recvd: Number of bytes received since @bytes_start + * @bytes_start: Start time for counting @bytes_recvd */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; @@ -105,6 +106,7 @@ struct tfrc_rx_hist { /* Receiver sampling of application payload lengths */ u32 packet_size, bytes_recvd; + ktime_t bytes_start; }; /** @@ -169,6 +171,15 @@ static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h) return h->rtt_estimate; } +static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h) +{ + h->bytes_recvd = 0; + h->bytes_start = ktime_get_real(); +} + +extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv); + + extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); -- cgit v1.1 From 88e97a93342c0b9e835d510921e7b2df8547d1bd Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Update the RX history records in one place This patch is a requirement for enabling ECN support later on. 
With that change in mind, the following preparations are done: * renamed handle_loss() to congestion_event(), since it returns true when a congestion event happens (it will eventually also take care of ECN packets); * let tfrc_rx_congestion_event() always update the RX history records, since this routine needs to be called for each non-duplicate packet anyway; * gave all involved boolean-type functions the return type `bool'. Updating the RX history records is now only necessary for the packets received up to sending the first feedback. The receiver code becomes simpler again. Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/loss_interval.c | 10 +++++----- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.c | 37 ++++++++++++++++++------------------- net/dccp/ccids/lib/packet_history.h | 10 +++++----- 4 files changed, 29 insertions(+), 30 deletions(-) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index fe5c2a3..b1ae8f8 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -140,18 +140,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, * @sk: Used by @calc_first_li in caller-specific way (subtyping) * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
*/ -int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, - u32 (*calc_first_li)(struct sock *), struct sock *sk) +bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, + u32 (*calc_first_li)(struct sock *), struct sock *sk) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) - return 0; + return false; new = tfrc_lh_demand_next(lh); if (unlikely(new == NULL)) { DCCP_CRIT("Cannot allocate/add loss record."); - return 0; + return false; } new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; @@ -169,7 +169,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, tfrc_lh_calc_i_mean(lh); } - return 1; + return true; } EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index f101ae2..d08a226 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -67,7 +67,7 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, +extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, u32 (*first_li)(struct sock *), struct sock *); extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 547ad09..cce9f03 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -192,10 +192,8 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = DCCP_SKB_CB(skb)->dccpd_seq; - if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ + if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */ h->loss_count = 1; 
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); - } } static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) @@ -328,13 +326,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h) } /** - * tfrc_rx_handle_loss - Loss detection and further processing - * @h: The non-empty RX history object - * @lh: Loss Intervals database to update - * @skb: Currently received packet - * @ndp: The NDP count belonging to @skb - * @calc_first_li: Caller-dependent computation of first loss interval in @lh - * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) + * tfrc_rx_congestion_event - Loss detection and further processing + * @h: The non-empty RX history object + * @lh: Loss Intervals database to update + * @skb: Currently received packet + * @ndp: The NDP count belonging to @skb + * @first_li: Caller-dependent computation of first loss interval in @lh + * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. @@ -342,12 +340,12 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * records accordingly, the caller should not perform any more RX history * operations when loss_count is greater than 0 after calling this function. 
*/ -int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*calc_first_li)(struct sock *), struct sock *sk) +bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *), struct sock *sk) { - int is_new_loss = 0; + bool new_event = false; if (tfrc_rx_hist_duplicate(h, skb)) return 0; @@ -355,6 +353,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); tfrc_rx_hist_sample_rtt(h, skb); + tfrc_rx_hist_add_packet(h, skb, ndp); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { @@ -363,7 +362,7 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, /* * Update Loss Interval database and recycle RX records */ - is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); + new_event = tfrc_lh_interval_add(lh, h, first_li, sk); __three_after_loss(h); } @@ -378,12 +377,12 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ - if (!is_new_loss) + if (!new_event) tfrc_lh_update_i_mean(lh, skb); - return is_new_loss; + return new_event; } -EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); +EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event); /* Compute the sending rate X_recv measured between feedback intervals */ u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv) diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 6552be6..555e65c 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -186,11 +186,11 @@ extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const 
u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); +extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), + struct sock *sk); extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); -- cgit v1.1 From a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Preventing Oscillations This implements [RFC 3448, 4.5], which performs congestion avoidance behaviour by reducing the transmit rate as the queueing delay (measured in terms of long-term RTT) increases. Oscillation prevention can be turned on/off via a module option (do_osc_prev) and via sysfs (using mode 0644); the default is off. Overflow analysis: ------------------ * oscillation prevention is done after update_x(), so that t_ipi <= 64000; * hence the multiplication "t_ipi * sqrt(R_sample)" needs 64 bits; * done using u64 for sqrt_sample and explicit typecast of t_ipi; * the divisor, R_sqmean, is non-zero because oscillation prevention is first called when receiving the second feedback packet, and tfrc_scaled_rtt() > 0. A detailed discussion of the algorithm (with plots) is on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3/sender_notes/oscillation_prevention/ The algorithm has negative side effects: * when it is allowed to decrease t_ipi (which leads to a large RTT) and * when it is used during slow-start; both uses are therefore disabled.
Signed-off-by: Gerrit Renker --- net/dccp/ccids/lib/tfrc.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/dccp/ccids/lib') diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index bb47146..ede12f5 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b) } /** + * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1 + * Uses scaling to improve accuracy of the integer approximation of sqrt(). The + * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for + * clamped RTT samples (dccp_sample_rtt). + * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the + * scaling factor is neutralised. For this purpose, it avoids returning zero. + */ +static inline u16 tfrc_scaled_sqrt(const u32 sample) +{ + const unsigned long non_zero_sample = sample ? : 1; + + return int_sqrt(non_zero_sample << 10); +} + +/** * tfrc_ewma - Exponentially weighted moving average * @weight: Weight to be used as damping factor, in units of 1/10 */ -- cgit v1.1