diff options
author | David S. Miller <davem@davemloft.net> | 2008-09-08 17:28:59 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-09-08 17:28:59 -0700 |
commit | 0a68a20cc3eafa73bb54097c28b921147d7d3685 (patch) | |
tree | 8e5f315226b618cb8e050a0c7653c8ec134501e3 /net/dccp/ccids/lib | |
parent | 17dce5dfe38ae2fb359b61e855f5d8a3a8b7892b (diff) | |
parent | a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (diff) | |
download | kernel_samsung_tuna-0a68a20cc3eafa73bb54097c28b921147d7d3685.zip kernel_samsung_tuna-0a68a20cc3eafa73bb54097c28b921147d7d3685.tar.gz kernel_samsung_tuna-0a68a20cc3eafa73bb54097c28b921147d7d3685.tar.bz2 |
Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp
Conflicts:
net/dccp/input.c
net/dccp/options.c
Diffstat (limited to 'net/dccp/ccids/lib')
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 30 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 4 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 282 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 78 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 16 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc_equation.c | 29 |
6 files changed, 271 insertions, 168 deletions
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5b3ce06..b1ae8f8 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -86,21 +86,26 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) /** * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 - * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev + * This updates I_mean as the sequence numbers increase. As a consequence, the + * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1) + * decreases, and thus there is no need to send renewed feedback. */ -u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) +void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); - u32 old_i_mean = lh->i_mean; s64 len; if (cur == NULL) /* not initialised */ - return 0; + return; + + /* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */ + if (!dccp_data_packet(skb)) + return; len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ - return 0; + return; if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) /* @@ -114,14 +119,11 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) cur->li_is_closed = 1; if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ - return 0; + return; cur->li_length = len; tfrc_lh_calc_i_mean(lh); - - return (lh->i_mean < old_i_mean); } -EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, @@ -138,18 +140,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, * @sk: Used by @calc_first_li in caller-specific way (subtyping) * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. */ -int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, - u32 (*calc_first_li)(struct sock *), struct sock *sk) +bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, + u32 (*calc_first_li)(struct sock *), struct sock *sk) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) - return 0; + return false; new = tfrc_lh_demand_next(lh); if (unlikely(new == NULL)) { DCCP_CRIT("Cannot allocate/add loss record."); - return 0; + return false; } new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; @@ -167,7 +169,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, tfrc_lh_calc_i_mean(lh); } - return 1; + return true; } EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 246018a..d08a226 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, +extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, u32 (*first_li)(struct sock *), struct sock *); -extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6cc108a..cce9f03 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -40,18 +40,6 @@ #include "packet_history.h" #include "../../dccp.h" -/** - * tfrc_tx_hist_entry - Simple singly-linked TX history list - * @next: next oldest entry (LIFO order) - * @seqno: sequence number of this entry - * @stamp: send time of packet with sequence number @seqno - */ -struct tfrc_tx_hist_entry { - struct tfrc_tx_hist_entry *next; - u64 seqno; - ktime_t stamp; -}; - /* * Transmitter History Routines */ @@ -73,15 +61,6 @@ void tfrc_tx_packet_history_exit(void) } } -static struct tfrc_tx_hist_entry * - tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) -{ - while (head != NULL && head->seqno != seqno) - head = head->next; - - return head; -} - int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) { struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); @@ -111,25 +90,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) } EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); -u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, - const ktime_t now) -{ - u32 rtt = 0; - struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); - - if (packet != NULL) { - rtt = ktime_us_delta(now, packet->stamp); - /* - * Garbage-collect older (irrelevant) entries: - */ - tfrc_tx_hist_purge(&packet->next); - } - - return rtt; -} -EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); - - /* * Receiver History Routines */ @@ -191,14 +151,31 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); + +static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) +{ + struct tfrc_rx_hist_entry *tmp = h->ring[a]; + + h->ring[a] = h->ring[b]; + h->ring[b] = tmp; +} + static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { - const u8 idx_a = tfrc_rx_hist_index(h, a), - idx_b = tfrc_rx_hist_index(h, b); - struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; + __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a), + tfrc_rx_hist_index(h, b)); +} - h->ring[idx_a] = h->ring[idx_b]; - h->ring[idx_b] = tmp; +/** + * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling + * This is called after loss detection has finished, when the history entry + * with the index of `loss_count' holds the highest-received sequence number. + * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt). + */ +static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h) +{ + __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count)); + h->loss_count = h->loss_start = 0; } /* @@ -215,10 +192,8 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = DCCP_SKB_CB(skb)->dccpd_seq; - if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ + if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */ h->loss_count = 1; - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); - } } static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) @@ -240,8 +215,7 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ - h->loss_count = 0; - h->loss_start = tfrc_rx_hist_index(h, 1); + tfrc_rx_hist_resume_rtt_sampling(h); } else /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); @@ -294,8 +268,7 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ - h->loss_start = tfrc_rx_hist_index(h, 2); - h->loss_count = 0; + tfrc_rx_hist_resume_rtt_sampling(h); } else { /* gap remains between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); @@ -339,8 +312,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) if (dccp_loss_free(s2, s3, n3)) { /* no gap between S2 and S3: entire hole is filled */ - h->loss_start = tfrc_rx_hist_index(h, 3); - h->loss_count = 0; + tfrc_rx_hist_resume_rtt_sampling(h); } else { /* gap between S2 and S3 */ h->loss_start = tfrc_rx_hist_index(h, 2); @@ -354,13 +326,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h) } /** - * tfrc_rx_handle_loss - Loss detection and further processing - * @h: The non-empty RX history object - * @lh: Loss Intervals database to update - * @skb: Currently received packet - * @ndp: The NDP count belonging to @skb - * @calc_first_li: Caller-dependent computation of first loss interval in @lh - * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) + * tfrc_rx_congestion_event - Loss detection and further processing + * @h: The non-empty RX history object + * @lh: Loss Intervals database to update + * @skb: Currently received packet + * @ndp: The NDP count belonging to @skb + * @first_li: Caller-dependent computation of first loss interval in @lh + * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. @@ -368,15 +340,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * records accordingly, the caller should not perform any more RX history * operations when loss_count is greater than 0 after calling this function. */ -int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*calc_first_li)(struct sock *), struct sock *sk) +bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *), struct sock *sk) { - int is_new_loss = 0; + bool new_event = false; + + if (tfrc_rx_hist_duplicate(h, skb)) + return 0; if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); + tfrc_rx_hist_sample_rtt(h, skb); + tfrc_rx_hist_add_packet(h, skb, ndp); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { @@ -385,34 +362,57 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, /* * Update Loss Interval database and recycle RX records */ - is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); + new_event = tfrc_lh_interval_add(lh, h, first_li, sk); __three_after_loss(h); } - return is_new_loss; + + /* + * Update moving-average of `s' and the sum of received payload bytes. + */ + if (dccp_data_packet(skb)) { + const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; + + h->packet_size = tfrc_ewma(h->packet_size, payload, 9); + h->bytes_recvd += payload; + } + + /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ + if (!new_event) + tfrc_lh_update_i_mean(lh, skb); + + return new_event; } -EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); +EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event); -int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) +/* Compute the sending rate X_recv measured between feedback intervals */ +u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv) { - int i; + u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate; + s64 delta = ktime_to_us(net_timedelta(h->bytes_start)); - for (i = 0; i <= TFRC_NDUPACK; i++) { - h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); - if (h->ring[i] == NULL) - goto out_free; - } + WARN_ON(delta <= 0); + /* + * Ensure that the sampling interval for X_recv is at least one RTT, + * by extending the sampling interval backwards in time, over the last + * R_(m-1) seconds, as per rfc3448bis-06, 6.2. + * To reduce noise (e.g. when the RTT changes often), this is only + * done when delta is smaller than RTT/2. + */ + if (last_x_recv > 0 && delta < last_rtt/2) { + tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n", + (long)delta, (unsigned)last_rtt); - h->loss_count = h->loss_start = 0; - return 0; + delta = (bytes ? delta : 0) + last_rtt; + bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC); + } -out_free: - while (i-- != 0) { - kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); - h->ring[i] = NULL; + if (unlikely(bytes == 0)) { + DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv); + return last_x_recv; } - return -ENOBUFS; + return scaled_div32(bytes, delta); } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); +EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { @@ -426,73 +426,81 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); -/** - * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) +static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { - return h->ring[0]; + int i; + + memset(h, 0, sizeof(*h)); + + for (i = 0; i <= TFRC_NDUPACK; i++) { + h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); + if (h->ring[i] == NULL) { + tfrc_rx_hist_purge(h); + return -ENOBUFS; + } + } + return 0; } -/** - * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) +int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk) { - return h->ring[h->rtt_sample_prev]; + if (tfrc_rx_hist_alloc(h)) + return -ENOBUFS; + /* + * Initialise first entry with GSR to start loss detection as early as + * possible. Code using this must not use any other fields. The entry + * will be overwritten once the CCID updates its received packets. + */ + tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr; + return 0; } +EXPORT_SYMBOL_GPL(tfrc_rx_hist_init); /** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal - * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able - * to compute a sample with given data - calling function should check this. + * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss + * is pending and uses the following history entries (via rtt_sample_prev): + * - h->ring[0] contains the most recent history entry prior to @skb; + * - h->ring[1] is an unused `dummy' entry when the current difference is 0; */ -u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) +void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { - u32 sample = 0, - delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - - if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ - if (h->rtt_sample_prev == 2) { /* previous candidate stored */ - sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - if (sample) - sample = 4 / sample * - ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); - else /* - * FIXME: This condition is in principle not - * possible but occurs when CCID is used for - * two-way data traffic. I have tried to trace - * it, but the cause does not seem to be here. - */ - DCCP_BUG("please report to dccp@vger.kernel.org" - " => prev = %u, last = %u", - tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - } else if (delta_v < 1) { - h->rtt_sample_prev = 1; - goto keep_ref_for_next_time; - } + struct tfrc_rx_hist_entry *last = h->ring[0]; + u32 sample, delta_v; - } else if (delta_v == 4) /* optimal match */ - sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); - else { /* suboptimal match */ - h->rtt_sample_prev = 2; - goto keep_ref_for_next_time; - } + /* + * When not to sample: + * - on non-data packets + * (RFC 4342, 8.1: CCVal only fully defined for data packets); + * - when no data packets have been received yet + * (FIXME: using sampled packet size as indicator here); + * - as long as there are gaps in the sequence space (pending loss). + */ + if (!dccp_data_packet(skb) || h->packet_size == 0 || + tfrc_rx_hist_loss_pending(h)) + return; - if (unlikely(sample > DCCP_SANE_RTT_MAX)) { - DCCP_WARN("RTT sample %u too large, using max\n", sample); - sample = DCCP_SANE_RTT_MAX; + h->rtt_sample_prev = 0; /* reset previous candidate */ + + delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval); + if (delta_v == 0) { /* less than RTT/4 difference */ + h->rtt_sample_prev = 1; + return; } + sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp))); - h->rtt_sample_prev = 0; /* use current entry as next reference */ -keep_ref_for_next_time: + if (delta_v <= 4) /* between RTT/4 and RTT */ + sample *= 4 / delta_v; + else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2)) + /* + * Optimisation: CCVal difference is greater than 1 RTT, yet the + * sample is less than the local RTT estimate; which means that + * the RTT estimate is too high. + * To avoid noise, it is not done if the sample is below RTT/2. + */ + return; - return sample; + /* Use a lower weight than usual to increase responsiveness */ + h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5); } EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 461cc91..555e65c 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -40,12 +40,28 @@ #include <linux/slab.h> #include "tfrc.h" -struct tfrc_tx_hist_entry; +/** + * tfrc_tx_hist_entry - Simple singly-linked TX history list + * @next: next oldest entry (LIFO order) + * @seqno: sequence number of this entry + * @stamp: send time of packet with sequence number @seqno + */ +struct tfrc_tx_hist_entry { + struct tfrc_tx_hist_entry *next; + u64 seqno; + ktime_t stamp; +}; + +static inline struct tfrc_tx_hist_entry * + tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) +{ + while (head != NULL && head->seqno != seqno) + head = head->next; + return head; +} extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); -extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, - const u64 seqno, const ktime_t now); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) @@ -75,12 +91,22 @@ struct tfrc_rx_hist_entry { * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry + * @rtt_estimate: Receiver RTT estimate + * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) + * @bytes_recvd: Number of bytes received since @bytes_start + * @bytes_start: Start time for counting @bytes_recvd */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; + /* Receiver RTT sampling */ #define rtt_sample_prev loss_start + u32 rtt_estimate; + /* Receiver sampling of application payload lengths */ + u32 packet_size, + bytes_recvd; + ktime_t bytes_start; }; /** @@ -124,20 +150,50 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } +/* + * Accessor functions to retrieve parameters sampled by the RX history + */ +static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h) +{ + if (h->packet_size == 0) { + DCCP_WARN("No sample for s, using fallback\n"); + return TCP_MIN_RCVMSS; + } + return h->packet_size; + +} +static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h) +{ + if (h->rtt_estimate == 0) { + DCCP_WARN("No RTT estimate available, using fallback RTT\n"); + return DCCP_FALLBACK_RTT; + } + return h->rtt_estimate; +} + +static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h) +{ + h->bytes_recvd = 0; + h->bytes_start = ktime_get_real(); +} + +extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv); + + extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); -extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); -extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); +extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), + struct sock *sk); +extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, + const struct sk_buff *skb); +extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed98575..ede12f5 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b) } /** + * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1 + * Uses scaling to improve accuracy of the integer approximation of sqrt(). The + * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for + * clamped RTT samples (dccp_sample_rtt). + * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the + * scaling factor is neutralised. For this purpose, it avoids returning zero. + */ +static inline u16 tfrc_scaled_sqrt(const u32 sample) +{ + const unsigned long non_zero_sample = sample ? : 1; + + return int_sqrt(non_zero_sample << 10); +} + +/** * tfrc_ewma - Exponentially weighted moving average * @weight: Weight to be used as damping factor, in units of 1/10 */ @@ -58,6 +73,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); extern int tfrc_tx_packet_history_init(void); extern void tfrc_tx_packet_history_exit(void); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 2f20a29..38239c4 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -632,8 +632,16 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ - DCCP_WARN("Value of p (%d) below resolution. " - "Substituting %d\n", p, TFRC_SMALLEST_P); + /* + * In the congestion-avoidance phase p decays towards 0 + * when there are no further losses, so this case is + * natural. Truncating to p_min = 0.01% means that the + * maximum achievable throughput is limited to about + * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g. + * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps. + */ + tfrc_pr_debug("Value of p (%d) below resolution. " + "Substituting %d\n", p, TFRC_SMALLEST_P); index = 0; } else /* 0.0001 <= p <= 0.05 */ index = p/TFRC_SMALLEST_P - 1; @@ -658,7 +666,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) result = scaled_div(s, R); return scaled_div32(result, f); } - EXPORT_SYMBOL_GPL(tfrc_calc_x); /** @@ -693,5 +700,19 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } - EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); + +/** + * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% + * When @loss_event_rate is large, there is a chance that p is truncated to 0. + * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. + */ +u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) +{ + if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ + return 0; + if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ + return 1000000; + return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); +} +EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate); |