diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/esp4.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 5 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 8 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 15 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 6 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 65 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 10 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 | ||||
-rw-r--r-- | net/ipv4/xfrm4_mode_tunnel.c | 2 |
18 files changed, 88 insertions, 72 deletions
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 530787b..238fc3b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -137,8 +137,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -174,8 +172,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0d884eb..e11f4a5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -72,7 +72,6 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -1772,10 +1771,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e0d42db..508bae1 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -342,7 +342,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8*)&pip[1])[0] = IPOPT_RA; ((u8*)&pip[1])[1] = 4; ((u8*)&pip[1])[2] = 0; @@ -683,7 +683,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; ((u8*)&iph[1])[1] = 4; ((u8*)&iph[1])[2] = 0; @@ -705,7 +705,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -717,7 +717,7 @@ static void igmp_ifc_timer_expire(unsigned long data) in_dev->mr_ifc_count--; igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6877645..996df36 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -31,8 +31,8 @@ * At the moment of writing this notes identifier of IP packets is generated * to be unpredictable using this code only for packets subjected * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size uses a constant ID and do not use this code (see - * ip_select_ident() in include/net/ip.h). + * PMTU in size when local fragmentation is disabled use a constant ID and do + * not use this code (see ip_select_ident() in include/net/ip.h). * * Route cache entries hold references to our nodes. * New cache entries get references via lookup by destination IP address in diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index da38472..daf19ad 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -251,8 +251,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -517,8 +516,16 @@ found: qp->q.last_in |= INET_FRAG_FIRST_IN; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); write_lock(&ip4_frags.lock); list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7bb94c..91ddbd4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -716,6 +716,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tiph = &tunnel->parms.iph; } + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); if ((dst = tiph->daddr) == 0) { /* NBMA tunnel */ @@ -853,7 +854,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6daaf60..69b32cb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -161,7 +161,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -411,7 +411,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->dst, sk, + ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -1354,7 +1354,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 6f06f7f..6d1165f 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -448,6 +448,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tos & 1) tos = old_iph->tos; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); if (!dst) { /* NBMA tunnel */ if ((rt = skb_rtable(skb)) == NULL) { @@ -531,7 +532,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dc89714..033ea7e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1575,7 +1575,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(iph, skb_dst(skb), NULL); + ip_select_ident(skb, skb_dst(skb), NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b32679..14b0486 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -379,7 +379,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3f6a67d..62d9020 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,8 +345,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e182501..3c74912 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -32,6 +32,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -246,7 +248,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a9a57a..4349e46 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3058,7 +3058,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); + unsigned int offset = f->page_offset; + struct page *page = f->page + (offset >> PAGE_SHIFT); + + sg_set_page(&sg, page, f->size, + offset_in_page(offset)); if (crypto_hash_update(desc, &sg, f->size)) return 1; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f376b05..b78eac2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -204,8 +204,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -248,9 +248,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -412,7 +414,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 959405e..c91002f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -116,6 +116,7 @@ int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3659,6 +3660,27 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +static void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { @@ -3705,6 +3727,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3991,27 +4019,6 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5480,14 +5487,10 @@ slow_path: return 0; step5: - if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (th->ack && + tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5851,7 +5854,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (th->ack) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -5964,11 +5968,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 63fb1ea..acd5c87 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -832,11 +832,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) { + if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - skb->ooo_okay = 1; - } else - skb->ooo_okay = 0; + + /* if no packet is in qdisc/device queue, then allow XPS to select + * another queue. + */ + skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 68277fd..966c3e1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -765,7 +765,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -784,6 +784,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ed4bf11..938553e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -54,7 +54,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(top_iph, dst->child, NULL); + ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); |