aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/cipso_ipv4.c6
-rw-r--r--net/ipv4/esp4.c24
-rw-r--r--net/ipv4/fib_semantics.c20
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/ipmr.c14
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c10
-rw-r--r--net/ipv4/raw.c14
-rw-r--r--net/ipv4/tcp.c39
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_illinois.c8
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_output.c21
14 files changed, 116 insertions, 68 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2b3c23c..062876b 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
case CIPSO_V4_TAG_LOCAL:
/* This is a non-standard tag that we only allow for
* local connections, so if the incoming interface is
- * not the loopback device drop the packet. */
- if (!(skb->dev->flags & IFF_LOOPBACK)) {
+ * not the loopback device drop the packet. Further,
+ * there is no legitimate reason for setting this from
+ * userspace so reject it if skb is NULL. */
+ if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) {
err_offset = opt_iter;
goto validate_return_locked;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a5b4134..530787b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -457,28 +457,22 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
u32 align = max_t(u32, blksize, esp->padlen);
- u32 rem;
-
- mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
- rem = mtu & (align - 1);
- mtu &= ~(align - 1);
+ unsigned int net_adj;
switch (x->props.mode) {
- case XFRM_MODE_TUNNEL:
- break;
- default:
case XFRM_MODE_TRANSPORT:
- /* The worst case */
- mtu -= blksize - 4;
- mtu += min_t(u32, blksize - 4, rem);
- break;
case XFRM_MODE_BEET:
- /* The worst case. */
- mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
+ net_adj = sizeof(struct iphdr);
break;
+ case XFRM_MODE_TUNNEL:
+ net_adj = 0;
+ break;
+ default:
+ BUG();
}
- return mtu - 2;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
+ net_adj) & ~(align - 1)) + (net_adj - 2);
}
static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 33e2c35..7e454ba 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -142,6 +142,18 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
};
/* Release a nexthop info record */
+static void free_fib_info_rcu(struct rcu_head *head)
+{
+ struct fib_info *fi = container_of(head, struct fib_info, rcu);
+
+ change_nexthops(fi) {
+ if (nexthop_nh->nh_dev)
+ dev_put(nexthop_nh->nh_dev);
+ } endfor_nexthops(fi);
+
+ release_net(fi->fib_net);
+ kfree(fi);
+}
void free_fib_info(struct fib_info *fi)
{
@@ -149,14 +161,8 @@ void free_fib_info(struct fib_info *fi)
pr_warning("Freeing alive fib_info %p\n", fi);
return;
}
- change_nexthops(fi) {
- if (nexthop_nh->nh_dev)
- dev_put(nexthop_nh->nh_dev);
- nexthop_nh->nh_dev = NULL;
- } endfor_nexthops(fi);
fib_info_cnt--;
- release_net(fi->fib_net);
- kfree_rcu(fi, rcu);
+ call_rcu(&fi->rcu, free_fib_info_rcu);
}
void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 58c25ea..0d884eb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1371,6 +1371,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
continue;
+ if (fi->fib_dead)
+ continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
fib_alias_accessed(fa);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f81af8d..ec7d8e7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static void ipmr_free_table(struct mr_table *mrt);
+
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *cache,
int local);
@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct mr_table *mrt);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net)
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
- kfree(mrt);
+ ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
}
@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- kfree(net->ipv4.mrt);
+ ipmr_free_table(net->ipv4.mrt);
}
#endif
@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
return mrt;
}
+static void ipmr_free_table(struct mr_table *mrt)
+{
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt);
+ kfree(mrt);
+}
+
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index de9da21..d7d63f4 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -84,6 +84,14 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
*dataoff = nhoff + (iph->ihl << 2);
*protonum = iph->protocol;
+ /* Check bogus IP headers */
+ if (*dataoff > skb->len) {
+ pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+ "nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -NF_ACCEPT;
+ }
+
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e40cf78..cd6881e 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
hdr, NULL, &matchoff, &matchlen,
&addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
+ unsigned int olen, matchend, poff, plen, buflen, n;
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
/* We're only interested in headers related to this
@@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
goto next;
}
+ olen = *datalen;
if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
&addr, port))
return NF_DROP;
- matchend = matchoff + matchlen;
+ matchend = matchoff + matchlen + *datalen - olen;
/* The maddr= parameter (RFC 2361) specifies where to send
* the reply. */
@@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
port = 0;
break;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c9893d4..3d8bb18 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -130,18 +130,20 @@ found:
* 0 - deliver
* 1 - block
*/
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
{
- int type;
+ struct icmphdr _hdr;
+ const struct icmphdr *hdr;
- if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+ hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (!hdr)
return 1;
- type = icmp_hdr(skb)->type;
- if (type < 32) {
+ if (hdr->type < 32) {
__u32 data = raw_sk(sk)->filter.data;
- return ((1 << type) & data) != 0;
+ return ((1U << hdr->type) & data) != 0;
}
/* Do not block unknown ICMP types */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0e5330..1a9a57a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -485,14 +485,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
!tp->urg_data ||
before(tp->urg_seq, tp->copied_seq) ||
!before(tp->urg_seq, tp->rcv_nxt)) {
- struct sk_buff *skb;
answ = tp->rcv_nxt - tp->copied_seq;
- /* Subtract 1, if FIN is in queue. */
- skb = skb_peek_tail(&sk->sk_receive_queue);
- if (answ && skb)
- answ -= tcp_hdr(skb)->fin;
+ /* Subtract 1, if FIN was received */
+ if (answ && sock_flag(sk, SOCK_DONE))
+ answ--;
} else
answ = tp->urg_seq - tp->copied_seq;
release_sock(sk);
@@ -743,7 +741,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
@@ -854,8 +854,7 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- if (copied)
- tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -1601,8 +1600,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan)
- dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ if (tp->ucopy.dma_chan) {
+ if (tp->rcv_wnd == 0 &&
+ !skb_queue_empty(&sk->sk_async_wait_queue)) {
+ tcp_service_net_dma(sk, true);
+ tcp_cleanup_rbuf(sk, copied);
+ } else
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ }
#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
@@ -2410,7 +2415,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Cap the max timeout in ms TCP will retry/retrans
* before giving up and aborting (ETIMEDOUT) a connection.
*/
- icsk->icsk_user_timeout = msecs_to_jiffies(val);
+ if (val < 0)
+ err = -EINVAL;
+ else
+ icsk->icsk_user_timeout = msecs_to_jiffies(val);
break;
default:
err = -ENOPROTOOPT;
@@ -3236,7 +3244,7 @@ void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
unsigned long limit;
- int i, max_share, cnt;
+ int i, max_rshare, max_wshare, cnt;
unsigned long jiffy = jiffies;
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3300,15 +3308,16 @@ void __init tcp_init(void)
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
- max_share = min(4UL*1024*1024, limit);
+ max_wshare = min(4UL*1024*1024, limit);
+ max_rshare = min(6UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_wmem[1] = 16*1024;
- sysctl_tcp_wmem[2] = max(64*1024, max_share);
+ sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_rmem[1] = 87380;
- sysctl_tcp_rmem[2] = max(87380, max_share);
+ sysctl_tcp_rmem[2] = max(87380, max_rshare);
printk(KERN_INFO "TCP: Hash tables configured "
"(established %u bind %u)\n",
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 850c737..6cebfd2 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,7 +290,8 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return 1;
return left <= tcp_max_burst(tp);
}
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 813b43a..834857f 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
.tcpv_rttcnt = ca->cnt_rtt,
.tcpv_minrtt = ca->base_rtt,
};
- u64 t = ca->sum_rtt;
- do_div(t, ca->cnt_rtt);
- info.tcpv_rtt = t;
+ if (info.tcpv_rttcnt > 0) {
+ u64 t = ca->sum_rtt;
+ do_div(t, info.tcpv_rttcnt);
+ info.tcpv_rtt = t;
+ }
nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c3a9f03..b76aa2d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -83,7 +83,7 @@ int sysctl_tcp_ecn __read_mostly = 2;
EXPORT_SYMBOL(sysctl_tcp_ecn);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 2;
+int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
int sysctl_tcp_stdurg __read_mostly;
@@ -5340,7 +5340,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
- if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ if (tp->ucopy.task == current &&
+ sock_owned_by_user(sk) &&
+ tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
@@ -5761,6 +5763,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if (th->syn) {
+ if (th->fin)
+ goto discard;
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 53a5af6..d645c6f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -651,10 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
/* When socket is gone, all binding information is lost.
- * routing might fail in this case. using iif for oif to
- * make sure we can deliver it
+ * routing might fail in this case. No choice here, if we choose to force
+ * input interface, we will misroute in case of asymmetric route.
*/
- arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
+ if (sk)
+ arg.bound_dev_if = sk->sk_bound_dev_if;
net = dev_net(skb_dst(skb)->dev);
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index faf257b..e0b8bd1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1310,21 +1310,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1551,7 +1551,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1777,7 +1778,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))