aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-10-24 03:06:21 -0400
committerZiyan <jaraidaniel@gmail.com>2016-03-11 15:57:04 +0100
commit80607610d928c21da5b4ba3d8fded38f8ed80e9f (patch)
treeff6f41a211e834c44bbd41b5c4132d931b2fe69a /net
parent0ea36994dec559df6f63dcae75b9b360b53560b6 (diff)
downloadkernel_samsung_espresso10-80607610d928c21da5b4ba3d8fded38f8ed80e9f.zip
kernel_samsung_espresso10-80607610d928c21da5b4ba3d8fded38f8ed80e9f.tar.gz
kernel_samsung_espresso10-80607610d928c21da5b4ba3d8fded38f8ed80e9f.tar.bz2
ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
There is a long standing bug in linux tcp stack, about ACK messages sent on behalf of TIME_WAIT sockets. In the IP header of the ACK message, we choose to reflect TOS field of incoming message, and this might break some setups. Example of things that were broken : - Routing using TOS as a selector - Firewalls - Trafic classification / shaping We now remember in timewait structure the inet tos field and use it in ACK generation, and route lookup. Notes : - We still reflect incoming TOS in RST messages. - We could extend MuraliRaja Muniraju patch to report TOS value in netlink messages for TIME_WAIT sockets. - A patch is needed for IPv6 Change-Id: Ic7ad8a7b858de181bfe2a789c472f84955397d4c Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/tcp_ipv4.c11
3 files changed, 11 insertions, 7 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 3c8dfa1..44d65d5 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
tw->tw_bound_dev_if = sk->sk_bound_dev_if;
+ tw->tw_tos = inet->tos;
tw->tw_num = inet->inet_num;
tw->tw_state = TCP_TIME_WAIT;
tw->tw_substate = state;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dc4bea4..63e38cc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1472,7 +1472,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* structure to pass arguments.
*/
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
- struct ip_reply_arg *arg, unsigned int len)
+ const struct ip_reply_arg *arg, unsigned int len)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_options_data replyopts;
@@ -1495,7 +1495,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
}
flowi4_init_output(&fl4, arg->bound_dev_if, 0,
- RT_TOS(ip_hdr(skb)->tos),
+ RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, sk->sk_protocol,
ip_reply_arg_flowi_flags(arg),
daddr, rt->rt_spec_dst,
@@ -1512,7 +1512,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
with locally disabled BH and that sk cannot be already spinlocked.
*/
bh_lock_sock(sk);
- inet->tos = ip_hdr(skb)->tos;
+ inet->tos = arg->tos;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d645c6f..4933d2c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -658,6 +658,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.bound_dev_if = sk->sk_bound_dev_if;
net = dev_net(skb_dst(skb)->dev);
+ arg.tos = ip_hdr(skb)->tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
@@ -672,7 +673,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts, int oif,
struct tcp_md5sig_key *key,
- int reply_flags)
+ int reply_flags, u8 tos)
{
struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -732,7 +733,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
if (oif)
arg.bound_dev_if = oif;
-
+ arg.tos = tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
@@ -749,7 +750,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
- tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
+ tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
+ tw->tw_tos
);
inet_twsk_put(tw);
@@ -763,7 +765,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
req->ts_recent,
0,
tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
- inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+ ip_hdr(skb)->tos);
}
/*