From eb6f1160ddb2fdadf50f350da79d0796c37f17e2 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:43:25 -0700
Subject: [CRYPTO]: Use CRYPTO_TFM_REQ_MAY_SLEEP where appropriate

This patch goes through the current users of the crypto layer and sets
CRYPTO_TFM_REQ_MAY_SLEEP at crypto_alloc_tfm() where all crypto
operations are performed in process context.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 5a7265a..7ad7444 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -160,7 +160,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
 				" unsupported checksum %d", cksumtype);
 		goto out;
 	}
-	if (!(tfm = crypto_alloc_tfm(cksumname, 0)))
+	if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP)))
 		goto out;
 	cksum->len = crypto_tfm_alg_digestsize(tfm);
 	if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL)
--
cgit v1.1

From 12a49ffd842bf5b07c62eaabf178703ce4fe09d7 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield
Date: Thu, 1 Sep 2005 17:43:45 -0700
Subject: [DECNET]: Tidy send side socket SKB allocation.

Patch from Steve Whitehouse which I've vetted and tested:

"This patch is really intended as a move towards fixing the
sendmsg/recvmsg functions in various ways so that we will finally have
working Nagle. Also reduces code duplication."

Signed-off-by: Patrick Caulfield
Signed-off-by: David S. Miller
---
 net/decnet/af_decnet.c  | 40 +++++++++++++++++++++++++------
 net/decnet/dn_nsp_out.c | 63 -------------------------------------------------
 2 files changed, 33 insertions(+), 70 deletions(-)

(limited to 'net')

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 621680f..348f36b 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1876,8 +1876,27 @@ static inline unsigned int dn_current_mss(struct sock *sk, int flags)
 	return mss_now;
 }
 
+/*
+ * N.B. We get the timeout wrong here, but then we always did get it
+ * wrong before and this is another step along the road to correcting
+ * it. It ought to get updated each time we pass through the routine,
+ * but in practise it probably doesn't matter too much for now.
+ */
+static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
+						 unsigned long datalen, int noblock,
+						 int *errcode)
+{
+	struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
+						  noblock, errcode);
+	if (skb) {
+		skb->protocol = __constant_htons(ETH_P_DNA_RT);
+		skb->pkt_type = PACKET_OUTGOING;
+	}
+	return skb;
+}
+
 static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
-	       struct msghdr *msg, size_t size)
+		      struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
@@ -1892,7 +1911,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct dn_skb_cb *cb;
 	size_t len;
 	unsigned char fctype;
-	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+	long timeo;
 
 	if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
 		return -EOPNOTSUPP;
@@ -1900,18 +1919,21 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
 		return -EINVAL;
 
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 	/*
 	 * The only difference between stream sockets and sequenced packet
 	 * sockets is that the stream sockets always behave as if MSG_EOR
 	 * has been set.
 	 */
 	if (sock->type == SOCK_STREAM) {
-		if (flags & MSG_EOR)
-			return -EINVAL;
+		if (flags & MSG_EOR) {
+			err = -EINVAL;
+			goto out;
+		}
 		flags |= MSG_EOR;
 	}
 
-	lock_sock(sk);
 
 	err = dn_check_state(sk, addr, addr_len, &timeo, flags);
 	if (err)
@@ -1980,8 +2002,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		/*
 		 * Get a suitably sized skb.
+		 * 64 is a bit of a hack really, but its larger than any
+		 * link-layer headers and has served us well as a good
+		 * guess as to their real length.
 		 */
-		skb = dn_alloc_send_skb(sk, &len, flags & MSG_DONTWAIT, timeo, &err);
+		skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
+					 flags & MSG_DONTWAIT, &err);
 
 		if (err)
 			break;
@@ -1991,7 +2017,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		cb = DN_SKB_CB(skb);
 
-		skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
+		skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
 
 		if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
 			err = -EFAULT;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index e0bebf4..53633d3 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -137,69 +137,6 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri)
 }
 
 /*
- * Wrapper for the above, for allocs of data skbs. We try and get the
- * whole size thats been asked for (plus 11 bytes of header). If this
- * fails, then we try for any size over 16 bytes for SOCK_STREAMS.
- */
-struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err)
-{
-	int space;
-	int len;
-	struct sk_buff *skb = NULL;
-
-	*err = 0;
-
-	while(skb == NULL) {
-		if (signal_pending(current)) {
-			*err = sock_intr_errno(timeo);
-			break;
-		}
-
-		if (sk->sk_shutdown & SEND_SHUTDOWN) {
-			*err = EINVAL;
-			break;
-		}
-
-		if (sk->sk_err)
-			break;
-
-		len = *size + 11;
-		space = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
-
-		if (space < len) {
-			if ((sk->sk_socket->type == SOCK_STREAM) &&
-			    (space >= (16 + 11)))
-				len = space;
-		}
-
-		if (space < len) {
-			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-			if (noblock) {
-				*err = EWOULDBLOCK;
-				break;
-			}
-
-			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-			SOCK_SLEEP_PRE(sk)
-
-			if ((sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc)) <
-			    len)
-				schedule();
-
-			SOCK_SLEEP_POST(sk)
-			continue;
-		}
-
-		if ((skb = dn_alloc_skb(sk, len, sk->sk_allocation)) == NULL)
-			continue;
-
-		*size = len - 11;
-	}
-
-	return skb;
-}
-
-/*
  * Calculate persist timer based upon the smoothed round
  * trip time and the variance. Backoff according to the
  * nsp_backoff[] array.
--
cgit v1.1

From 5170dbebbb2e9159cdf6bbf35e5d79cd7009799a Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian
Date: Thu, 1 Sep 2005 17:44:06 -0700
Subject: [NETFILTER]: CLUSTERIP: fix memcpy() length typo

Fix a trivial typo in clusterip_config_init().

Signed-off-by: KOVACS Krisztian
Signed-off-by: Harald Welte
Signed-off-by: David S. Miller
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2d05caf..7d38913 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 	c->num_total_nodes = i->num_total_nodes;
 	c->num_local_nodes = i->num_local_nodes;
-	memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes));
+	memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
 	c->hash_mode = i->hash_mode;
 	c->hash_initval = i->hash_initval;
 	atomic_set(&c->refcount, 1);
--
cgit v1.1

From 573dbd95964b01a942aa0c68e92b06f2c9536964 Mon Sep 17 00:00:00 2001
From: Jesper Juhl
Date: Thu, 1 Sep 2005 17:44:29 -0700
Subject: [CRYPTO]: crypto_free_tfm() callers no longer need to check for NULL

Since the patch to add a NULL short-circuit to crypto_free_tfm() went in,
there's no longer any need for callers of that function to check for NULL.

This patch removes the redundant NULL checks and also a few similar checks
for NULL before calls to kfree() that I ran into while doing the
crypto_free_tfm bits.

I've successfully compile-tested this patch, and a kernel with the patch
applied boots and runs just fine.

When I posted the patch to LKML (and other lists/people on Cc) it drew the
following comments:

J. Bruce Fields commented "I've no problem with the auth_gss or nfsv4
bits.--b."

Sridhar Samudrala said "sctp change looks fine."

Herbert Xu signed off on the patch.

So, I guess this is ready to be dropped into -mm and eventually mainline.

Signed-off-by: Jesper Juhl
Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/ipv4/ah4.c                        | 18 ++++++------------
 net/ipv4/esp4.c                       | 24 ++++++++----------------
 net/ipv4/ipcomp.c                     |  3 +--
 net/ipv6/addrconf.c                   |  6 ++----
 net/ipv6/ah6.c                        | 18 ++++++------------
 net/ipv6/esp6.c                       | 24 ++++++++----------------
 net/ipv6/ipcomp6.c                    |  3 +--
 net/sctp/endpointola.c                |  3 +--
 net/sctp/socket.c                     |  3 +--
 net/sunrpc/auth_gss/gss_krb5_crypto.c |  3 +--
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  9 +++------
 net/sunrpc/auth_gss/gss_spkm3_mech.c  | 12 ++++--------
 12 files changed, 42 insertions(+), 84 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 514c85b..035ad2c 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -263,10 +263,8 @@ static int ah_init_state(struct xfrm_state *x)
 
 error:
 	if (ahp) {
-		if (ahp->work_icv)
-			kfree(ahp->work_icv);
-		if (ahp->tfm)
-			crypto_free_tfm(ahp->tfm);
+		kfree(ahp->work_icv);
+		crypto_free_tfm(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -279,14 +277,10 @@ static void ah_destroy(struct xfrm_state *x)
 	if (!ahp)
 		return;
 
-	if (ahp->work_icv) {
-		kfree(ahp->work_icv);
-		ahp->work_icv = NULL;
-	}
-	if (ahp->tfm) {
-		crypto_free_tfm(ahp->tfm);
-		ahp->tfm = NULL;
-	}
+	kfree(ahp->work_icv);
+	ahp->work_icv = NULL;
+	crypto_free_tfm(ahp->tfm);
+	ahp->tfm = NULL;
 	kfree(ahp);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b31ffc5..1b5a09d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -343,22 +343,14 @@ static void esp_destroy(struct xfrm_state *x)
 	if (!esp)
 		return;
 
-	if (esp->conf.tfm) {
-		crypto_free_tfm(esp->conf.tfm);
-		esp->conf.tfm = NULL;
-	}
-	if (esp->conf.ivec) {
-		kfree(esp->conf.ivec);
-		esp->conf.ivec = NULL;
-	}
-	if (esp->auth.tfm) {
-		crypto_free_tfm(esp->auth.tfm);
-		esp->auth.tfm = NULL;
-	}
-	if (esp->auth.work_icv) {
-		kfree(esp->auth.work_icv);
-		esp->auth.work_icv = NULL;
-	}
+	crypto_free_tfm(esp->conf.tfm);
+	esp->conf.tfm = NULL;
+	kfree(esp->conf.ivec);
+	esp->conf.ivec = NULL;
+	crypto_free_tfm(esp->auth.tfm);
+	esp->auth.tfm = NULL;
+	kfree(esp->auth.work_icv);
+	esp->auth.work_icv = NULL;
 	kfree(esp);
 }
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index dcb7ee6..fc718df 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -345,8 +345,7 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms)
 
 	for_each_cpu(cpu) {
 		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		if (tfm)
-			crypto_free_tfm(tfm);
+		crypto_free_tfm(tfm);
 	}
 	free_percpu(tfms);
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 937ad32..6d6fb74 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3593,10 +3593,8 @@ void __exit addrconf_cleanup(void)
 	rtnl_unlock();
 
 #ifdef CONFIG_IPV6_PRIVACY
-	if (likely(md5_tfm != NULL)) {
-		crypto_free_tfm(md5_tfm);
-		md5_tfm = NULL;
-	}
+	crypto_free_tfm(md5_tfm);
+	md5_tfm = NULL;
 #endif
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0ebfad9..f362973 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -401,10 +401,8 @@ static int ah6_init_state(struct xfrm_state *x)
 
 error:
 	if (ahp) {
-		if (ahp->work_icv)
-			kfree(ahp->work_icv);
-		if (ahp->tfm)
-			crypto_free_tfm(ahp->tfm);
+		kfree(ahp->work_icv);
+		crypto_free_tfm(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -417,14 +415,10 @@ static void ah6_destroy(struct xfrm_state *x)
 	if (!ahp)
 		return;
 
-	if (ahp->work_icv) {
-		kfree(ahp->work_icv);
-		ahp->work_icv = NULL;
-	}
-	if (ahp->tfm) {
-		crypto_free_tfm(ahp->tfm);
-		ahp->tfm = NULL;
-	}
+	kfree(ahp->work_icv);
+	ahp->work_icv = NULL;
+	crypto_free_tfm(ahp->tfm);
+	ahp->tfm = NULL;
 	kfree(ahp);
 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e8bff9d..9b27460 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -276,22 +276,14 @@ static void esp6_destroy(struct xfrm_state *x)
 	if (!esp)
 		return;
 
-	if (esp->conf.tfm) {
-		crypto_free_tfm(esp->conf.tfm);
-		esp->conf.tfm = NULL;
-	}
-	if (esp->conf.ivec) {
-		kfree(esp->conf.ivec);
-		esp->conf.ivec = NULL;
-	}
-	if (esp->auth.tfm) {
-		crypto_free_tfm(esp->auth.tfm);
-		esp->auth.tfm = NULL;
-	}
-	if (esp->auth.work_icv) {
-		kfree(esp->auth.work_icv);
-		esp->auth.work_icv = NULL;
-	}
+	crypto_free_tfm(esp->conf.tfm);
+	esp->conf.tfm = NULL;
+	kfree(esp->conf.ivec);
+	esp->conf.ivec = NULL;
+	crypto_free_tfm(esp->auth.tfm);
+	esp->auth.tfm = NULL;
+	kfree(esp->auth.work_icv);
+	esp->auth.work_icv = NULL;
 	kfree(esp);
 }
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 135383e..85bfbc6 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -341,8 +341,7 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
 
 	for_each_cpu(cpu) {
 		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		if (tfm)
-			crypto_free_tfm(tfm);
+		crypto_free_tfm(tfm);
 	}
 	free_percpu(tfms);
 }
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e47ac0d..e22ccd6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -193,8 +193,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	sctp_unhash_endpoint(ep);
 
 	/* Free up the HMAC transform. */
-	if (sctp_sk(ep->base.sk)->hmac)
-		sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
+	sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
 
 	/* Cleanup. */
 	sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 4454afe..91ec8c9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4194,8 +4194,7 @@ out:
 	sctp_release_sock(sk);
 	return err;
 cleanup:
-	if (tfm)
-		sctp_crypto_free_tfm(tfm);
+	sctp_crypto_free_tfm(tfm);
 	goto out;
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 7ad7444..ee6ae74 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -199,8 +199,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
 	crypto_digest_final(tfm, cksum->data);
 	code = 0;
 out:
-	if (tfm)
-		crypto_free_tfm(tfm);
+	crypto_free_tfm(tfm);
 	return code;
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index cf72651..606a8a8 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -185,12 +185,9 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx)
 {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	if (kctx->seq)
-		crypto_free_tfm(kctx->seq);
-	if (kctx->enc)
-		crypto_free_tfm(kctx->enc);
-	if (kctx->mech_used.data)
-		kfree(kctx->mech_used.data);
+	crypto_free_tfm(kctx->seq);
+	crypto_free_tfm(kctx->enc);
+	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index dad0599..6c97d61 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -214,14 +214,10 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx)
 {
 	struct spkm3_ctx *sctx = internal_ctx;
 
-	if(sctx->derived_integ_key)
-		crypto_free_tfm(sctx->derived_integ_key);
-	if(sctx->derived_conf_key)
-		crypto_free_tfm(sctx->derived_conf_key);
-	if(sctx->share_key.data)
-		kfree(sctx->share_key.data);
-	if(sctx->mech_used.data)
-		kfree(sctx->mech_used.data);
+	crypto_free_tfm(sctx->derived_integ_key);
+	crypto_free_tfm(sctx->derived_conf_key);
+	kfree(sctx->share_key.data);
+	kfree(sctx->mech_used.data);
 	kfree(sctx);
 }
--
cgit v1.1

From 2dac4b96b9362954a0638317b90e3e7bcb112e83 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki
Date: Thu, 1 Sep 2005 17:44:49 -0700
Subject: [IPV6]: Repair Incoming Interface Handling for Raw Socket.

Due to changes to enforce checking interface bindings, sockets did not
see loopback packets bound for our local address on our interface.

e.g., when we ping6 fe80::1%eth0, skb->dev points to loopback_dev while
IP6CB(skb)->iif indicates eth0.

This patch fixes the issue by using the appropriate incoming interface,
in the sense of the scoping architecture.

Signed-off-by: YOSHIFUJI Hideaki
Signed-off-by: David S. Miller
---
 net/ipv6/icmp.c | 2 +-
 net/ipv6/raw.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5176fc6..fa8f1bb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -549,7 +549,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
 	read_lock(&raw_v6_lock);
 	if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
 		while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
-					    skb->dev->ifindex))) {
+					    IP6CB(skb)->iif))) {
 			rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
 			sk = sk_next(sk);
 		}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7a58632..ed3a76b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -166,7 +166,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	if (sk == NULL)
 		goto out;
 
-	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex);
+	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
 		delivered = 1;
@@ -178,7 +178,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			rawv6_rcv(sk, clone);
 		}
 		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
-				     skb->dev->ifindex);
+				     IP6CB(skb)->iif);
 	}
 out:
 	read_unlock(&raw_v6_lock);
--
cgit v1.1

From d80d99d643090c3cf2b1f9fb3fadd1256f7e384f Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:48:23 -0700
Subject: [NET]: Add sk_stream_wmem_schedule

This patch introduces sk_stream_wmem_schedule as a shorthand for the
sk_forward_alloc check on egress.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 02fdda6..854f6d0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -552,8 +552,7 @@ new_segment:
 			tcp_mark_push(tp, skb);
 			goto new_segment;
 		}
-		if (sk->sk_forward_alloc < copy &&
-		    !sk_stream_mem_schedule(sk, copy, 0))
+		if (!sk_stream_wmem_schedule(sk, copy))
 			goto wait_for_memory;
 
 		if (can_coalesce) {
--
cgit v1.1

From ef015786152adaff5a6a8bf0c8ea2f70cee8059d Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:48:59 -0700
Subject: [TCP]: Fix sk_forward_alloc underflow in tcp_sendmsg

I've finally found a potential cause of the sk_forward_alloc underflows
that people have been reporting sporadically.

When tcp_sendmsg tacks on extra bits to an existing TCP_PAGE we don't
check sk_forward_alloc even though a large amount of time may have
elapsed since we allocated the page.  In the meantime someone could've
come along and liberated packets and reclaimed sk_forward_alloc memory.

This patch makes tcp_sendmsg check sk_forward_alloc every time as we do
in do_tcp_sendpages.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 854f6d0..cbcc9fc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -769,19 +769,23 @@ new_segment:
 			if (off == PAGE_SIZE) {
 				put_page(page);
 				TCP_PAGE(sk) = page = NULL;
+				TCP_OFF(sk) = off = 0;
 			}
-		}
+		} else
+			BUG_ON(off);
+
+		if (copy > PAGE_SIZE - off)
+			copy = PAGE_SIZE - off;
+
+		if (!sk_stream_wmem_schedule(sk, copy))
+			goto wait_for_memory;
 
 		if (!page) {
 			/* Allocate new cache page. */
 			if (!(page = sk_stream_alloc_page(sk)))
 				goto wait_for_memory;
-			off = 0;
 		}
 
-		if (copy > PAGE_SIZE - off)
-			copy = PAGE_SIZE - off;
-
 		/* Time to copy data. We are close to
 		 * the end! */
 		err = skb_copy_to_page(sk, from, skb, page,
--
cgit v1.1

From 6475be16fd9b3c6746ca4d18959246b13c669ea8 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 1 Sep 2005 22:47:01 -0700
Subject: [TCP]: Keep TSO enabled even during loss events.

All we need to do is resegment the queue so that we record SACK
information accurately.  The edges of the SACK blocks guide our
resegmenting decisions.  With help from Herbert Xu.

Signed-off-by: David S. Miller
---
 net/ipv4/tcp_input.c  | 36 ++++++++++++++++++++++-----------
 net/ipv4/tcp_output.c | 55 +++++++++++++++++++--------------------------------
 2 files changed, 44 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1afb080..29222b96 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -923,14 +923,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int flag = 0;
 	int i;
 
-	/* So, SACKs for already sent large segments will be lost.
-	 * Not good, but alternative is to resegment the queue. */
-	if (sk->sk_route_caps & NETIF_F_TSO) {
-		sk->sk_route_caps &= ~NETIF_F_TSO;
-		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache;
-	}
-
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
@@ -978,20 +970,40 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			flag |= FLAG_DATA_LOST;
 
 		sk_stream_for_retrans_queue(skb, sk) {
-			u8 sacked = TCP_SKB_CB(skb)->sacked;
-			int in_sack;
+			int in_sack, pcount;
+			u8 sacked;
 
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
-			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 				break;
 
-			fack_count += tcp_skb_pcount(skb);
+			pcount = tcp_skb_pcount(skb);
+
+			if (pcount > 1 &&
+			    (after(start_seq, TCP_SKB_CB(skb)->seq) ||
+			     before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
+				unsigned int pkt_len;
+
+				if (after(start_seq, TCP_SKB_CB(skb)->seq))
+					pkt_len = (start_seq -
+						   TCP_SKB_CB(skb)->seq);
+				else
+					pkt_len = (end_seq -
+						   TCP_SKB_CB(skb)->seq);
+				if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+					break;
+				pcount = tcp_skb_pcount(skb);
+			}
+
+			fack_count += pcount;
 
 			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
 				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
 
+			sacked = TCP_SKB_CB(skb)->sacked;
+
 			/* Account D-SACK for retransmitted packet. */
 			if ((dup_sack && in_sack) &&
 			    (sacked & TCPCB_RETRANS) &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75b6811..6094db5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -428,11 +428,11 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
  * packet to the list. This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize;
+	int nsize, old_factor;
 	u16 flags;
 
 	nsize = skb_headlen(skb) - len;
@@ -490,18 +490,29 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
 		tp->left_out -= tcp_skb_pcount(skb);
 	}
 
+	old_factor = tcp_skb_pcount(skb);
+
 	/* Fix up tso_factor for both original and new SKB. */
 	tcp_set_skb_tso_segs(sk, skb, mss_now);
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(skb);
-		tp->left_out += tcp_skb_pcount(skb);
-	}
+	/* If this packet has been sent out already, we must
+	 * adjust the various packet counters.
+	 */
+	if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+		int diff = old_factor - tcp_skb_pcount(skb) -
+			tcp_skb_pcount(buff);
 
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(buff);
-		tp->left_out += tcp_skb_pcount(buff);
+		tp->packets_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+			tp->lost_out -= diff;
+			tp->left_out -= diff;
+		}
+		if (diff > 0) {
+			tp->fackets_out -= diff;
+			if ((int)tp->fackets_out < 0)
+				tp->fackets_out = 0;
+		}
 	}
 
 	/* Link BUFF into the send queue. */
@@ -1350,12 +1361,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
-
-		if (sk->sk_route_caps & NETIF_F_TSO) {
-			sk->sk_route_caps &= ~NETIF_F_TSO;
-			sock_set_flag(sk, SOCK_NO_LARGESEND);
-		}
-
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -1370,22 +1375,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		int old_factor = tcp_skb_pcount(skb);
-		int diff;
-
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
-
-		/* New SKB created, account for it. */
-		diff = old_factor - tcp_skb_pcount(skb) -
-		       tcp_skb_pcount(skb->next);
-		tp->packets_out -= diff;
-
-		if (diff > 0) {
-			tp->fackets_out -= diff;
-			if ((int)tp->fackets_out < 0)
-				tp->fackets_out = 0;
-		}
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1993,12 +1984,6 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
-			/* SWS override triggered forced fragmentation.
-			 * Disable TSO, the connection is too sick. */
-			if (sk->sk_route_caps & NETIF_F_TSO) {
-				sock_set_flag(sk, SOCK_NO_LARGESEND);
-				sk->sk_route_caps &= ~NETIF_F_TSO;
-			}
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
--
cgit v1.1
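
A note on the CLUSTERIP fix above: sizeof(&c->local_nodes) yields the size
of a pointer, not of the local_nodes member, so the original memcpy() copied
only the first 4 or 8 bytes of the node list. The small standalone C program
below illustrates the difference; the array type and length are hypothetical
stand-ins, since the real ipt_clusterip_tgt_info layout is not shown in this
log.

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical stand-in for the local_nodes member. */
		unsigned short local_nodes[16];

		/* sizeof(&local_nodes) is the size of a pointer (e.g. 8 on a
		 * 64-bit machine); sizeof(local_nodes) is the size of the
		 * whole array (32 here).
		 */
		printf("sizeof(&local_nodes) = %zu\n", sizeof(&local_nodes));
		printf("sizeof(local_nodes)  = %zu\n", sizeof(local_nodes));
		return 0;
	}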
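A note on the crypto_free_tfm() cleanup: it depends on the free routine
tolerating NULL the way kfree() does. The "NULL short-circuit" the changelog
refers to presumably amounts to a guard of the following shape at the top of
the function; this is a sketch of the pattern, not the verbatim kernel code.

	void crypto_free_tfm(struct crypto_tfm *tfm)
	{
		/* Tolerate NULL like kfree(), so callers can drop their
		 * own checks before calling.
		 */
		if (tfm == NULL)
			return;

		/* ... existing teardown of the transform follows ... */
	}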
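A note on the sk_stream_wmem_schedule patch: this log is limited to 'net',
so the hunk that actually defines the helper (outside net/, presumably in
include/net/sock.h) does not appear above. Judging from the two-line check
it replaces in tcp.c, the helper is most likely an inline of roughly this
shape (a sketch, not the verbatim hunk):

	static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
	{
		/* Already accounted for, or more send memory can be
		 * scheduled.
		 */
		return size <= sk->sk_forward_alloc ||
		       sk_stream_mem_schedule(sk, size, 0);
	}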
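A worked example for the TSO resegmenting commit: suppose a TSO skb covers
sequence range [1000, 4000) with tso_size 1000 (pcount 3), and a SACK block
arrives for [2000, 3000). start_seq 2000 is after the skb's seq 1000, so
pkt_len = 2000 - 1000 = 1000 and tcp_fragment() splits the skb into
[1000, 2000) and [2000, 4000). On the next iteration the [2000, 4000) piece
straddles end_seq, so it is split again at 3000. The SACK edges now fall on
segment boundaries, the sacked/lost/fackets accounting stays exact, and TSO
no longer has to be disabled for the connection.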