diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 11 | ||||
-rw-r--r-- | net/ipv4/arp.c | 8 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 13 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 17 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 35 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 2 | ||||
-rw-r--r-- | net/ipv4/proc.c | 9 | ||||
-rw-r--r-- | net/ipv4/raw.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 |
13 files changed, 84 insertions, 51 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b5096a..15dc4c4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1250,7 +1250,8 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; @@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct icmp_mib), __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, - sizeof(struct icmpmsg_mib), - __alignof__(struct icmpmsg_mib)) < 0) + net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), + GFP_KERNEL); + if (!net->mib.icmpmsg_statistics) goto err_icmpmsg_mib; tcp_mib_init(net); @@ -1598,7 +1599,7 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + kfree(net->mib.icmpmsg_statistics); snmp_mib_free((void __percpu **)net->mib.icmp_statistics); snmp_mib_free((void __percpu **)net->mib.udplite_statistics); snmp_mib_free((void __percpu **)net->mib.udp_statistics); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 96a164a..5c29ac5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -177,7 +177,7 @@ struct neigh_table arp_tbl = { .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, .delay_probe_time = 5 * HZ, - .queue_len = 3, + .queue_len_bytes = 64*1024, .ucast_probes = 3, .mcast_probes = 3, .anycast_delay = 1 * HZ, @@ -592,16 +592,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct sk_buff *skb; struct arphdr *arp; unsigned char *arp_ptr; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); if (skb == NULL) return NULL; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c7472ef..fbc5376 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct igmpv3_report *pig; struct net *net = dev_net(dev); struct flowi4 fl4; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; while (1) { - skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), + skb = alloc_skb(size + hlen + tlen, GFP_ATOMIC | __GFP_NOWARN); if (skb) break; @@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); pip = ip_hdr(skb); @@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, __be32 group = pmc ? pmc->multiaddr : 0; struct flowi4 fl4; __be32 dst; + int hlen, tlen; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (IS_ERR(rt)) return -1; - skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, skb_dst_set(skb, &rt->dst); - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); iph = ip_hdr(skb); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88a..a598768 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); -struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const gfp_t priority) +/** + * inet_csk_clone_lock - clone an inet socket, and lock its clone + * @sk: the socket to clone + * @req: request_sock + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * + * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) + */ +struct sock *inet_csk_clone_lock(const struct sock *sk, + const struct request_sock *req, + const gfp_t priority) { - struct sock *newsk = sk_clone(sk, priority); + struct sock *newsk = sk_clone_lock(sk, priority); if (newsk != NULL) { struct inet_connection_sock *newicsk = inet_csk(newsk); @@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } return newsk; } -EXPORT_SYMBOL_GPL(inet_csk_clone); +EXPORT_SYMBOL_GPL(inet_csk_clone_lock); /* * At this point, there should be no process reference to this diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d55110e..2b32296 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -171,7 +171,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipgre_get_stats(struct net_device *dev) { @@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 09ff51b..80d5fa4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -55,20 +55,13 @@ /* * SOL_IP control messages. */ +#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { - struct in_pktinfo info; - struct rtable *rt = skb_rtable(skb); + struct in_pktinfo info = *PKTINFO_SKB_CB(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; - if (rt) { - info.ipi_ifindex = rt->rt_iif; - info.ipi_spec_dst.s_addr = rt->rt_spec_dst; - } else { - info.ipi_ifindex = 0; - info.ipi_spec_dst.s_addr = 0; - } put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } @@ -992,20 +985,28 @@ e_inval: } /** - * ip_queue_rcv_skb - Queue an skb into sock receive queue + * ipv4_pktinfo_prepare - transfert some info from rtable to skb * @sk: socket * @skb: buffer * - * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option - * is not set, we drop skb dst entry now, while dst cache line is hot. + * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst + * in skb->cb[] before dst drop. + * This way, receiver doesnt make cache line misses to read rtable. */ -int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(struct sk_buff *skb) { - if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) - skb_dst_drop(skb); - return sock_queue_rcv_skb(sk, skb); + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + const struct rtable *rt = skb_rtable(skb); + + if (rt) { + pktinfo->ipi_ifindex = rt->rt_iif; + pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; + } else { + pktinfo->ipi_ifindex = 0; + pktinfo->ipi_spec_dst.s_addr = 0; + } + skb_dst_drop(skb); } -EXPORT_SYMBOL(ip_queue_rcv_skb); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0da2afc..915eb52 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -763,13 +763,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct sk_buff *skb; struct bootp_pkt *b; struct iphdr *h; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15, GFP_KERNEL); if (!skb) return; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); @@ -822,8 +824,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d skb->dev = dev; skb->protocol = htons(ETH_P_IP); if (dev_hard_header(skb, dev, ntohs(skb->protocol), - dev->broadcast, dev->dev_addr, skb->len) < 0 || - dev_queue_xmit(skb) < 0) + dev->broadcast, dev->dev_addr, skb->len) < 0) { + kfree_skb(skb); + printk("E"); + return; + } + + if (dev_queue_xmit(skb) < 0) printk("E"); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 065effd..9490690 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -148,7 +148,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipip_get_stats(struct net_device *dev) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 466ea8b..961eed4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); + val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]); if (val) { type[count] = i; vals[count++] = val; @@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq) { int i; struct net *net = seq->private; + atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) @@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq) snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index)); + atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index | 0x100)); + atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } /* diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 007e2eb..3ccda5a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ - if (ip_queue_rcv_skb(sk, skb) < 0) { + ipv4_pktinfo_prepare(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, unsigned int iphlen; int err; struct rtable *rt = *rtp; + int hlen, tlen; if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, if (flags&MSG_PROBE) goto out; + hlen = LL_RESERVED_SPACE(rt->dst.dev); + tlen = rt->dst.dev->needed_tailroom; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, + length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 34f5db1..50c3596 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2653,7 +2653,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 66363b6..0a7e339 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, */ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) { - struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ab0966d..b867ea2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1357,7 +1357,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (inet_sk(sk)->inet_daddr) sock_rps_save_rxhash(sk, skb); - rc = ip_queue_rcv_skb(sk, skb); + rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1473,6 +1473,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = 0; + ipv4_pktinfo_prepare(skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); @@ -2246,7 +2247,8 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; |