aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c36
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_hash.c6
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/fib_semantics.c27
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/igmp.c50
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/inetpeer.c2
-rw-r--r--net/ipv4/ip_gre.c1
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_options.c1
-rw-r--r--net/ipv4/ip_output.c16
-rw-r--r--net/ipv4/ip_sockglue.c9
-rw-r--r--net/ipv4/ipcomp.c6
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c21
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c10
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c27
-rw-r--r--net/ipv4/netfilter/arp_tables.c30
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c36
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c5
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c14
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_compound.c448
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_highspeed.c13
-rw-r--r--net/ipv4/tcp_input.c14
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_lp.c35
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv4/tcp_probe.c5
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv4/xfrm4_output.c2
50 files changed, 308 insertions, 647 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index da33393..8514106 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -572,16 +572,6 @@ config TCP_CONG_VENO
loss packets.
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
-config TCP_CONG_COMPOUND
- tristate "TCP Compound"
- depends on EXPERIMENTAL
- default n
- ---help---
- TCP Compound is a sender-side only change to TCP that uses
- a mixed Reno/Vegas approach to calculate the cwnd.
- For further details look here:
- ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
-
endmenu
config TCP_CONG_BIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 38b8039..4878fc5 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,7 +47,6 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
-obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 318d467..c84a320 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1097,6 +1097,40 @@ int inet_sk_rebuild_header(struct sock *sk)
EXPORT_SYMBOL(inet_sk_rebuild_header);
+static int inet_gso_send_check(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ struct net_protocol *ops;
+ int proto;
+ int ihl;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+ goto out;
+
+ iph = skb->nh.iph;
+ ihl = iph->ihl * 4;
+ if (ihl < sizeof(*iph))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, ihl)))
+ goto out;
+
+ skb->h.raw = __skb_pull(skb, ihl);
+ iph = skb->nh.iph;
+ proto = iph->protocol & (MAX_INET_PROTOS - 1);
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet_protos[proto]);
+ if (likely(ops && ops->gso_send_check))
+ err = ops->gso_send_check(skb);
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
@@ -1162,6 +1196,7 @@ static struct net_protocol igmp_protocol = {
static struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
+ .gso_send_check = tcp_v4_gso_send_check,
.gso_segment = tcp_tso_segment,
.no_policy = 1,
};
@@ -1208,6 +1243,7 @@ static int ipv4_proc_init(void);
static struct packet_type ip_packet_type = {
.type = __constant_htons(ETH_P_IP),
.func = ip_rcv,
+ .gso_send_check = inet_gso_send_check,
.gso_segment = inet_gso_segment,
};
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 8e748be..1366bc6 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -215,12 +215,10 @@ static int ah_init_state(struct xfrm_state *x)
if (x->encap)
goto error;
- ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+ ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
if (ahp == NULL)
return -ENOMEM;
- memset(ahp, 0, sizeof(*ahp));
-
ahp->key = x->aalg->alg_key;
ahp->key_len = (x->aalg->alg_key_len+7)/8;
ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7b51b3b..c8a3723 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1372,12 +1372,11 @@ static int arp_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
- memset(s, 0, sizeof(*s));
rc = seq_open(file, &arp_seq_ops);
if (rc)
goto out_kfree;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7c65e9..a6cc31d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -93,10 +93,9 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
static struct in_ifaddr *inet_alloc_ifa(void)
{
- struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+ struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
if (ifa) {
- memset(ifa, 0, sizeof(*ifa));
INIT_RCU_HEAD(&ifa->rcu_head);
}
@@ -140,10 +139,9 @@ struct in_device *inetdev_init(struct net_device *dev)
ASSERT_RTNL();
- in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
+ in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
if (!in_dev)
goto out;
- memset(in_dev, 0, sizeof(*in_dev));
INIT_RCU_HEAD(&in_dev->rcu_head);
memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4e11273..fc2f8ce 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -316,12 +316,10 @@ static int esp_init_state(struct xfrm_state *x)
if (x->ealg == NULL)
goto error;
- esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+ esp = kzalloc(sizeof(*esp), GFP_KERNEL);
if (esp == NULL)
return -ENOMEM;
- memset(esp, 0, sizeof(*esp));
-
if (x->aalg) {
struct xfrm_algo_desc *aalg_desc;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 3c1d32a..72c633b 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -204,11 +204,10 @@ static struct fn_zone *
fn_new_zone(struct fn_hash *table, int z)
{
int i;
- struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
+ struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
if (!fz)
return NULL;
- memset(fz, 0, sizeof(struct fn_zone));
if (z) {
fz->fz_divisor = 16;
} else {
@@ -1046,7 +1045,7 @@ static int fib_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1057,7 +1056,6 @@ static int fib_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6c642d1..79b0471 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -196,10 +196,9 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
}
- new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+ new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
if (!new_r)
return -ENOMEM;
- memset(new_r, 0, sizeof(*new_r));
if (rta[RTA_SRC-1])
memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
@@ -457,13 +456,13 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
hlist_for_each_entry(r, node, &fib_rules, hlist) {
-
if (idx < s_idx)
- continue;
+ goto next;
if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
RTM_NEWRULE, NLM_F_MULTI) < 0)
break;
+next:
idx++;
}
rcu_read_unlock();
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5f87533..5173800 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi)
void fib_release_info(struct fib_info *fi)
{
- write_lock(&fib_info_lock);
+ write_lock_bh(&fib_info_lock);
if (fi && --fi->fib_treeref == 0) {
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
@@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi)
fi->fib_dead = 1;
fib_info_put(fi);
}
- write_unlock(&fib_info_lock);
+ write_unlock_bh(&fib_info_lock);
}
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_hash_size;
unsigned int i, bytes;
- write_lock(&fib_info_lock);
+ write_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_hash_size = new_size;
@@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
}
fib_info_laddrhash = new_laddrhash;
- write_unlock(&fib_info_lock);
+ write_unlock_bh(&fib_info_lock);
bytes = old_size * sizeof(struct hlist_head *);
fib_hash_free(old_info_hash, bytes);
@@ -709,11 +709,10 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
goto failure;
}
- fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
fib_info_cnt++;
- memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
fi->fib_protocol = r->rtm_protocol;
@@ -821,7 +820,7 @@ link_it:
fi->fib_treeref++;
atomic_inc(&fi->fib_clntref);
- write_lock(&fib_info_lock);
+ write_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
@@ -840,7 +839,7 @@ link_it:
head = &fib_info_devhash[hash];
hlist_add_head(&nh->nh_hash, head);
} endfor_nexthops(fi)
- write_unlock(&fib_info_lock);
+ write_unlock_bh(&fib_info_lock);
return fi;
err_inval:
@@ -962,10 +961,6 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
rtm->rtm_protocol = fi->fib_protocol;
if (fi->fib_priority)
RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
-#ifdef CONFIG_NET_CLS_ROUTE
- if (fi->fib_nh[0].nh_tclassid)
- RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
-#endif
if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
goto rtattr_failure;
if (fi->fib_prefsrc)
@@ -975,6 +970,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
if (fi->fib_nh->nh_oif)
RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (fi->fib_nh[0].nh_tclassid)
+ RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+#endif
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
@@ -993,6 +992,10 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
nhp->rtnh_ifindex = nh->nh_oif;
if (nh->nh_gw)
RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (nh->nh_tclassid)
+ RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+#endif
nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
} endfor_nexthops(fi);
mp_head->rta_type = RTA_MULTIPATH;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1cb6530..23fb9d9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1252,8 +1252,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
*/
if (!fa_head) {
- fa_head = fib_insert_node(t, &err, key, plen);
err = 0;
+ fa_head = fib_insert_node(t, &err, key, plen);
if (err)
goto out_free_new_fa;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d299c8e..8e8117c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1028,10 +1028,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
* for deleted items allows change reports to use common code with
* non-deleted or query-response MCA's.
*/
- pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
+ pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
- memset(pmc, 0, sizeof(*pmc));
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
@@ -1529,10 +1528,9 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
psf_prev = psf;
}
if (!psf) {
- psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
+ psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
if (!psf)
return -ENOBUFS;
- memset(psf, 0, sizeof(*psf));
psf->sf_inaddr = *psfsrc;
if (psf_prev) {
psf_prev->sf_next = psf;
@@ -1795,29 +1793,35 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
struct in_device *in_dev;
u32 group = imr->imr_multiaddr.s_addr;
u32 ifindex;
+ int ret = -EADDRNOTAVAIL;
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
- if (!in_dev) {
- rtnl_unlock();
- return -ENODEV;
- }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
- if (iml->multi.imr_multiaddr.s_addr == group &&
- iml->multi.imr_ifindex == ifindex) {
- (void) ip_mc_leave_src(sk, iml, in_dev);
+ if (iml->multi.imr_multiaddr.s_addr != group)
+ continue;
+ if (ifindex) {
+ if (iml->multi.imr_ifindex != ifindex)
+ continue;
+ } else if (imr->imr_address.s_addr && imr->imr_address.s_addr !=
+ iml->multi.imr_address.s_addr)
+ continue;
+
+ (void) ip_mc_leave_src(sk, iml, in_dev);
- *imlp = iml->next;
+ *imlp = iml->next;
+ if (in_dev)
ip_mc_dec_group(in_dev, group);
- rtnl_unlock();
- sock_kfree_s(sk, iml, sizeof(*iml));
- return 0;
- }
+ rtnl_unlock();
+ sock_kfree_s(sk, iml, sizeof(*iml));
+ return 0;
}
+ if (!in_dev)
+ ret = -ENODEV;
rtnl_unlock();
- return -EADDRNOTAVAIL;
+ return ret;
}
int ip_mc_source(int add, int omode, struct sock *sk, struct
@@ -2201,13 +2205,13 @@ void ip_mc_drop_socket(struct sock *sk)
struct in_device *in_dev;
inet->mc_list = iml->next;
- if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) {
- (void) ip_mc_leave_src(sk, iml, in_dev);
+ in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+ (void) ip_mc_leave_src(sk, iml, in_dev);
+ if (in_dev != NULL) {
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
in_dev_put(in_dev);
}
sock_kfree_s(sk, iml, sizeof(*iml));
-
}
rtnl_unlock();
}
@@ -2380,7 +2384,7 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -2390,7 +2394,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
@@ -2555,7 +2558,7 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -2565,7 +2568,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e7e41b..492858e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -909,11 +909,10 @@ static int __init inet_diag_init(void)
sizeof(struct inet_diag_handler *));
int err = -ENOMEM;
- inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL);
+ inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
if (!inet_diag_table)
goto out;
- memset(inet_diag_table, 0, inet_diag_table_size);
idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
THIS_MODULE);
if (idiagnl == NULL)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2160874..03ff62e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty;
static DEFINE_RWLOCK(peer_pool_lock);
#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
-static volatile int peer_total;
+static int peer_total;
/* Exported for sysctl_net_ipv4. */
int inet_peer_threshold = 65536 + 128; /* start to throw entries more
* aggressively at this stage */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6ff9b10..0f9b3a3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -617,7 +617,6 @@ static int ipgre_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = __pskb_pull(skb, offset);
skb_postpull_rcsum(skb, skb->h.raw, offset);
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e1a7dba..212734c 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -428,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
}
+ /* Remove any debris in the socket control block */
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+
return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index cbcae65..406056e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -256,7 +256,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (!opt) {
opt = &(IPCB(skb)->opt);
- memset(opt, 0, sizeof(struct ip_options));
iph = skb->nh.raw;
opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
optptr = iph + sizeof(struct iphdr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ca0e714..a2ede16 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -209,7 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
+ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -440,6 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = skb->nh.iph;
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->u.dst)));
kfree_skb(skb);
@@ -526,6 +527,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
err = output(skb);
+ if (!err)
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -649,9 +652,6 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
-
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
-
iph->tot_len = htons(len + hlen);
ip_send_check(iph);
@@ -659,6 +659,8 @@ slow_path:
err = output(skb2);
if (err)
goto fail;
+
+ IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
@@ -946,7 +948,7 @@ alloc_new_skb:
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
- skb_trim(skb_prev, maxfraglen);
+ pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
@@ -1095,7 +1097,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
while (size > 0) {
int i;
- if (skb_shinfo(skb)->gso_size)
+ if (skb_is_gso(skb))
len = size;
else {
@@ -1141,7 +1143,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
data, fraggap, 0);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
- skb_trim(skb_prev, maxfraglen);
+ pskb_trim_unique(skb_prev, maxfraglen);
}
/*
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 84f43a3..2d05c41 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
{
char *secdata;
- u32 seclen;
+ u32 seclen, secid;
int err;
- err = security_socket_getpeersec_dgram(skb, &secdata, &seclen);
+ err = security_socket_getpeersec_dgram(NULL, skb, &secid);
+ if (err)
+ return;
+
+ err = security_secid_to_secctx(secid, &secdata, &seclen);
if (err)
return;
put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
+ security_release_secctx(secdata, seclen);
}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 8e03748..a0c28b2 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -70,7 +70,8 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
if (err)
goto out;
- skb_put(skb, dlen - plen);
+ skb->truesize += dlen - plen;
+ __skb_put(skb, dlen - plen);
memcpy(skb->data, scratch, dlen);
out:
put_cpu();
@@ -409,11 +410,10 @@ static int ipcomp_init_state(struct xfrm_state *x)
goto out;
err = -ENOMEM;
- ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+ ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
if (!ipcd)
goto out;
- memset(ipcd, 0, sizeof(*ipcd));
x->props.header_len = 0;
if (x->props.mode)
x->props.header_len += sizeof(struct iphdr);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3291d51..76ab50b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -487,7 +487,6 @@ static int ipip_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = skb->data;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ba33f86..85893ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1461,7 +1461,6 @@ int pim_rcv_v1(struct sk_buff * skb)
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
@@ -1517,7 +1516,6 @@ static int pim_rcv(struct sk_buff * skb)
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
@@ -1580,6 +1578,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
if (cache==NULL) {
+ struct sk_buff *skb2;
struct net_device *dev;
int vif;
@@ -1593,12 +1592,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
read_unlock(&mrt_lock);
return -ENODEV;
}
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
- skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
- skb->nh.iph->saddr = rt->rt_src;
- skb->nh.iph->daddr = rt->rt_dst;
- skb->nh.iph->version = 0;
- err = ipmr_cache_unresolved(vif, skb);
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ read_unlock(&mrt_lock);
+ return -ENOMEM;
+ }
+
+ skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
+ skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
+ skb2->nh.iph->saddr = rt->rt_src;
+ skb2->nh.iph->daddr = rt->rt_dst;
+ skb2->nh.iph->version = 0;
+ err = ipmr_cache_unresolved(vif, skb2);
read_unlock(&mrt_lock);
return err;
}
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index f28ec68..6a28faf 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -735,12 +735,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
- dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+ dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
if (dest == NULL) {
IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
return -ENOMEM;
}
- memset(dest, 0, sizeof(struct ip_vs_dest));
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
@@ -1050,14 +1049,12 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
goto out_mod_dec;
}
- svc = (struct ip_vs_service *)
- kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+ svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
if (svc == NULL) {
IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
ret = -ENOMEM;
goto out_err;
}
- memset(svc, 0, sizeof(struct ip_vs_service));
/* I'm the first user of the service */
atomic_set(&svc->usecnt, 1);
@@ -1797,7 +1794,7 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc = -ENOMEM;
- struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
@@ -1808,7 +1805,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = s;
- memset(s, 0, sizeof(*s));
out:
return rc;
out_kfree:
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 4c19403..7d68b80 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -123,11 +123,10 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est;
- est = kmalloc(sizeof(*est), GFP_KERNEL);
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOMEM;
- memset(est, 0, sizeof(*est));
est->stats = stats;
est->last_conns = stats->conns;
est->cps = stats->cps<<10;
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index a19a33c..37fafb1 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -46,14 +46,7 @@
*/
static int ports[IP_VS_APP_MAX_PORTS] = {21, 0};
module_param_array(ports, int, NULL, 0);
-
-/*
- * Debug level
- */
-#ifdef CONFIG_IP_VS_DEBUG
-static int debug=0;
-module_param(debug, int, 0);
-#endif
+MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
/* Dummy variable */
@@ -177,7 +170,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
&start, &end) != 1)
return 1;
- IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
+ IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n",
NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
@@ -280,7 +273,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
/* Passive mode on */
- IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n",
+ IP_VS_DBG(7, "got PASV at %zd of %zd\n",
data - data_start,
data_limit - data_start);
cp->app_data = &ip_vs_ftp_pasv;
@@ -302,7 +295,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
&start, &end) != 1)
return 1;
- IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
+ IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
NIPQUAD(to), ntohs(port));
/* Passive mode off */
@@ -311,7 +304,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Now update or create a connection entry for it
*/
- IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
ip_vs_proto_name(iph->protocol),
NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
@@ -372,11 +365,17 @@ static int __init ip_vs_ftp_init(void)
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
+ if (ports[i] < 0 || ports[i] > 0xffff) {
+ IP_VS_WARNING("ip_vs_ftp: Ignoring invalid "
+ "configuration port[%d] = %d\n",
+ i, ports[i]);
+ continue;
+ }
ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
if (ret)
break;
- IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n",
- app->name, i, ports[i]);
+ IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
+ app->name, i, ports[i]);
}
if (ret)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 80c73ca..8d1d7a6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
- struct xt_table_info *private = table->private;
+ struct xt_table_info *private;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
outdev = out ? out->name : nulldevname;
read_lock_bh(&table->lock);
+ private = table->private;
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -1170,21 +1171,34 @@ static int __init arp_tables_init(void)
{
int ret;
- xt_proto_init(NF_ARP);
+ ret = xt_proto_init(NF_ARP);
+ if (ret < 0)
+ goto err1;
/* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(&arpt_standard_target);
- xt_register_target(&arpt_error_target);
+ ret = xt_register_target(&arpt_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&arpt_error_target);
+ if (ret < 0)
+ goto err3;
/* Register setsockopt */
ret = nf_register_sockopt(&arpt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err4;
printk("arp_tables: (C) 2002 David S. Miller\n");
return 0;
+
+err4:
+ xt_unregister_target(&arpt_error_target);
+err3:
+ xt_unregister_target(&arpt_standard_target);
+err2:
+ xt_proto_fini(NF_ARP);
+err1:
+ return ret;
}
static void __exit arp_tables_fini(void)
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index af35235..9a39e29 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -1200,7 +1200,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
tuple.dst.protonum = IPPROTO_TCP;
exp = __ip_conntrack_expect_find(&tuple);
- if (exp->master == ct)
+ if (exp && exp->master == ct)
return exp;
return NULL;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 33891bb..0d4cc92 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0], *id);
read_lock_bh(&ip_conntrack_lock);
+ last = (struct ip_conntrack *)cb->args[1];
for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
restart:
- last = (struct ip_conntrack *)cb->args[1];
list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
h = (struct ip_conntrack_tuple_hash *) i;
if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = tuplehash_to_ctrack(h);
- if (last != NULL) {
- if (ct == last) {
- ip_conntrack_put(last);
- cb->args[1] = 0;
- last = NULL;
- } else
+ if (cb->args[1]) {
+ if (ct != last)
continue;
+ cb->args[1] = 0;
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -440,17 +437,17 @@ restart:
goto out;
}
}
- if (last != NULL) {
- ip_conntrack_put(last);
+ if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
read_unlock_bh(&ip_conntrack_lock);
+ if (last)
+ ip_conntrack_put(last);
DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
return skb->len;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index fc87ce0..4f222d6 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -442,7 +442,7 @@ static int __init init(void)
sip[i].tuple.src.u.udp.port = htons(ports[i]);
sip[i].mask.src.u.udp.port = 0xFFFF;
sip[i].mask.dst.protonum = 0xFF;
- sip[i].max_expected = 1;
+ sip[i].max_expected = 2;
sip[i].timeout = 3 * 60; /* 3 minutes */
sip[i].me = THIS_MODULE;
sip[i].help = sip_help;
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7bd3c22..7a9fa04 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -534,6 +534,8 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
/* Sysctl support */
+int ip_conntrack_checksum = 1;
+
#ifdef CONFIG_SYSCTL
/* From ip_conntrack_core.c */
@@ -568,8 +570,6 @@ extern unsigned int ip_ct_generic_timeout;
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-int ip_conntrack_checksum = 1;
-
static struct ctl_table_header *ip_ct_sysctl_header;
static ctl_table ip_ct_sysctl_table[] = {
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 0b1b416..18b7fbd 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1255,9 +1255,9 @@ static int help(struct sk_buff **pskb,
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
return NF_ACCEPT;
- if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT;
/* No NAT? */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fc5bdd5..048514f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -230,7 +230,7 @@ ipt_do_table(struct sk_buff **pskb,
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
- struct xt_table_info *private = table->private;
+ struct xt_table_info *private;
/* Initialization */
ip = (*pskb)->nh.iph;
@@ -247,6 +247,7 @@ ipt_do_table(struct sk_buff **pskb,
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ private = table->private;
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -2239,22 +2240,39 @@ static int __init ip_tables_init(void)
{
int ret;
- xt_proto_init(AF_INET);
+ ret = xt_proto_init(AF_INET);
+ if (ret < 0)
+ goto err1;
/* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(&ipt_standard_target);
- xt_register_target(&ipt_error_target);
- xt_register_match(&icmp_matchstruct);
+ ret = xt_register_target(&ipt_standard_target);
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_target(&ipt_error_target);
+ if (ret < 0)
+ goto err3;
+ ret = xt_register_match(&icmp_matchstruct);
+ if (ret < 0)
+ goto err4;
/* Register setsockopt */
ret = nf_register_sockopt(&ipt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err5;
printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
return 0;
+
+err5:
+ xt_unregister_match(&icmp_matchstruct);
+err4:
+ xt_unregister_target(&ipt_error_target);
+err3:
+ xt_unregister_target(&ipt_standard_target);
+err2:
+ xt_proto_fini(AF_INET);
+err1:
+ return ret;
}
static void __exit ip_tables_fini(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index cbffeae..d994c5f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -172,11 +172,10 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
struct clusterip_config *c;
char buffer[16];
- c = kmalloc(sizeof(*c), GFP_ATOMIC);
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return NULL;
- memset(c, 0, sizeof(*c));
c->dev = dev;
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d7dd7fe..d46fd67 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -115,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum)
del_timer(&ub->timer);
}
+ if (!ub->skb) {
+ DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+ return;
+ }
+
/* last nlmsg needs NLMSG_DONE */
if (ub->qlen > 1)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 92980ab..3bd2368 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *skb,
dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
hinfo->cfg.burst);
dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
- spin_unlock_bh(&hinfo->lock);
- return 1;
+ } else {
+ /* update expiration timeout */
+ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+ rateinfo_recalc(dh, now);
}
- /* update expiration timeout */
- dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
- rateinfo_recalc(dh, now);
if (dh->rateinfo.credit >= dh->rateinfo.cost) {
/* We're underlimit. */
dh->rateinfo.credit -= dh->rateinfo.cost;
@@ -508,6 +505,9 @@ hashlimit_checkentry(const char *tablename,
if (!r->cfg.expire)
return 0;
+ if (r->name[sizeof(r->name) - 1] != '\0')
+ return 0;
+
/* This is the best we've got: We cannot release and re-grab lock,
* since checkentry() is called before ip_tables.c grabs ipt_mutex.
* We also cannot grab the hashtable spinlock, since htable_create will
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bd221ec..62b2762 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -609,6 +609,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sin) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2dc6dbb..b873cbc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -104,6 +104,7 @@
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/ip_mp_alg.h>
+#include <net/netevent.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
struct rtable *rth, **rthp;
u32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
+ struct netevent_redirect netevent;
if (!in_dev)
return;
@@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
rt_drop(rt);
goto do_next;
}
+
+ netevent.old = &rth->u.dst;
+ netevent.new = &rt->u.dst;
+ call_netevent_notifiers(NETEVENT_REDIRECT,
+ &netevent);
rt_del(hash, rth);
if (!rt_intern_hash(hash, rt, &rt))
@@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
}
dst->metrics[RTAX_MTU-1] = mtu;
dst_set_expires(dst, ip_rt_mtu_expires);
+ call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
@@ -3149,7 +3157,7 @@ int __init ip_rt_init(void)
rhash_entries,
(num_physpages >= 128 * 1024) ?
15 : 17,
- HASH_HIGHMEM,
+ 0,
&rt_hash_log,
&rt_hash_mask,
0);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f6a2d92..934396b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
tp->ucopy.dma_chan = NULL;
preempt_disable();
if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
- !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+ !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
preempt_enable_no_resched();
tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
} else
@@ -1659,7 +1659,8 @@ adjudge_to_death:
const int tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
+ inet_csk_reset_keepalive_timer(sk,
+ tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto out;
diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c
deleted file mode 100644
index bc54f7e..0000000
--- a/net/ipv4/tcp_compound.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * TCP Vegas congestion control
- *
- * This is based on the congestion detection/avoidance scheme described in
- * Lawrence S. Brakmo and Larry L. Peterson.
- * "TCP Vegas: End to end congestion avoidance on a global internet."
- * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
- * October 1995. Available from:
- * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
- *
- * See http://www.cs.arizona.edu/xkernel/ for their implementation.
- * The main aspects that distinguish this implementation from the
- * Arizona Vegas implementation are:
- * o We do not change the loss detection or recovery mechanisms of
- * Linux in any way. Linux already recovers from losses quite well,
- * using fine-grained timers, NewReno, and FACK.
- * o To avoid the performance penalty imposed by increasing cwnd
- * only every-other RTT during slow start, we increase during
- * every RTT during slow start, just like Reno.
- * o Largely to allow continuous cwnd growth during slow start,
- * we use the rate at which ACKs come back as the "actual"
- * rate, rather than the rate at which data is sent.
- * o To speed convergence to the right rate, we set the cwnd
- * to achieve the right ("actual") rate when we exit slow start.
- * o To filter out the noise caused by delayed ACKs, we use the
- * minimum RTT sample observed during the last RTT to calculate
- * the actual rate.
- * o When the sender re-starts from idle, it waits until it has
- * received ACKs for an entire flight of new data before making
- * a cwnd adjustment decision. The original Vegas implementation
- * assumed senders never went idle.
- *
- *
- * TCP Compound based on TCP Vegas
- *
- * further details can be found here:
- * ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/inet_diag.h>
-
-#include <net/tcp.h>
-
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-
-#define TCP_COMPOUND_ALPHA 3U
-#define TCP_COMPOUND_BETA 1U
-#define TCP_COMPOUND_GAMMA 30
-#define TCP_COMPOUND_ZETA 1
-
-/* TCP compound variables */
-struct compound {
- u32 beg_snd_nxt; /* right edge during last RTT */
- u32 beg_snd_una; /* left edge during last RTT */
- u32 beg_snd_cwnd; /* saves the size of the cwnd */
- u8 doing_vegas_now; /* if true, do vegas for this RTT */
- u16 cntRTT; /* # of RTTs measured within last RTT */
- u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
- u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
-
- u32 cwnd;
- u32 dwnd;
-};
-
-/* There are several situations when we must "re-start" Vegas:
- *
- * o when a connection is established
- * o after an RTO
- * o after fast recovery
- * o when we send a packet and there is no outstanding
- * unacknowledged data (restarting an idle connection)
- *
- * In these circumstances we cannot do a Vegas calculation at the
- * end of the first RTT, because any calculation we do is using
- * stale info -- both the saved cwnd and congestion feedback are
- * stale.
- *
- * Instead we must wait until the completion of an RTT during
- * which we actually receive ACKs.
- */
-static inline void vegas_enable(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct compound *vegas = inet_csk_ca(sk);
-
- /* Begin taking Vegas samples next time we send something. */
- vegas->doing_vegas_now = 1;
-
- /* Set the beginning of the next send window. */
- vegas->beg_snd_nxt = tp->snd_nxt;
-
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
-}
-
-/* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct sock *sk)
-{
- struct compound *vegas = inet_csk_ca(sk);
-
- vegas->doing_vegas_now = 0;
-}
-
-static void tcp_compound_init(struct sock *sk)
-{
- struct compound *vegas = inet_csk_ca(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
-
- vegas->baseRTT = 0x7fffffff;
- vegas_enable(sk);
-
- vegas->dwnd = 0;
- vegas->cwnd = tp->snd_cwnd;
-}
-
-/* Do RTT sampling needed for Vegas.
- * Basically we:
- * o min-filter RTT samples from within an RTT to get the current
- * propagation delay + queuing delay (we are min-filtering to try to
- * avoid the effects of delayed ACKs)
- * o min-filter RTT samples from a much longer window (forever for now)
- * to find the propagation delay (baseRTT)
- */
-static void tcp_compound_rtt_calc(struct sock *sk, u32 usrtt)
-{
- struct compound *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
-
- /* Filter to find propagation delay: */
- if (vrtt < vegas->baseRTT)
- vegas->baseRTT = vrtt;
-
- /* Find the min RTT during the last RTT to find
- * the current prop. delay + queuing delay:
- */
-
- vegas->minRTT = min(vegas->minRTT, vrtt);
- vegas->cntRTT++;
-}
-
-static void tcp_compound_state(struct sock *sk, u8 ca_state)
-{
-
- if (ca_state == TCP_CA_Open)
- vegas_enable(sk);
- else
- vegas_disable(sk);
-}
-
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u64 div64_64(u64 dividend, u64 divisor)
-{
- u32 d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (u32) dividend / d;
-
- return dividend;
-}
-
-/* calculate the quartic root of "a" using Newton-Raphson */
-static u32 qroot(u64 a)
-{
- u32 x, x1;
-
- /* Initial estimate is based on:
- * qrt(x) = exp(log(x) / 4)
- */
- x = 1u << (fls64(a) >> 2);
-
- /*
- * Iteration based on:
- * 3
- * x = ( 3 * x + a / x ) / 4
- * k+1 k k
- */
- do {
- u64 x3 = x;
-
- x1 = x;
- x3 *= x;
- x3 *= x;
-
- x = (3 * x + (u32) div64_64(a, x3)) / 4;
- } while (abs(x1 - x) > 1);
-
- return x;
-}
-
-
-/*
- * If the connection is idle and we are restarting,
- * then we don't want to do any Vegas calculations
- * until we get fresh RTT samples. So when we
- * restart, we reset our Vegas state to a clean
- * slate. After we get acks for this flight of
- * packets, _then_ we can make Vegas calculations
- * again.
- */
-static void tcp_compound_cwnd_event(struct sock *sk, enum tcp_ca_event event)
-{
- if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
- tcp_compound_init(sk);
-}
-
-static void tcp_compound_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct compound *vegas = inet_csk_ca(sk);
- u8 inc = 0;
-
- if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) {
- if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) {
- vegas->cwnd = tp->snd_cwnd;
- vegas->dwnd = 0;
- } else
- vegas->cwnd = tp->snd_cwnd - vegas->dwnd;
-
- }
-
- if (!tcp_is_cwnd_limited(sk, in_flight))
- return;
-
- if (vegas->cwnd <= tp->snd_ssthresh)
- inc = 1;
- else if (tp->snd_cwnd_cnt < tp->snd_cwnd)
- tp->snd_cwnd_cnt++;
-
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- inc = 1;
- tp->snd_cwnd_cnt = 0;
- }
-
- if (inc && tp->snd_cwnd < tp->snd_cwnd_clamp)
- vegas->cwnd++;
-
- /* The key players are v_beg_snd_una and v_beg_snd_nxt.
- *
- * These are so named because they represent the approximate values
- * of snd_una and snd_nxt at the beginning of the current RTT. More
- * precisely, they represent the amount of data sent during the RTT.
- * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
- * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
- * bytes of data have been ACKed during the course of the RTT, giving
- * an "actual" rate of:
- *
- * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
- *
- * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
- * because delayed ACKs can cover more than one segment, so they
- * don't line up nicely with the boundaries of RTTs.
- *
- * Another unfortunate fact of life is that delayed ACKs delay the
- * advance of the left edge of our send window, so that the number
- * of bytes we send in an RTT is often less than our cwnd will allow.
- * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
- */
-
- if (after(ack, vegas->beg_snd_nxt)) {
- /* Do the Vegas once-per-RTT cwnd adjustment. */
- u32 old_wnd, old_snd_cwnd;
-
- /* Here old_wnd is essentially the window of data that was
- * sent during the previous RTT, and has all
- * been acknowledged in the course of the RTT that ended
- * with the ACK we just received. Likewise, old_snd_cwnd
- * is the cwnd during the previous RTT.
- */
- if (!tp->mss_cache)
- return;
-
- old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
- tp->mss_cache;
- old_snd_cwnd = vegas->beg_snd_cwnd;
-
- /* Save the extent of the current window so we can use this
- * at the end of the next RTT.
- */
- vegas->beg_snd_una = vegas->beg_snd_nxt;
- vegas->beg_snd_nxt = tp->snd_nxt;
- vegas->beg_snd_cwnd = tp->snd_cwnd;
-
- /* We do the Vegas calculations only if we got enough RTT
- * samples that we can be reasonably sure that we got
- * at least one RTT sample that wasn't from a delayed ACK.
- * If we only had 2 samples total,
- * then that means we're getting only 1 ACK per RTT, which
- * means they're almost certainly delayed ACKs.
- * If we have 3 samples, we should be OK.
- */
-
- if (vegas->cntRTT > 2) {
- u32 rtt, target_cwnd, diff;
- u32 brtt, dwnd;
-
- /* We have enough RTT samples, so, using the Vegas
- * algorithm, we determine if we should increase or
- * decrease cwnd, and by how much.
- */
-
- /* Pluck out the RTT we are using for the Vegas
- * calculations. This is the min RTT seen during the
- * last RTT. Taking the min filters out the effects
- * of delayed ACKs, at the cost of noticing congestion
- * a bit later.
- */
- rtt = vegas->minRTT;
-
- /* Calculate the cwnd we should have, if we weren't
- * going too fast.
- *
- * This is:
- * (actual rate in segments) * baseRTT
- * We keep it as a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary point.
- */
- if (!rtt)
- return;
-
- brtt = vegas->baseRTT;
- target_cwnd = ((old_wnd * brtt)
- << V_PARAM_SHIFT) / rtt;
-
- /* Calculate the difference between the window we had,
- * and the window we would like to have. This quantity
- * is the "Diff" from the Arizona Vegas papers.
- *
- * Again, this is a fixed point number with
- * V_PARAM_SHIFT bits to the right of the binary
- * point.
- */
-
- diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
-
- dwnd = vegas->dwnd;
-
- if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) {
- u64 v;
- u32 x;
-
- /*
- * The TCP Compound paper describes the choice
- * of "k" determines the agressiveness,
- * ie. slope of the response function.
- *
- * For same value as HSTCP would be 0.8
- * but for computaional reasons, both the
- * original authors and this implementation
- * use 0.75.
- */
- v = old_wnd;
- x = qroot(v * v * v) >> TCP_COMPOUND_ALPHA;
- if (x > 1)
- dwnd = x - 1;
- else
- dwnd = 0;
-
- dwnd += vegas->dwnd;
-
- } else if ((dwnd << V_PARAM_SHIFT) <
- (diff * TCP_COMPOUND_BETA))
- dwnd = 0;
- else
- dwnd =
- ((dwnd << V_PARAM_SHIFT) -
- (diff *
- TCP_COMPOUND_BETA)) >> V_PARAM_SHIFT;
-
- vegas->dwnd = dwnd;
-
- }
-
- /* Wipe the slate clean for the next RTT. */
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
- }
-
- tp->snd_cwnd = vegas->cwnd + vegas->dwnd;
-}
-
-/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_compound_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
-{
- const struct compound *ca = inet_csk_ca(sk);
- if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info *info;
-
- info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
- sizeof(*info)));
-
- info->tcpv_enabled = ca->doing_vegas_now;
- info->tcpv_rttcnt = ca->cntRTT;
- info->tcpv_rtt = ca->baseRTT;
- info->tcpv_minrtt = ca->minRTT;
- rtattr_failure:;
- }
-}
-
-static struct tcp_congestion_ops tcp_compound = {
- .init = tcp_compound_init,
- .ssthresh = tcp_reno_ssthresh,
- .cong_avoid = tcp_compound_cong_avoid,
- .rtt_sample = tcp_compound_rtt_calc,
- .set_state = tcp_compound_state,
- .cwnd_event = tcp_compound_cwnd_event,
- .get_info = tcp_compound_get_info,
-
- .owner = THIS_MODULE,
- .name = "compound",
-};
-
-static int __init tcp_compound_register(void)
-{
- BUG_ON(sizeof(struct compound) > ICSK_CA_PRIV_SIZE);
- tcp_register_congestion_control(&tcp_compound);
- return 0;
-}
-
-static void __exit tcp_compound_unregister(void)
-{
- tcp_unregister_congestion_control(&tcp_compound);
-}
-
-module_init(tcp_compound_register);
-module_exit(tcp_compound_unregister);
-
-MODULE_AUTHOR("Angelo P. Castellani, Stephen Hemminger");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("TCP Compound");
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 5765f9d..7ff2e42 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,7 +189,7 @@ void tcp_slow_start(struct tcp_sock *tp)
return;
/* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index aaa1538..fa3e1aa 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -139,14 +139,19 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
tp->snd_cwnd++;
}
} else {
- /* Update AIMD parameters */
+ /* Update AIMD parameters.
+ *
+ * We want to guarantee that:
+ * hstcp_aimd_vals[ca->ai-1].cwnd <
+ * snd_cwnd <=
+ * hstcp_aimd_vals[ca->ai].cwnd
+ */
if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
- ca->ai > 0)
+ } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 738dad9..159fa3f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,7 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc = 1;
+int sysctl_tcp_abc;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -2505,8 +2505,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una))
goto old_ack;
- if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
- tp->bytes_acked += ack - prior_snd_una;
+ if (sysctl_tcp_abc) {
+ if (icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+ else if (icsk->icsk_ca_state == TCP_CA_Loss)
+ /* we assume just one segment left network */
+ tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+ }
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
@@ -3541,7 +3546,8 @@ void tcp_cwnd_application_limited(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
- u32 win_used = max(tp->snd_cwnd_used, 2U);
+ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ u32 win_used = max(tp->snd_cwnd_used, init_win);
if (win_used < tp->snd_cwnd) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a886e6..4b04c3e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
It can f.e. if SYNs crossed.
*/
if (!sock_owned_by_user(sk)) {
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
sk->sk_error_report(sk);
@@ -496,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
}
}
+int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ iph = skb->nh.iph;
+ th = skb->h.th;
+
+ th->check = 0;
+ th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
+ skb->csum = offsetof(struct tcphdr, check);
+ skb->ip_summed = CHECKSUM_HW;
+ return 0;
+}
+
/*
* This routine will send an RST to the other tcp.
*
@@ -856,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
drop_and_free:
reqsk_free(req);
drop:
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
return 0;
}
@@ -1622,10 +1638,9 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
if (unlikely(afinfo == NULL))
return -EINVAL;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
- memset(s, 0, sizeof(*s));
s->family = afinfo->family;
s->seq_ops.start = tcp_seq_start;
s->seq_ops.next = tcp_seq_next;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 1f977b6..48f28d6 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -3,13 +3,8 @@
*
* TCP Low Priority is a distributed algorithm whose goal is to utilize only
* the excess network bandwidth as compared to the ``fair share`` of
- * bandwidth as targeted by TCP. Available from:
- * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
+ * bandwidth as targeted by TCP.
*
- * Original Author:
- * Aleksandar Kuzmanovic <akuzma@northwestern.edu>
- *
- * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation.
* As of 2.6.13, Linux supports pluggable congestion control algorithms.
* Due to the limitation of the API, we take the following changes from
* the original TCP-LP implementation:
@@ -24,11 +19,20 @@
* o OWD is handled in relative format, where local time stamp will in
* tcp_time_stamp format.
*
- * Port from 2.4.19 to 2.6.16 as module by:
- * Wong Hoi Sing Edison <hswong3i@gmail.com>
- * Hung Hing Lun <hlhung3i@gmail.com>
+ * Original Author:
+ * Aleksandar Kuzmanovic <akuzma@northwestern.edu>
+ * Available from:
+ * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
+ * Original implementation for 2.4.19:
+ * http://www-ece.rice.edu/networks/TCP-LP/
*
- * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $
+ * 2.6.x module Authors:
+ * Wong Hoi Sing, Edison <hswong3i@gmail.com>
+ * Hung Hing Lun, Mike <hlhung3i@gmail.com>
+ * SourceForge project page:
+ * http://tcp-lp-mod.sourceforge.net/
+ *
+ * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
*/
#include <linux/config.h>
@@ -153,16 +157,19 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
if (m < 0)
m = -m;
- if (rhz != 0) {
+ if (rhz > 0) {
m -= rhz >> 6; /* m is now error in remote HZ est */
rhz += m; /* 63/64 old + 1/64 new */
} else
rhz = m << 6;
+ out:
/* record time for successful remote HZ calc */
- lp->flag |= LP_VALID_RHZ;
+ if (rhz > 0)
+ lp->flag |= LP_VALID_RHZ;
+ else
+ lp->flag &= ~LP_VALID_RHZ;
- out:
/* record reference time stamp */
lp->remote_ref_time = tp->rx_opt.rcv_tsval;
lp->local_ref_time = tp->rx_opt.rcv_tsecr;
@@ -333,6 +340,6 @@ static void __exit tcp_lp_unregister(void)
module_init(tcp_lp_register);
module_exit(tcp_lp_unregister);
-MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun");
+MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Low Priority");
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ccb7cb..624e2b2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* RFC793: "second check the RST bit" and
* "fourth, check the SYN bit"
*/
- if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
+ }
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5c08ea2..b4f3ffe 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* See RFC1323 for an explanation of the limit to 14
*/
space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
(*rcv_wscale)++;
@@ -466,7 +467,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (skb->len != tcp_header_size)
tcp_event_data_sent(tp, skb, sk);
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
+ TCP_INC_STATS(TCP_MIB_OUTSEGS);
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
if (likely(err <= 0))
@@ -2157,10 +2159,9 @@ int tcp_connect(struct sock *sk)
skb_shinfo(buff)->gso_size = 0;
skb_shinfo(buff)->gso_type = 0;
buff->csum = 0;
+ tp->snd_nxt = tp->write_seq;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
- tp->snd_nxt = tp->write_seq;
- tp->pushed_seq = tp->write_seq;
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2170,6 +2171,12 @@ int tcp_connect(struct sock *sk)
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+
+ /* We change tp->snd_nxt after the tcp_transmit_skb() call
+ * in order to make this packet get counted in tcpOutSegs.
+ */
+ tp->snd_nxt = tp->write_seq;
+ tp->pushed_seq = tp->write_seq;
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d7d517a..dab37d2 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file)
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
- int error = 0, cnt;
+ int error = 0, cnt = 0;
unsigned char *tbuf;
if (!buf || len < 0)
@@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
error = wait_event_interruptible(tcpw.wait,
__kfifo_len(tcpw.fifo) != 0);
if (error)
- return error;
+ goto out_free;
cnt = kfifo_get(tcpw.fifo, tbuf, len);
error = copy_to_user(buf, tbuf, cnt);
+out_free:
vfree(tbuf);
return error ? error : cnt;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9bfcdda..f136cec 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1468,11 +1468,10 @@ static int udp_seq_open(struct inode *inode, struct file *file)
struct udp_seq_afinfo *afinfo = PDE(inode)->data;
struct seq_file *seq;
int rc = -ENOMEM;
- struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
goto out;
- memset(s, 0, sizeof(*s));
s->family = afinfo->family;
s->seq_ops.start = udp_seq_start;
s->seq_ops.next = udp_seq_next;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f8d880b..13cafbe 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,6 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb->mac.raw = memmove(skb->data - skb->mac_len,
skb->mac.raw, skb->mac_len);
skb->nh.raw = skb->data;
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
err = 0;
out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 193363e..d16f863 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -134,7 +134,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
}
#endif
- if (!skb_shinfo(skb)->gso_size)
+ if (!skb_is_gso(skb))
return xfrm4_output_finish2(skb);
skb->protocol = htons(ETH_P_IP);