diff options
author | Jiri Kosina <jkosina@suse.cz> | 2011-05-18 17:06:31 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2011-05-18 17:06:49 +0200 |
commit | 6b7b8e488bbdedeccabdd001a78ffcbe43bb8a3a (patch) | |
tree | f2f77cc31b4548745778fca6a51b09e1d8a49804 /net/netfilter | |
parent | b50f315cbb865079a16a12fd9ae6083f98fd592c (diff) | |
parent | c1d10d18c542278b7fbc413c289d3cb6219da6b3 (diff) | |
download | kernel_samsung_smdk4412-6b7b8e488bbdedeccabdd001a78ffcbe43bb8a3a.zip kernel_samsung_smdk4412-6b7b8e488bbdedeccabdd001a78ffcbe43bb8a3a.tar.gz kernel_samsung_smdk4412-6b7b8e488bbdedeccabdd001a78ffcbe43bb8a3a.tar.bz2 |
Merge branch 'master' into upstream.
This is sync with Linus' tree to receive KEY_IMAGES definition
that went in through input tree.
Diffstat (limited to 'net/netfilter')
34 files changed, 499 insertions, 359 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3f988a..32bff6d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -652,7 +652,6 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index bca9699..a113ff0 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 5e79017..a274300 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -343,6 +343,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, ipset_adtfn adtfn = set->variant->adt[adt]; struct ipmac data; + /* MAC can be src only */ + if (!(flags & IPSET_DIM_TWO_SRC)) + return 0; + data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); if (data.id < map->first_ip || data.id > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -434,8 +438,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + (map->last_ip - map->first_ip + 1) * map->dsize)); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 165f09b..6b38eb8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 618a615..72d1ac6 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -26,6 +26,7 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ +static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ static struct ip_set **ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ @@ -94,16 +95,28 @@ static int find_set_type_get(const char *name, u8 family, u8 revision, struct ip_set_type **found) { + struct ip_set_type *type; + int err; + rcu_read_lock(); *found = find_set_type(name, family, revision); if (*found) { - int err = !try_module_get((*found)->me); - rcu_read_unlock(); - return err ? -EFAULT : 0; + err = !try_module_get((*found)->me) ? -EFAULT : 0; + goto unlock; } + /* Make sure the type is loaded but we don't support the revision */ + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name)) { + err = -IPSET_ERR_FIND_TYPE; + goto unlock; + } rcu_read_unlock(); return try_to_load_type(name); + +unlock: + rcu_read_unlock(); + return err; } /* Find a given set type by name and family. @@ -116,7 +129,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) struct ip_set_type *type; bool found = false; - *min = *max = 0; + *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && @@ -124,7 +137,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) found = true; if (type->revision < *min) *min = type->revision; - else if (type->revision > *max) + if (type->revision > *max) *max = type->revision; } rcu_read_unlock(); @@ -289,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static inline void __ip_set_get(ip_set_id_t index) { - atomic_inc(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + ip_set_list[index]->ref++; + write_unlock_bh(&ip_set_ref_lock); } static inline void __ip_set_put(ip_set_id_t index) { - atomic_dec(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + BUG_ON(ip_set_list[index]->ref == 0); + ip_set_list[index]->ref--; + write_unlock_bh(&ip_set_ref_lock); } /* @@ -312,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -344,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -366,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -385,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del); * Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * - * The nfnl mutex must already be activated. */ ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set) @@ -411,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * - * The nfnl mutex must already be activated. */ void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + if (ip_set_list[index] != NULL) __ip_set_put(index); - } } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -429,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * can't be destroyed. The set cannot be renamed due to * the referencing either. * - * The nfnl mutex must already be activated. */ const char * ip_set_name_byindex(ip_set_id_t index) @@ -437,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index) const struct ip_set *set = ip_set_list[index]; BUG_ON(set == NULL); - BUG_ON(atomic_read(&set->ref) == 0); + BUG_ON(set->ref == 0); /* Referenced, so it's safe */ return set->name; @@ -503,10 +516,7 @@ void ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); - __ip_set_put(index); - } + ip_set_put_byindex(index); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -514,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* * Communication protocol with userspace over netlink. * - * We already locked by nfnl_lock. + * The commands are serialized by the nfnl mutex. */ static inline bool @@ -645,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); - atomic_set(&set->ref, 0); set->family = family; /* @@ -678,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Here, we have a valid, constructed set and we are protected - * by nfnl_lock. Find the first free index in ip_set_list and - * check clashing. + * by the nfnl mutex. Find the first free index in ip_set_list + * and check clashing. */ if ((ret = find_free_id(set->name, &index, &clash)) != 0) { /* If this is the same set and requested, ignore error */ @@ -739,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[]) { ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* References are protected by the nfnl mutex */ + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call + * ip_set_put|get_nfnl_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ + read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - (atomic_read(&ip_set_list[i]->ref))) - return -IPSET_ERR_BUSY; + if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + ret = IPSET_ERR_BUSY; + goto out; + } } + read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } } else { i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) - return -ENOENT; - else if (atomic_read(&ip_set_list[i]->ref)) - return -IPSET_ERR_BUSY; + if (i == IPSET_INVALID_ID) { + ret = -ENOENT; + goto out; + } else if (ip_set_list[i]->ref) { + ret = -IPSET_ERR_BUSY; + goto out; + } + read_unlock_bh(&ip_set_ref_lock); ip_set_destroy_set(i); } return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Flush sets */ @@ -822,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; const char *name2; ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -831,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; - if (atomic_read(&set->ref) != 0) - return -IPSET_ERR_REFERENCED; + + read_lock_bh(&ip_set_ref_lock); + if (set->ref != 0) { + ret = -IPSET_ERR_REFERENCED; + goto out; + } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) - return -IPSET_ERR_EXIST_SETNAME2; + STREQ(ip_set_list[i]->name, name2)) { + ret = -IPSET_ERR_EXIST_SETNAME2; + goto out; + } } strncpy(set->name, name2, IPSET_MAXNAMELEN); - return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * - * We are protected by the nfnl mutex and references are - * manipulated only by holding the mutex. The kernel interfaces + * The commands are serialized by the nfnl mutex and references are + * protected by the ip_set_ref_lock. The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. */ @@ -862,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -881,23 +918,21 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, to = ip_set_list[to_id]; /* Features must not change. - * Not an artifical restriction anymore, as we must prevent + * Not an artificial restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. */ if (!(from->type->features == to->type->features && from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); - from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - atomic_set(&from->ref, atomic_read(&to->ref)); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - atomic_set(&to->ref, from_ref); + write_lock_bh(&ip_set_ref_lock); + swap(from->ref, to->ref); ip_set_list[from_id] = to; ip_set_list[to_id] = from; + write_unlock_bh(&ip_set_ref_lock); return 0; } @@ -914,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); - __ip_set_put((ip_set_id_t) cb->args[1]); + ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; } @@ -987,8 +1022,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[1] >= ip_set_max) goto out; - pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; +dump_last: + pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; set = ip_set_list[index]; @@ -1003,8 +1039,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) * so that lists (unions of sets) are dumped last. */ if (cb->args[0] != DUMP_ONE && - !((cb->args[0] == DUMP_ALL) ^ - (set->type->features & IPSET_DUMP_LAST))) + ((cb->args[0] == DUMP_ALL) == + !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); if (!cb->args[2]) { @@ -1048,6 +1084,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) goto release_refcount; } } + /* If we dump all sets, continue with dumping last ones */ + if (cb->args[0] == DUMP_ALL) { + cb->args[0] = DUMP_LAST; + cb->args[1] = 0; + goto dump_last; + } goto out; nla_put_failure: @@ -1056,13 +1098,8 @@ release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { pr_debug("release set %s\n", ip_set_list[index]->name); - __ip_set_put(index); + ip_set_put_byindex(index); } - - /* If we dump all sets, continue with dumping last ones */ - if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2]) - cb->args[0] = DUMP_LAST; - out: if (nlh) { nlmsg_end(skb, nlh); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index adbe787..b921414 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 22e23ab..4642872 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6033e8b..2cb84a5 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34a1656..8598676 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport4_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a47c329..e9159e9 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -43,14 +43,19 @@ struct list_set { static inline struct set_elem * list_set_elem(const struct list_set *map, u32 id) { - return (struct set_elem *)((char *)map->members + id * map->dsize); + return (struct set_elem *)((void *)map->members + id * map->dsize); +} + +static inline struct set_telem * +list_set_telem(const struct list_set *map, u32 id) +{ + return (struct set_telem *)((void *)map->members + id * map->dsize); } static inline bool list_set_timeout(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_test(elem->timeout); } @@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id) static inline bool list_set_expired(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_expired(elem->timeout); } -static inline int -list_set_exist(const struct set_telem *elem) -{ - return elem->id != IPSET_INVALID_ID && - !ip_set_timeout_expired(elem->timeout); -} - /* Set list without and with timeout */ static int @@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, struct set_telem *e; for (; i < map->size; i++) { - e = (struct set_telem *)list_set_elem(map, i); + e = list_set_telem(map, i); swap(e->id, id); + swap(e->timeout, timeout); if (e->id == IPSET_INVALID_ID) break; - swap(e->timeout, timeout); } } @@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); if (with_timeout(map->timeout)) - list_elem_tadd(map, i, id, timeout); + list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else list_elem_add(map, i, id); @@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, } static int -list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +list_set_del(struct list_set *map, u32 i) { struct set_elem *a = list_set_elem(map, i), *b; - ip_set_put_byindex(id); + ip_set_put_byindex(a->id); for (; i < map->size - 1; i++) { b = list_set_elem(map, i + 1); @@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], (before == 0 || (before > 0 && next_id_eq(map, i, refid)))) - ret = list_set_del(map, id, i); + ret = list_set_del(map, i); else if (before < 0 && elem->id == refid && next_id_eq(map, i, id)) - ret = list_set_del(map, id, i + 1); + ret = list_set_del(map, i + 1); } break; default: @@ -369,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize)); ipset_nest_end(skb, nested); @@ -461,16 +457,13 @@ list_set_gc(unsigned long ul_set) struct set_telem *e; u32 i; - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (i = map->size - 1; i >= 0; i--) { - e = (struct set_telem *) list_set_elem(map, i); - if (e->id != IPSET_INVALID_ID && - list_set_expired(map, i)) - list_set_del(map, e->id, i); + write_lock_bh(&set->lock); + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); } - read_unlock_bh(&set->lock); + write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 5c48ffb..059af31 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); +static DEFINE_MUTEX(__ip_vs_app_mutex); + /* * Get an ip_vs_app object */ @@ -167,14 +169,13 @@ int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, __u16 port) { - struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); return result; } @@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); list_add(&app->a_list, &ipvs->app_list); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); return 0; } @@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { ip_vs_app_inc_release(net, inc); @@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) list_del(&app->a_list); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } @@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); - - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -574,40 +572,30 @@ static const struct file_operations ip_vs_app_fops = { .open = ip_vs_app_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif -static int __net_init __ip_vs_app_init(struct net *net) +int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); INIT_LIST_HEAD(&ipvs->app_list); - __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } -static void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit __ip_vs_app_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_app_init, - .exit = __ip_vs_app_cleanup, -}; - int __init ip_vs_app_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_app_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index f289306..bf28ac2 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -595,7 +595,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_inc(&dest->inactconns); } else { /* It is a persistent connection/template, so increase - the peristent connection counter */ + the persistent connection counter */ atomic_inc(&dest->persistconns); } @@ -657,7 +657,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) } } else { /* It is a persistent connection/template, so decrease - the peristent connection counter */ + the persistent connection counter */ atomic_dec(&dest->persistconns); } @@ -1046,7 +1046,7 @@ static const struct file_operations ip_vs_conn_fops = { .open = ip_vs_conn_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; static const char *ip_vs_origin_name(unsigned flags) @@ -1114,7 +1114,7 @@ static const struct file_operations ip_vs_conn_sync_fops = { .open = ip_vs_conn_sync_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif @@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -static void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit __ip_vs_conn_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } -static struct pernet_operations ipvs_conn_ops = { - .init = __ip_vs_conn_init, - .exit = __ip_vs_conn_cleanup, -}; int __init ip_vs_conn_init(void) { int idx; - int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - retc = register_pernet_subsys(&ipvs_conn_ops); - /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return retc; + return 0; } void ip_vs_conn_cleanup(void) { - unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 07accf6..a74dae6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return NF_ACCEPT; /* The packet looks wrong, ignore */ net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* Bad... Do not break raw sockets */ @@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - net = skb_net(skb); /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); if (unlikely(!pd)) @@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - net = skb_net(skb); ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp(skb, &r, hooknum); } @@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp_v6(skb, &r, hooknum); } #endif @@ -1884,19 +1903,70 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + /* Hold the beast until a service is registerd */ + ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; + + if (__ip_vs_estimator_init(net) < 0) + goto estimator_fail; + + if (__ip_vs_control_init(net) < 0) + goto control_fail; + + if (__ip_vs_protocol_init(net) < 0) + goto protocol_fail; + + if (__ip_vs_app_init(net) < 0) + goto app_fail; + + if (__ip_vs_conn_init(net) < 0) + goto conn_fail; + + if (__ip_vs_sync_init(net) < 0) + goto sync_fail; + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; +/* + * Error handling + */ + +sync_fail: + __ip_vs_conn_cleanup(net); +conn_fail: + __ip_vs_app_cleanup(net); +app_fail: + __ip_vs_protocol_cleanup(net); +protocol_fail: + __ip_vs_control_cleanup(net); +control_fail: + __ip_vs_estimator_cleanup(net); +estimator_fail: + return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { - IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); + __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ + __ip_vs_conn_cleanup(net); + __ip_vs_app_cleanup(net); + __ip_vs_protocol_cleanup(net); + __ip_vs_control_cleanup(net); + __ip_vs_estimator_cleanup(net); + IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); +} + +static void __net_exit __ip_vs_dev_cleanup(struct net *net) +{ + EnterFunction(2); + net_ipvs(net)->enable = 0; /* Disable packet reception */ + __ip_vs_sync_cleanup(net); + LeaveFunction(2); } static struct pernet_operations ipvs_core_ops = { @@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = { .size = sizeof(struct netns_ipvs), }; +static struct pernet_operations ipvs_core_dev_ops = { + .exit = __ip_vs_dev_cleanup, +}; + /* * Initialize IP Virtual Server */ @@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void) { int ret; - ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ - if (ret < 0) - return ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { @@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void) goto cleanup_conn; } + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + goto cleanup_sync; + + ret = register_pernet_device(&ipvs_core_dev_ops); + if (ret < 0) + goto cleanup_sub; + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_sync; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); + return ret; +cleanup_dev: + unregister_pernet_device(&ipvs_core_dev_ops); +cleanup_sub: + unregister_pernet_subsys(&ipvs_core_ops); cleanup_sync: ip_vs_sync_cleanup(); cleanup_conn: @@ -1964,20 +2047,20 @@ cleanup_sync: ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + unregister_pernet_device(&ipvs_core_dev_ops); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b799cea..37890f2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void) } #endif + +/* Protos */ +static void __ip_vs_del_service(struct ip_vs_service *svc); + + #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ static int __ip_vs_addr_is_local_v6(struct net *net, @@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; + /* Now there is a service - full throttle */ + ipvs->enable = 1; return 0; @@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net) return 0; } +/* + * Delete service by {netns} in the service table. + * Called by __ip_vs_cleanup() + */ +void __ip_vs_service_cleanup(struct net *net) +{ + EnterFunction(2); + /* Check for "full" addressed entries */ + mutex_lock(&__ip_vs_mutex); + ip_vs_flush(net); + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); +} +/* + * Release dst hold by dst_cache + */ +static inline void +__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +{ + spin_lock_bh(&dest->dst_lock); + if (dest->dst_cache && dest->dst_cache->dev == dev) { + IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", + dev->name, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + ip_vs_dst_reset(dest); + } + spin_unlock_bh(&dest->dst_lock); + +} +/* + * Netdev event receiver + * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to + * a device that is "unregister" it must be released. + */ +static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net *net = dev_net(dev); + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + unsigned int idx; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + EnterFunction(2); + mutex_lock(&__ip_vs_mutex); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + } + + list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + + } + } + + list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + __ip_vs_dev_reset(dest, dev); + } + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); + return NOTIFY_DONE; +} /* * Zero counters in a service or all services @@ -1981,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = { .open = ip_vs_info_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif @@ -2024,7 +2109,7 @@ static const struct file_operations ip_vs_stats_fops = { .open = ip_vs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) @@ -2093,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { .open = ip_vs_stats_percpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; #endif @@ -3120,7 +3205,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_net(skb); + struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); @@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } #endif +static struct notifier_block ip_vs_dst_notifier = { + .notifier_call = ip_vs_dst_event, +}; + int __net_init __ip_vs_control_init(struct net *net) { int idx; @@ -3605,7 +3694,7 @@ int __net_init __ip_vs_control_init(struct net *net) /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (ipvs->tot_stats.cpustats) { + if (!ipvs->tot_stats.cpustats) { pr_err("%s(): alloc_percpu.\n", __func__); return -ENOMEM; } @@ -3626,7 +3715,7 @@ err: return -ENOMEM; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -static struct pernet_operations ipvs_control_ops = { - .init = __ip_vs_control_init, - .exit = __ip_vs_control_cleanup, -}; - int __init ip_vs_control_init(void) { int idx; @@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) { - pr_err("cannot register namespace.\n"); - goto err; - } - smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - goto err_net; + goto err_sock; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); - nf_unregister_sockopt(&ip_vs_sockopts); - goto err_net; + goto err_genl; } + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + goto err_notf; + LeaveFunction(2); return 0; -err_net: - unregister_pernet_subsys(&ipvs_control_ops); -err: +err_notf: + ip_vs_genl_unregister(); +err_genl: + nf_unregister_sockopt(&ip_vs_sockopts); +err_sock: return ret; } @@ -3691,7 +3774,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 8c8766c..508cce9 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -static int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -static void __net_exit __ip_vs_estimator_exit(struct net *net) +void __net_exit __ip_vs_estimator_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_estimator_init, - .exit = __ip_vs_estimator_exit, -}; int __init ip_vs_estimator_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_estimator_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index f276df9..87e40ea 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -131,7 +131,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); /* - * We don't kfree dest because it is refered either by its service + * We don't kfree dest because it is referred either by its service * or the trash dest list. */ atomic_dec(&en->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index cb1c991..90f618a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -152,7 +152,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { /* - * We don't kfree dest because it is refered either + * We don't kfree dest because it is referred either * by its service or by the trash dest list. */ atomic_dec(&e->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 17484a4..eb86028 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -static int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit __ip_vs_protocol_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net) } } -static struct pernet_operations ipvs_proto_ops = { - .init = __ip_vs_protocol_init, - .exit = __ip_vs_protocol_cleanup, -}; - int __init ip_vs_protocol_init(void) { char protocols[64]; @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); - return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; - unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index b027ccc..d12ed53 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -566,7 +566,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -633,7 +633,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -701,7 +701,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -771,7 +771,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3e7961e..e292e5b 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) struct socket *sock; int result; - /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + /* First create a socket move it to right name space later */ + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) int result; /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; @@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo); return 0; @@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo->buf); kfree(tinfo); @@ -1601,7 +1611,7 @@ outtinfo: outbuf: kfree(buf); outsocket: - sock_release(sock); + sk_release_kernel(sock->sk); out: return result; } @@ -1610,6 +1620,7 @@ out: int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); + int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); @@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) spin_lock_bh(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock_bh(&ipvs->sync_lock); - kthread_stop(ipvs->master_thread); + retc = kthread_stop(ipvs->master_thread); ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { if (!ipvs->backup_thread) @@ -1639,22 +1650,20 @@ int stop_sync_thread(struct net *net, int state) task_pid_nr(ipvs->backup_thread)); ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(ipvs->backup_thread); + retc = kthread_stop(ipvs->backup_thread); ipvs->backup_thread = NULL; - } else { - return -EINVAL; } /* decrease the module use count */ ip_vs_use_count_dec(); - return 0; + return retc; } /* * Initialize data struct for each netns */ -static int __net_init __ip_vs_sync_init(struct net *net) +int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1668,24 +1677,24 @@ static int __net_init __ip_vs_sync_init(struct net *net) return 0; } -static void __ip_vs_sync_cleanup(struct net *net) +void __ip_vs_sync_cleanup(struct net *net) { - stop_sync_thread(net, IP_VS_STATE_MASTER); - stop_sync_thread(net, IP_VS_STATE_BACKUP); -} + int retc; -static struct pernet_operations ipvs_sync_ops = { - .init = __ip_vs_sync_init, - .exit = __ip_vs_sync_cleanup, -}; + retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Master Daemon\n"); + retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Backup Daemon\n"); +} int __init ip_vs_sync_init(void) { - return register_pernet_subsys(&ipvs_sync_ops); + return 0; } void ip_vs_sync_cleanup(void) { - unregister_pernet_subsys(&ipvs_sync_ops); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 941286c..2e1c11f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -453,7 +453,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) REJECT will give spurious warnings here. */ /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ - /* No external references means noone else could have + /* No external references means no one else could have confirmed us. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); @@ -901,7 +901,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { - pr_debug("not prepared to track yet or error occured\n"); + pr_debug("not prepared to track yet or error occurred\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); ret = -ret; diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 8678823..bcd5ed6 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, CHECK_BOUND(bs, 2); count = *bs->cur++; count <<= 8; - count = *bs->cur++; + count += *bs->cur++; break; case SEMI: BYTE_ALIGN(bs); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 533a183..18b2ce5 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -731,10 +731,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -755,10 +755,10 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); - if (!afinfo->route((struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2))) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1), false)) { + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 30bf8a1..482e90c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1334,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conn *ct; int err = -EINVAL; struct nf_conntrack_helper *helper; + struct nf_conn_tstamp *tstamp; ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) @@ -1451,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 9ae57c5..2e664a6 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -98,7 +98,7 @@ static const char * const dccp_state_names[] = { #define sIV CT_DCCP_INVALID /* - * DCCP state transistion table + * DCCP state transition table * * The assumption is the same as for TCP tracking: * diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6f4ee70..6772b11 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -107,9 +107,9 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} }, { @@ -121,7 +121,7 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index bcf47eb..237cc19 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -707,7 +707,7 @@ static const char *ct_sdp_header_search(const char *dptr, const char *limit, } /* Locate a SDP header (optionally a substring within the header value), - * optionally stopping at the first occurence of the term header, parse + * optionally stopping at the first occurrence of the term header, parse * it and return the offset and length of the data we're interested in. */ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5ab22e2..5b466cd 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -134,7 +134,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; - /* QUEUE == DROP if noone is waiting, to be safe. */ + /* QUEUE == DROP if no one is waiting, to be safe. */ rcu_read_lock(); qh = rcu_dereference(queue_handler[pf]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a9adf4c..8a025a5 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -455,6 +455,7 @@ void xt_compat_flush_offsets(u_int8_t af) vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; xt[af].number = 0; + xt[af].cur = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); @@ -473,8 +474,7 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset) else return mid ? tmp[mid - 1].delta : 0; } - WARN_ON_ONCE(1); - return 0; + return left ? tmp[left - 1].delta : 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 0a22919..ae82716 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t orig, nv; orig = ipv6_get_dsfield(iph); - nv = (orig & info->tos_mask) ^ info->tos_value; + nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { if (!skb_make_writable(skb, sizeof(struct iphdr))) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6e6b46c..9e63b43 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 2220b85..b77d383 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr) { + const struct nf_afinfo *afinfo; + struct flowi6 flow; + struct rt6_info *rt; u32 ret; + int route_err; - if (!rt) + memset(&flow, 0, sizeof(flow)); + ipv6_addr_copy(&flow.daddr, addr); + if (dev) + flow.flowi6_oif = dev->ifindex; + + rcu_read_lock(); + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (afinfo != NULL) + route_err = afinfo->route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), !!dev); + else + route_err = 1; + + rcu_read_unlock(); + + if (route_err) return XT_ADDRTYPE_UNREACHABLE; if (rt->rt6i_flags & RTF_REJECT) @@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; + + + dst_release(&rt->dst); return ret; } @@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev, return false; if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | - XT_ADDRTYPE_UNREACHABLE) & mask) { - struct rt6_info *rt; - u32 type; - int ifindex = dev ? dev->ifindex : 0; - - rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); - - type = xt_addrtype_rt6_to_type(rt); - - dst_release(&rt->dst); - return !!(mask & type); - } + XT_ADDRTYPE_UNREACHABLE) & mask) + return !!(mask & match_lookup_rt6(net, dev, addr)); return true; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 2c0086a..61805d7 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return info->match_flags & XT_CONNTRACK_STATE; if ((info->match_flags & XT_CONNTRACK_DIRECTION) && (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ - !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) + !(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) @@ -272,11 +272,6 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - if (strcmp(par->table, "raw") == 0) { - pr_info("state is undetermined at the time of raw table\n"); - return -EINVAL; - } - ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 061d48c..b3babae 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,6 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); return -ERANGE; } @@ -135,6 +136,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); return -ENOENT; } } @@ -142,6 +145,10 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); return -ERANGE; } @@ -192,6 +199,7 @@ set_match_checkentry(const struct xt_mtchk_param *par) if (info->match_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); return -ERANGE; } @@ -219,7 +227,7 @@ set_target(struct sk_buff *skb, const struct xt_action_param *par) if (info->del_set.index != IPSET_INVALID_ID) ip_set_del(info->del_set.index, skb, par->family, - info->add_set.dim, + info->del_set.dim, info->del_set.flags); return XT_CONTINUE; @@ -245,13 +253,19 @@ set_target_checkentry(const struct xt_tgchk_param *par) if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); return -ENOENT; } } if (info->add_set.dim > IPSET_DIM_MAX || - info->del_set.flags > IPSET_DIM_MAX) { + info->del_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); return -ERANGE; } |