diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 20 | ||||
-rw-r--r-- | net/sched/Makefile | 2 | ||||
-rw-r--r-- | net/sched/act_api.c | 59 | ||||
-rw-r--r-- | net/sched/cls_api.c | 12 | ||||
-rw-r--r-- | net/sched/em_canid.c | 240 | ||||
-rw-r--r-- | net/sched/em_ipset.c | 135 | ||||
-rw-r--r-- | net/sched/sch_api.c | 24 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 9 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 47 |
9 files changed, 479 insertions, 69 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e7a8976..62fb51f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -507,6 +507,26 @@ config NET_EMATCH_TEXT To compile this code as a module, choose M here: the module will be called em_text. +config NET_EMATCH_CANID + tristate "CAN Identifier" + depends on NET_EMATCH && CAN + ---help--- + Say Y here if you want to be able to classify CAN frames based + on CAN Identifier. + + To compile this code as a module, choose M here: the + module will be called em_canid. + +config NET_EMATCH_IPSET + tristate "IPset" + depends on NET_EMATCH && IP_SET + ---help--- + Say Y here if you want to be able to classify packets based on + ipset membership. + + To compile this code as a module, choose M here: the + module will be called em_ipset. + config NET_CLS_ACT bool "Actions" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 5940a19..978cbf0 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -55,3 +55,5 @@ obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o obj-$(CONFIG_NET_EMATCH_META) += em_meta.o obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o +obj-$(CONFIG_NET_EMATCH_CANID) += em_canid.o +obj-$(CONFIG_NET_EMATCH_IPSET) += em_ipset.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5cfb160..e3d2c78 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -652,27 +652,27 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); - - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + if (!nlh) + goto out_nlmsg_trim; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_nlmsg_trim; if (tcf_action_dump(skb, a, bind, ref) < 0) - goto nla_put_failure; + goto out_nlmsg_trim; nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nla_put_failure: -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } @@ -799,19 +799,21 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t)); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + if (!nlh) + goto out_module_put; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_module_put; err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) - goto nla_put_failure; + goto out_module_put; if (err == 0) goto noflush_out; @@ -828,8 +830,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, return err; -nla_put_failure: -nlmsg_failure: +out_module_put: module_put(a->ops->owner); err_out: noflush_out: @@ -919,18 +920,20 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + if (!nlh) + goto out_kfree_skb; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_kfree_skb; if (tcf_action_dump(skb, a, 0, 0) < 0) - goto nla_put_failure; + goto out_kfree_skb; nla_nest_end(skb, nest); @@ -942,8 +945,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, err = 0; return err; -nla_put_failure: -nlmsg_failure: +out_kfree_skb: kfree_skb(skb); return -1; } @@ -1062,7 +1064,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; struct tc_action a; int ret = 0; - struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); + struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { @@ -1080,23 +1082,25 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o->walk == NULL) { WARN(1, "tc_dump_action: %s !capable of dumping table\n", a_o->kind); - goto nla_put_failure; + goto out_module_put; } - nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, sizeof(*t)); - t = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*t), 0); + if (!nlh) + goto out_module_put; + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) - goto nla_put_failure; + goto out_module_put; ret = a_o->walk(skb, cb, RTM_GETACTION, &a); if (ret < 0) - goto nla_put_failure; + goto out_module_put; if (ret > 0) { nla_nest_end(skb, nest); @@ -1110,8 +1114,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) module_put(a_o->owner); return skb->len; -nla_put_failure: -nlmsg_failure: +out_module_put: module_put(a_o->owner); nlmsg_trim(skb, b); return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f452f69..6dd1131 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -140,7 +140,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int tp_created = 0; replay: - t = NLMSG_DATA(n); + t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); nprio = prio; @@ -349,8 +349,10 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -368,7 +370,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -418,7 +420,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct Qdisc *q; struct tcf_proto *tp, **chain; - struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); + struct tcmsg *tcm = nlmsg_data(cb->nlh); unsigned long cl = 0; const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c new file mode 100644 index 0000000..bfd34e4 --- /dev/null +++ b/net/sched/em_canid.c @@ -0,0 +1,240 @@ +/* + * em_canid.c Ematch rule to match CAN frames according to their CAN IDs + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Idea: Oliver Hartkopp <oliver.hartkopp@volkswagen.de> + * Copyright: (c) 2011 Czech Technical University in Prague + * (c) 2011 Volkswagen Group Research + * Authors: Michal Sojka <sojkam1@fel.cvut.cz> + * Pavel Pisa <pisa@cmp.felk.cvut.cz> + * Rostislav Lisovy <lisovy@gmail.cz> + * Funded by: Volkswagen Group Research + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <net/pkt_cls.h> +#include <linux/can.h> + +#define EM_CAN_RULES_MAX 500 + +struct canid_match { + /* For each SFF CAN ID (11 bit) there is one record in this bitfield */ + DECLARE_BITMAP(match_sff, (1 << CAN_SFF_ID_BITS)); + + int rules_count; + int sff_rules_count; + int eff_rules_count; + + /* + * Raw rules copied from netlink message; Used for sending + * information to userspace (when 'tc filter show' is invoked) + * AND when matching EFF frames + */ + struct can_filter rules_raw[]; +}; + +/** + * em_canid_get_id() - Extracts Can ID out of the sk_buff structure. + */ +static canid_t em_canid_get_id(struct sk_buff *skb) +{ + /* CAN ID is stored within the data field */ + struct can_frame *cf = (struct can_frame *)skb->data; + + return cf->can_id; +} + +static void em_canid_sff_match_add(struct canid_match *cm, u32 can_id, + u32 can_mask) +{ + int i; + + /* + * Limit can_mask and can_id to SFF range to + * protect against write after end of array + */ + can_mask &= CAN_SFF_MASK; + can_id &= can_mask; + + /* Single frame */ + if (can_mask == CAN_SFF_MASK) { + set_bit(can_id, cm->match_sff); + return; + } + + /* All frames */ + if (can_mask == 0) { + bitmap_fill(cm->match_sff, (1 << CAN_SFF_ID_BITS)); + return; + } + + /* + * Individual frame filter. + * Add record (set bit to 1) for each ID that + * conforms particular rule + */ + for (i = 0; i < (1 << CAN_SFF_ID_BITS); i++) { + if ((i & can_mask) == can_id) + set_bit(i, cm->match_sff); + } +} + +static inline struct canid_match *em_canid_priv(struct tcf_ematch *m) +{ + return (struct canid_match *)m->data; +} + +static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m, + struct tcf_pkt_info *info) +{ + struct canid_match *cm = em_canid_priv(m); + canid_t can_id; + int match = 0; + int i; + const struct can_filter *lp; + + can_id = em_canid_get_id(skb); + + if (can_id & CAN_EFF_FLAG) { + for (i = 0, lp = cm->rules_raw; + i < cm->eff_rules_count; i++, lp++) { + if (!(((lp->can_id ^ can_id) & lp->can_mask))) { + match = 1; + break; + } + } + } else { /* SFF */ + can_id &= CAN_SFF_MASK; + match = (test_bit(can_id, cm->match_sff) ? 1 : 0); + } + + return match; +} + +static int em_canid_change(struct tcf_proto *tp, void *data, int len, + struct tcf_ematch *m) +{ + struct can_filter *conf = data; /* Array with rules */ + struct canid_match *cm; + struct canid_match *cm_old = (struct canid_match *)m->data; + int i; + + if (!len) + return -EINVAL; + + if (len % sizeof(struct can_filter)) + return -EINVAL; + + if (len > sizeof(struct can_filter) * EM_CAN_RULES_MAX) + return -EINVAL; + + cm = kzalloc(sizeof(struct canid_match) + len, GFP_KERNEL); + if (!cm) + return -ENOMEM; + + cm->rules_count = len / sizeof(struct can_filter); + + /* + * We need two for() loops for copying rules into two contiguous + * areas in rules_raw to process all eff rules with a simple loop. + * NB: The configuration interface supports sff and eff rules. + * We do not support filters here that match for the same can_id + * provided in a SFF and EFF frame (e.g. 0x123 / 0x80000123). + * For this (unusual case) two filters have to be specified. The + * SFF/EFF separation is done with the CAN_EFF_FLAG in the can_id. + */ + + /* Fill rules_raw with EFF rules first */ + for (i = 0; i < cm->rules_count; i++) { + if (conf[i].can_id & CAN_EFF_FLAG) { + memcpy(cm->rules_raw + cm->eff_rules_count, + &conf[i], + sizeof(struct can_filter)); + + cm->eff_rules_count++; + } + } + + /* append SFF frame rules */ + for (i = 0; i < cm->rules_count; i++) { + if (!(conf[i].can_id & CAN_EFF_FLAG)) { + memcpy(cm->rules_raw + + cm->eff_rules_count + + cm->sff_rules_count, + &conf[i], sizeof(struct can_filter)); + + cm->sff_rules_count++; + + em_canid_sff_match_add(cm, + conf[i].can_id, conf[i].can_mask); + } + } + + m->datalen = sizeof(struct canid_match) + len; + m->data = (unsigned long)cm; + + if (cm_old != NULL) { + pr_err("canid: Configuring an existing ematch!\n"); + kfree(cm_old); + } + + return 0; +} + +static void em_canid_destroy(struct tcf_proto *tp, struct tcf_ematch *m) +{ + struct canid_match *cm = em_canid_priv(m); + + kfree(cm); +} + +static int em_canid_dump(struct sk_buff *skb, struct tcf_ematch *m) +{ + struct canid_match *cm = em_canid_priv(m); + + /* + * When configuring this ematch 'rules_count' is set not to exceed + * 'rules_raw' array size + */ + if (nla_put_nohdr(skb, sizeof(struct can_filter) * cm->rules_count, + &cm->rules_raw) < 0) + return -EMSGSIZE; + + return 0; +} + +static struct tcf_ematch_ops em_canid_ops = { + .kind = TCF_EM_CANID, + .change = em_canid_change, + .match = em_canid_match, + .destroy = em_canid_destroy, + .dump = em_canid_dump, + .owner = THIS_MODULE, + .link = LIST_HEAD_INIT(em_canid_ops.link) +}; + +static int __init init_em_canid(void) +{ + return tcf_em_register(&em_canid_ops); +} + +static void __exit exit_em_canid(void) +{ + tcf_em_unregister(&em_canid_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_em_canid); +module_exit(exit_em_canid); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CANID); diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c new file mode 100644 index 0000000..3130320 --- /dev/null +++ b/net/sched/em_ipset.c @@ -0,0 +1,135 @@ +/* + * net/sched/em_ipset.c ipset ematch + * + * Copyright (c) 2012 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/gfp.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_set.h> +#include <linux/ipv6.h> +#include <net/ip.h> +#include <net/pkt_cls.h> + +static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, + struct tcf_ematch *em) +{ + struct xt_set_info *set = data; + ip_set_id_t index; + + if (data_len != sizeof(*set)) + return -EINVAL; + + index = ip_set_nfnl_get_byindex(set->index); + if (index == IPSET_INVALID_ID) + return -ENOENT; + + em->datalen = sizeof(*set); + em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL); + if (em->data) + return 0; + + ip_set_nfnl_put(index); + return -ENOMEM; +} + +static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) +{ + const struct xt_set_info *set = (const void *) em->data; + if (set) { + ip_set_nfnl_put(set->index); + kfree((void *) em->data); + } +} + +static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, + struct tcf_pkt_info *info) +{ + struct ip_set_adt_opt opt; + struct xt_action_param acpar; + const struct xt_set_info *set = (const void *) em->data; + struct net_device *dev, *indev = NULL; + int ret, network_offset; + + switch (skb->protocol) { + case htons(ETH_P_IP): + acpar.family = NFPROTO_IPV4; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return 0; + acpar.thoff = ip_hdrlen(skb); + break; + case htons(ETH_P_IPV6): + acpar.family = NFPROTO_IPV6; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ + acpar.thoff = sizeof(struct ipv6hdr); + break; + default: + return 0; + } + + acpar.hooknum = 0; + + opt.family = acpar.family; + opt.dim = set->dim; + opt.flags = set->flags; + opt.cmdflags = 0; + opt.timeout = ~0u; + + network_offset = skb_network_offset(skb); + skb_pull(skb, network_offset); + + dev = skb->dev; + + rcu_read_lock(); + + if (dev && skb->skb_iif) + indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif); + + acpar.in = indev ? indev : dev; + acpar.out = dev; + + ret = ip_set_test(set->index, skb, &acpar, &opt); + + rcu_read_unlock(); + + skb_push(skb, network_offset); + return ret; +} + +static struct tcf_ematch_ops em_ipset_ops = { + .kind = TCF_EM_IPSET, + .change = em_ipset_change, + .destroy = em_ipset_destroy, + .match = em_ipset_match, + .owner = THIS_MODULE, + .link = LIST_HEAD_INIT(em_ipset_ops.link) +}; + +static int __init init_em_ipset(void) +{ + return tcf_em_register(&em_ipset_ops); +} + +static void __exit exit_em_ipset(void) +{ + tcf_em_unregister(&em_ipset_ops); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("TC extended match for IP sets"); + +module_init(init_em_ipset); +module_exit(exit_em_ipset); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPSET); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 085ce53..a08b4ab 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -973,7 +973,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = NLMSG_DATA(n); + struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid = tcm->tcm_parent; @@ -1046,7 +1046,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. */ - tcm = NLMSG_DATA(n); + tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; @@ -1193,8 +1193,10 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -1230,7 +1232,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -1366,7 +1368,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = NLMSG_DATA(n); + struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q = NULL; @@ -1498,8 +1500,10 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); - tcm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; @@ -1525,7 +1529,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; @@ -1616,7 +1620,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { - struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); + struct tcmsg *tcm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; struct net_device *dev; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c412ad0..298c0dd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -380,7 +380,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } - skb_orphan(skb); + /* If a delay is expected, orphan the skb. (orphaning usually takes + * place at TX completion time, so _before_ the link transit delay) + * Ideally, this orphaning should be done after the rate limiting + * module, because this breaks TCP Small Queue, and other mechanisms + * based on socket sk_wmem_alloc. + */ + if (q->latency || q->jitter) + skb_orphan(skb); /* * If we need to duplicate packet, then re-insert at top of the diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ca0c296..4741671 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -67,7 +67,6 @@ struct teql_master { struct teql_sched_data { struct Qdisc *next; struct teql_master *m; - struct neighbour *ncache; struct sk_buff_head q; }; @@ -134,7 +133,6 @@ teql_reset(struct Qdisc *sch) skb_queue_purge(&dat->q); sch->q.qlen = 0; - teql_neigh_release(xchg(&dat->ncache, NULL)); } static void @@ -166,7 +164,6 @@ teql_destroy(struct Qdisc *sch) } } skb_queue_purge(&dat->q); - teql_neigh_release(xchg(&dat->ncache, NULL)); break; } @@ -225,21 +222,25 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq, - struct neighbour *mn) + struct dst_entry *dst) { - struct teql_sched_data *q = qdisc_priv(txq->qdisc); - struct neighbour *n = q->ncache; + struct neighbour *n; + int err = 0; - if (mn->tbl == NULL) - return -EINVAL; - if (n && n->tbl == mn->tbl && - memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) { - atomic_inc(&n->refcnt); - } else { - n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev); - if (IS_ERR(n)) - return PTR_ERR(n); + n = dst_neigh_lookup_skb(dst, skb); + if (!n) + return -ENOENT; + + if (dst->dev != dev) { + struct neighbour *mn; + + mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev); + neigh_release(n); + if (IS_ERR(mn)) + return PTR_ERR(mn); + n = mn; } + if (neigh_event_send(n, skb_res) == 0) { int err; char haddr[MAX_ADDR_LEN]; @@ -248,15 +249,13 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, NULL, skb->len); - if (err < 0) { - neigh_release(n); - return -EINVAL; - } - teql_neigh_release(xchg(&q->ncache, n)); - return 0; + if (err < 0) + err = -EINVAL; + } else { + err = (skb_res == NULL) ? -EAGAIN : 1; } neigh_release(n); - return (skb_res == NULL) ? -EAGAIN : 1; + return err; } static inline int teql_resolve(struct sk_buff *skb, @@ -265,7 +264,6 @@ static inline int teql_resolve(struct sk_buff *skb, struct netdev_queue *txq) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *mn; int res; if (txq->qdisc == &noop_qdisc) @@ -275,8 +273,7 @@ static inline int teql_resolve(struct sk_buff *skb, return 0; rcu_read_lock(); - mn = dst_get_neighbour_noref(dst); - res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + res = __teql_resolve(skb, skb_res, dev, txq, dst); rcu_read_unlock(); return res; |