From 01f2f3f6ef4d076c0c10a8a7b42624416d56b523 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Sat, 19 Jun 2010 17:05:36 +0000 Subject: net: optimize Berkeley Packet Filter (BPF) processing Gcc is currenlty not in the ability to optimize the switch statement in sk_run_filter() because of dense case labels. This patch replace the OR'd labels with ordered sequenced case labels. The sk_chk_filter() function is modified to patch/replace the original OPCODES in a ordered but equivalent form. gcc is now in the ability to transform the switch statement in sk_run_filter into a jump table of complexity O(1). Until this patch gcc generates a sequence of conditional branches (O(n) of 567 byte .text segment size (arch x86_64): 7ff: 8b 06 mov (%rsi),%eax 801: 66 83 f8 35 cmp $0x35,%ax 805: 0f 84 d0 02 00 00 je adb 80b: 0f 87 07 01 00 00 ja 918 811: 66 83 f8 15 cmp $0x15,%ax 815: 0f 84 c5 02 00 00 je ae0 81b: 77 73 ja 890 81d: 66 83 f8 04 cmp $0x4,%ax 821: 0f 84 17 02 00 00 je a3e 827: 77 29 ja 852 829: 66 83 f8 01 cmp $0x1,%ax [...] With the modification the compiler translate the switch statement into the following jump table fragment: 7ff: 66 83 3e 2c cmpw $0x2c,(%rsi) 803: 0f 87 1f 02 00 00 ja a28 809: 0f b7 06 movzwl (%rsi),%eax 80c: ff 24 c5 00 00 00 00 jmpq *0x0(,%rax,8) 813: 44 89 e3 mov %r12d,%ebx 816: e9 43 03 00 00 jmpq b5e 81b: 41 89 dc mov %ebx,%r12d 81e: e9 3b 03 00 00 jmpq b5e Furthermore, I reordered the instructions to reduce cache line misses by order the most common instruction to the start. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/core/filter.c | 212 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 161 insertions(+), 51 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index da69fb7..52b051f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int fentry = &filter[pc]; switch (fentry->code) { - case BPF_ALU|BPF_ADD|BPF_X: + case BPF_S_ALU_ADD_X: A += X; continue; - case BPF_ALU|BPF_ADD|BPF_K: + case BPF_S_ALU_ADD_K: A += fentry->k; continue; - case BPF_ALU|BPF_SUB|BPF_X: + case BPF_S_ALU_SUB_X: A -= X; continue; - case BPF_ALU|BPF_SUB|BPF_K: + case BPF_S_ALU_SUB_K: A -= fentry->k; continue; - case BPF_ALU|BPF_MUL|BPF_X: + case BPF_S_ALU_MUL_X: A *= X; continue; - case BPF_ALU|BPF_MUL|BPF_K: + case BPF_S_ALU_MUL_K: A *= fentry->k; continue; - case BPF_ALU|BPF_DIV|BPF_X: + case BPF_S_ALU_DIV_X: if (X == 0) return 0; A /= X; continue; - case BPF_ALU|BPF_DIV|BPF_K: + case BPF_S_ALU_DIV_K: A /= fentry->k; continue; - case BPF_ALU|BPF_AND|BPF_X: + case BPF_S_ALU_AND_X: A &= X; continue; - case BPF_ALU|BPF_AND|BPF_K: + case BPF_S_ALU_AND_K: A &= fentry->k; continue; - case BPF_ALU|BPF_OR|BPF_X: + case BPF_S_ALU_OR_X: A |= X; continue; - case BPF_ALU|BPF_OR|BPF_K: + case BPF_S_ALU_OR_K: A |= fentry->k; continue; - case BPF_ALU|BPF_LSH|BPF_X: + case BPF_S_ALU_LSH_X: A <<= X; continue; - case BPF_ALU|BPF_LSH|BPF_K: + case BPF_S_ALU_LSH_K: A <<= fentry->k; continue; - case BPF_ALU|BPF_RSH|BPF_X: + case BPF_S_ALU_RSH_X: A >>= X; continue; - case BPF_ALU|BPF_RSH|BPF_K: + case BPF_S_ALU_RSH_K: A >>= fentry->k; continue; - case BPF_ALU|BPF_NEG: + case BPF_S_ALU_NEG: A = -A; continue; - case BPF_JMP|BPF_JA: + case BPF_S_JMP_JA: pc += fentry->k; continue; - case BPF_JMP|BPF_JGT|BPF_K: + case BPF_S_JMP_JGT_K: pc += (A > fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_K: + case BPF_S_JMP_JGE_K: pc += (A >= fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_S_JMP_JEQ_K: pc += (A == fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_K: + case BPF_S_JMP_JSET_K: pc += (A & fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGT|BPF_X: + case BPF_S_JMP_JGT_X: pc += (A > X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_X: + case BPF_S_JMP_JGE_X: pc += (A >= X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_S_JMP_JEQ_X: pc += (A == X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_X: + case BPF_S_JMP_JSET_X: pc += (A & X) ? fentry->jt : fentry->jf; continue; - case BPF_LD|BPF_W|BPF_ABS: + case BPF_S_LD_W_ABS: k = fentry->k; load_w: ptr = load_pointer(skb, k, 4, &tmp); @@ -217,7 +217,7 @@ load_w: continue; } break; - case BPF_LD|BPF_H|BPF_ABS: + case BPF_S_LD_H_ABS: k = fentry->k; load_h: ptr = load_pointer(skb, k, 2, &tmp); @@ -226,7 +226,7 @@ load_h: continue; } break; - case BPF_LD|BPF_B|BPF_ABS: + case BPF_S_LD_B_ABS: k = fentry->k; load_b: ptr = load_pointer(skb, k, 1, &tmp); @@ -235,54 +235,54 @@ load_b: continue; } break; - case BPF_LD|BPF_W|BPF_LEN: + case BPF_S_LD_W_LEN: A = skb->len; continue; - case BPF_LDX|BPF_W|BPF_LEN: + case BPF_S_LDX_W_LEN: X = skb->len; continue; - case BPF_LD|BPF_W|BPF_IND: + case BPF_S_LD_W_IND: k = X + fentry->k; goto load_w; - case BPF_LD|BPF_H|BPF_IND: + case BPF_S_LD_H_IND: k = X + fentry->k; goto load_h; - case BPF_LD|BPF_B|BPF_IND: + case BPF_S_LD_B_IND: k = X + fentry->k; goto load_b; - case BPF_LDX|BPF_B|BPF_MSH: + case BPF_S_LDX_B_MSH: ptr = load_pointer(skb, fentry->k, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; - case BPF_LD|BPF_IMM: + case BPF_S_LD_IMM: A = fentry->k; continue; - case BPF_LDX|BPF_IMM: + case BPF_S_LDX_IMM: X = fentry->k; continue; - case BPF_LD|BPF_MEM: + case BPF_S_LD_MEM: A = mem[fentry->k]; continue; - case BPF_LDX|BPF_MEM: + case BPF_S_LDX_MEM: X = mem[fentry->k]; continue; - case BPF_MISC|BPF_TAX: + case BPF_S_MISC_TAX: X = A; continue; - case BPF_MISC|BPF_TXA: + case BPF_S_MISC_TXA: A = X; continue; - case BPF_RET|BPF_K: + case BPF_S_RET_K: return fentry->k; - case BPF_RET|BPF_A: + case BPF_S_RET_A: return A; - case BPF_ST: + case BPF_S_ST: mem[fentry->k] = A; continue; - case BPF_STX: + case BPF_S_STX: mem[fentry->k] = X; continue; default: @@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen) /* Only allow valid instructions */ switch (ftest->code) { case BPF_ALU|BPF_ADD|BPF_K: + ftest->code = BPF_S_ALU_ADD_K; + break; case BPF_ALU|BPF_ADD|BPF_X: + ftest->code = BPF_S_ALU_ADD_X; + break; case BPF_ALU|BPF_SUB|BPF_K: + ftest->code = BPF_S_ALU_SUB_K; + break; case BPF_ALU|BPF_SUB|BPF_X: + ftest->code = BPF_S_ALU_SUB_X; + break; case BPF_ALU|BPF_MUL|BPF_K: + ftest->code = BPF_S_ALU_MUL_K; + break; case BPF_ALU|BPF_MUL|BPF_X: + ftest->code = BPF_S_ALU_MUL_X; + break; case BPF_ALU|BPF_DIV|BPF_X: + ftest->code = BPF_S_ALU_DIV_X; + break; case BPF_ALU|BPF_AND|BPF_K: + ftest->code = BPF_S_ALU_AND_K; + break; case BPF_ALU|BPF_AND|BPF_X: + ftest->code = BPF_S_ALU_AND_X; + break; case BPF_ALU|BPF_OR|BPF_K: + ftest->code = BPF_S_ALU_OR_K; + break; case BPF_ALU|BPF_OR|BPF_X: + ftest->code = BPF_S_ALU_OR_X; + break; case BPF_ALU|BPF_LSH|BPF_K: + ftest->code = BPF_S_ALU_LSH_K; + break; case BPF_ALU|BPF_LSH|BPF_X: + ftest->code = BPF_S_ALU_LSH_X; + break; case BPF_ALU|BPF_RSH|BPF_K: + ftest->code = BPF_S_ALU_RSH_K; + break; case BPF_ALU|BPF_RSH|BPF_X: + ftest->code = BPF_S_ALU_RSH_X; + break; case BPF_ALU|BPF_NEG: + ftest->code = BPF_S_ALU_NEG; + break; case BPF_LD|BPF_W|BPF_ABS: + ftest->code = BPF_S_LD_W_ABS; + break; case BPF_LD|BPF_H|BPF_ABS: + ftest->code = BPF_S_LD_H_ABS; + break; case BPF_LD|BPF_B|BPF_ABS: + ftest->code = BPF_S_LD_B_ABS; + break; case BPF_LD|BPF_W|BPF_LEN: + ftest->code = BPF_S_LD_W_LEN; + break; case BPF_LD|BPF_W|BPF_IND: + ftest->code = BPF_S_LD_W_IND; + break; case BPF_LD|BPF_H|BPF_IND: + ftest->code = BPF_S_LD_H_IND; + break; case BPF_LD|BPF_B|BPF_IND: + ftest->code = BPF_S_LD_B_IND; + break; case BPF_LD|BPF_IMM: + ftest->code = BPF_S_LD_IMM; + break; case BPF_LDX|BPF_W|BPF_LEN: + ftest->code = BPF_S_LDX_W_LEN; + break; case BPF_LDX|BPF_B|BPF_MSH: + ftest->code = BPF_S_LDX_B_MSH; + break; case BPF_LDX|BPF_IMM: + ftest->code = BPF_S_LDX_IMM; + break; case BPF_MISC|BPF_TAX: + ftest->code = BPF_S_MISC_TAX; + break; case BPF_MISC|BPF_TXA: + ftest->code = BPF_S_MISC_TXA; + break; case BPF_RET|BPF_K: + ftest->code = BPF_S_RET_K; + break; case BPF_RET|BPF_A: + ftest->code = BPF_S_RET_A; break; /* Some instructions need special checks */ - case BPF_ALU|BPF_DIV|BPF_K: /* check for division by zero */ + case BPF_ALU|BPF_DIV|BPF_K: if (ftest->k == 0) return -EINVAL; + ftest->code = BPF_S_ALU_DIV_K; break; + /* check for invalid memory addresses */ case BPF_LD|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LD_MEM; + break; case BPF_LDX|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LDX_MEM; + break; case BPF_ST: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_ST; + break; case BPF_STX: - /* check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; + ftest->code = BPF_S_STX; break; case BPF_JMP|BPF_JA: @@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ if (ftest->k >= (unsigned)(flen-pc-1)) return -EINVAL; + ftest->code = BPF_S_JMP_JA; break; case BPF_JMP|BPF_JEQ|BPF_K: + ftest->code = BPF_S_JMP_JEQ_K; + break; case BPF_JMP|BPF_JEQ|BPF_X: + ftest->code = BPF_S_JMP_JEQ_X; + break; case BPF_JMP|BPF_JGE|BPF_K: + ftest->code = BPF_S_JMP_JGE_K; + break; case BPF_JMP|BPF_JGE|BPF_X: + ftest->code = BPF_S_JMP_JGE_X; + break; case BPF_JMP|BPF_JGT|BPF_K: + ftest->code = BPF_S_JMP_JGT_K; + break; case BPF_JMP|BPF_JGT|BPF_X: + ftest->code = BPF_S_JMP_JGT_X; + break; case BPF_JMP|BPF_JSET|BPF_K: + ftest->code = BPF_S_JMP_JSET_K; + break; case BPF_JMP|BPF_JSET|BPF_X: + ftest->code = BPF_S_JMP_JSET_X; + break; + + default: + return -EINVAL; + } + /* for conditionals both must be safe */ + switch (ftest->code) { + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; - break; + } + } + /* last instruction must be a RET code */ + switch (filter[flen - 1].code) { + case BPF_S_RET_K: + case BPF_S_RET_A: + return 0; + break; default: return -EINVAL; } - } - - return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); -- cgit v1.1