diff options
-rw-r--r-- | lib/Target/CellSPU/SPU.h | 29 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 15 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUOperands.td | 10 | ||||
-rw-r--r-- | test/CodeGen/CellSPU/and_ops.ll | 270 |
5 files changed, 322 insertions, 4 deletions
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index aee87fb..4555fda 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -25,7 +25,7 @@ namespace llvm { FunctionPass *createSPUISelDag(SPUTargetMachine &TM); FunctionPass *createSPUAsmPrinterPass(std::ostream &o, SPUTargetMachine &tm); - /* Utility functions/predicates/etc used all over the place: */ + /*--== Utility functions/predicates/etc used all over the place: --==*/ //! Predicate test for a signed 10-bit value /*! \param Value The input value to be tested @@ -54,6 +54,33 @@ namespace llvm { inline bool isS10Constant(uint64_t Value) { return (Value <= ((1 << 9) - 1)); } + + //! Predicate test for an unsigned 10-bit value + /*! + \param Value The input value to be tested + + This predicate tests whether the value fits in an unsigned 10-bit field, + returning true if it does. + */ + inline bool isU10Constant(short Value) { + return (Value == (Value & 0x3ff)); + } + + inline bool isU10Constant(int Value) { + return (Value == (Value & 0x3ff)); + } + + inline bool isU10Constant(uint32_t Value) { + return (Value == (Value & 0x3ff)); + } + + inline bool isU10Constant(int64_t Value) { + return (Value == (Value & 0x3ff)); + } + + inline bool isU10Constant(uint64_t Value) { + return (Value == (Value & 0x3ff)); + } } // Defines symbolic names for the SPU instructions. diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 935064d..ab02a81 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -78,6 +78,21 @@ namespace { && isI16IntS10Immediate(cast<ConstantSDNode>(N))); } + //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values + bool + isI16IntU10Immediate(ConstantSDNode *CN) + { + return isU10Constant((short) CN->getValue()); + } + + //! 
SDNode predicate for i16 unsigned 10-bit immediate values + bool + isI16IntU10Immediate(SDNode *N) + { + return (N->getOpcode() == ISD::Constant + && isI16IntU10Immediate(cast<ConstantSDNode>(N))); + } + //! ConstantSDNode predicate for signed 16-bit values /*! \arg CN The constant SelectionDAG node holding the value diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index faa6a7c..2ec14d2 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1127,7 +1127,7 @@ def ANDHIv8i16: def ANDHIr16: RI10Form<0b10101000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), "andhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (and R16C:$rA, i16ImmSExt10:$val))]>; + [(set R16C:$rT, (and R16C:$rA, i16ImmU10:$val))]>; def ANDIv4i32: RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 05270dd..70103b8 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -99,12 +99,18 @@ def i32ImmSExt10 : PatLeaf<(imm), [{ return isI32IntS10Immediate(N); }]>; -// i16ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign +// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign // extended field. Used by RI10Form instructions like 'ldq'. def i16ImmSExt10 : PatLeaf<(imm), [{ return isI16IntS10Immediate(N); }]>; +// i16ImmU10 predicate - True if the i16 immediate fits into a 10-bit unsigned +// value. Used by RI10Form instructions. +def i16ImmU10 : PatLeaf<(imm), [{ + return isI16IntU10Immediate(N); +}]>; + def immSExt16 : PatLeaf<(imm), [{ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended // field. 
@@ -206,7 +212,7 @@ def fpimm18 : PatLeaf<(fpimm), [{ }], FPimm_u18>; //===----------------------------------------------------------------------===// -// 64-bit operands: +// 64-bit operands (TODO): //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll new file mode 100644 index 0000000..5c88d7e --- /dev/null +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -0,0 +1,270 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 227 +; RUN: grep andc %t1.s | count 85 +; RUN: grep andi %t1.s | count 36 +; RUN: grep andhi %t1.s | count 31 +; RUN: grep andbi %t1.s | count 1 + +; AND instruction generation: +define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @and_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @and_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @and_i16_1(i16 %arg1, i16 %arg2) { + %A = and i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @and_i16_2(i16 %arg1, i16 %arg2) { + %A = and i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @and_i8_1(i8 %arg1, i8 %arg2) { + %A = and i8 
%arg2, %arg1 + ret i8 %A +} + +define i8 @and_i8_2(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 + ret i8 %A +} + +; ANDC instruction generation: +define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg1, %A + ret <4 x i32> %B +} + +define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %arg2, %A + ret <4 x i32> %B +} + +define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %arg2 + ret <4 x i32> %B +} + +define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg1, %A + ret <8 x i16> %B +} + +define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %arg2, %A + ret <8 x i16> %B +} + +define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg2, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %arg1, %A + ret <16 x i8> %B +} + +define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %arg1 + ret <16 x i8> %B +} + +define i32 @andc_i32_1(i32 %arg1, i32 
%arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %A, %arg1 + ret i32 %B +} + +define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg1, -1 + %B = and i32 %A, %arg2 + ret i32 %B +} + +define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { + %A = xor i32 %arg2, -1 + %B = and i32 %arg1, %A + ret i32 %B +} + +define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = and i16 %A, %arg1 + ret i16 %B +} + +define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg1, -1 + %B = and i16 %A, %arg2 + ret i16 %B +} + +define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { + %A = xor i16 %arg2, -1 + %B = and i16 %arg1, %A + ret i16 %B +} + +define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %A, %arg1 + ret i8 %B +} + +define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg1, -1 + %B = and i8 %A, %arg2 + ret i8 %B +} + +define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { + %A = xor i8 %arg2, -1 + %B = and i8 %arg1, %A + ret i8 %B +} + +; ANDI instruction generation (i32 data type): +define <4 x i32> @andi_v4i32_1(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_2(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_3(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <4 x i32> @andi_v4i32_4(<4 x i32> %in) { + %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 > + ret <4 x i32> %tmp2 +} + +define i32 @andi_u32(i32 zeroext %in) zeroext { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +define i32 @andi_i32(i32 signext %in) signext { + %tmp38 = and i32 %in, 37 + ret i32 %tmp38 +} + +define i32 @andi_i32_1(i32 %in) { + %tmp37 = and i32 %in, 37 + ret i32 %tmp37 +} + +; ANDHI instruction generation (i16 data type): +define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) { + %tmp2 = 
and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511, + i16 511, i16 511, i16 511, i16 511 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510, + i16 510, i16 510, i16 510, i16 510 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) { + %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512, + i16 -512, i16 -512, i16 -512, i16 -512 > + ret <8 x i16> %tmp2 +} + +define i16 @andhi_u16(i16 zeroext %in) zeroext { + %tmp37 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp37 +} + +define i16 @andhi_i16(i16 signext %in) signext { + %tmp38 = and i16 %in, 37 ; <i16> [#uses=1] + ret i16 %tmp38 +} + +; i8 data type (s/b ANDBI if 8-bit registers were supported): +define <16 x i8> @and_v16i8(<16 x i8> %in) { + ; ANDBI generated for vector types + %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, + i8 42, i8 42, i8 42, i8 42 > + ret <16 x i8> %tmp2 +} + +define i8 @and_u8(i8 zeroext %in) zeroext { + ; ANDI generated: + %tmp37 = and i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp37 +} + +define i8 @and_i8(i8 signext %in) signext { + ; ANDHI generated + %tmp38 = and i8 %in, 37 ; <i8> [#uses=1] + ret i8 %tmp38 +} |