about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp 14
-rw-r--r-- lib/Target/X86/X86Instr64bit.td 32
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp 32
-rw-r--r-- lib/Target/X86/X86InstrInfo.td 92
-rw-r--r-- test/CodeGen/X86/rot16.ll 73
-rw-r--r-- test/CodeGen/X86/rot32.ll 73
-rw-r--r-- test/CodeGen/X86/rot64.ll 73
7 files changed, 364 insertions, 25 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f546ed4..4b1945e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2053,13 +2053,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) {
}
}
- // Look for sign/zext/any-extended cases:
+ // Look for sign/zext/any-extended or truncate cases:
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
|| LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) &&
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
|| RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) {
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -2068,7 +2070,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) {
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
- if (ConstantSDNode *SUBC = cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, VT, LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
@@ -2080,7 +2083,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS) {
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
// (rotl x, (sub 32, y))
- if (ConstantSDNode *SUBC = cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, VT, LHSShiftArg,
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 026c359..5085f54 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1397,6 +1397,22 @@ def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
(shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
(SHRD64mrCL addr:$dst, GR64:$src2)>;
+def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
def : Pat<(or (shl GR64:$src1, CL:$amt),
(srl GR64:$src2, (sub 64, CL:$amt))),
@@ -1406,6 +1422,22 @@ def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
(srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
(SHLD64mrCL addr:$dst, GR64:$src2)>;
+def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
// X86 specific add which produces a flag.
def : Pat<(addc GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e105b0f..b19c8b9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1248,26 +1248,14 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
}
unsigned Amt = MI->getOperand(3).getImm();
- unsigned A = MI->getOperand(0).getReg();
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- bool AisDead = MI->getOperand(0).isDead();
- bool BisKill = MI->getOperand(1).isKill();
- bool CisKill = MI->getOperand(2).isKill();
- // If machine instrs are no longer in two-address forms, update
- // destination register as well.
- if (A == B) {
- // Must be two address instruction!
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
- "Expecting a two-address instruction!");
- A = C;
- CisKill = false;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
}
- MachineFunction &MF = *MI->getParent()->getParent();
- return BuildMI(MF, get(Opc))
- .addReg(A, true, false, false, AisDead)
- .addReg(C, false, false, CisKill)
- .addReg(B, false, false, BisKill).addImm(Size-Amt);
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
case X86::CMOVB16rr:
case X86::CMOVB32rr:
@@ -1357,7 +1345,11 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
}
-
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
MI->setDesc(get(Opc));
// Fallthrough intended.
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7a78e03..16dc366 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -325,6 +325,34 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
+// 'shld' and 'shrd' instruction patterns. Note that even though these have
+// the srl and shl in their patterns, the C++ code must still check for them,
+// because predicates are tested before child nodes are explored.
+
+def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (srl node:$src1, node:$amt1),
+ (shl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SRL &&
+ N->getOperand(1).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
+def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (shl node:$src1, node:$amt1),
+ (srl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SHL &&
+ N->getOperand(1).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction list...
//
@@ -2973,6 +3001,22 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
(shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHRD32mrCL addr:$dst, GR32:$src2)>;
+def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
def : Pat<(or (shl GR32:$src1, CL:$amt),
(srl GR32:$src2, (sub 32, CL:$amt))),
@@ -2982,6 +3026,22 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
(srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
(SHLD32mrCL addr:$dst, GR32:$src2)>;
+def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
def : Pat<(or (srl GR16:$src1, CL:$amt),
(shl GR16:$src2, (sub 16, CL:$amt))),
@@ -2991,6 +3051,22 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
(shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHRD16mrCL addr:$dst, GR16:$src2)>;
+def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
def : Pat<(or (shl GR16:$src1, CL:$amt),
(srl GR16:$src2, (sub 16, CL:$amt))),
@@ -3000,6 +3076,22 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
(srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, GR16:$src2)>;
+def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
new file mode 100644
index 0000000..c196ce2
--- /dev/null
+++ b/test/CodeGen/X86/rot16.ll
@@ -0,0 +1,73 @@
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = shl i16 %x, %z
+ %1 = sub i16 16, %z
+ %2 = lshr i16 %x, %1
+ %3 = or i16 %2, %0
+ ret i16 %3
+}
+
+define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = shl i16 %y, %z
+ %1 = sub i16 16, %z
+ %2 = lshr i16 %x, %1
+ %3 = or i16 %2, %0
+ ret i16 %3
+}
+
+define i16 @un(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = lshr i16 %x, %z
+ %1 = sub i16 16, %z
+ %2 = shl i16 %x, %1
+ %3 = or i16 %2, %0
+ ret i16 %3
+}
+
+define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = lshr i16 %y, %z
+ %1 = sub i16 16, %z
+ %2 = shl i16 %x, %1
+ %3 = or i16 %2, %0
+ ret i16 %3
+}
+
+define i16 @xfoo(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = lshr i16 %x, 11
+ %1 = shl i16 %x, 5
+ %2 = or i16 %0, %1
+ ret i16 %2
+}
+
+define i16 @xbar(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = shl i16 %y, 5
+ %1 = lshr i16 %x, 11
+ %2 = or i16 %0, %1
+ ret i16 %2
+}
+
+define i16 @xun(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = lshr i16 %x, 5
+ %1 = shl i16 %x, 11
+ %2 = or i16 %0, %1
+ ret i16 %2
+}
+
+define i16 @xbu(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+ %0 = lshr i16 %y, 5
+ %1 = shl i16 %x, 11
+ %2 = or i16 %0, %1
+ ret i16 %2
+}
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
new file mode 100644
index 0000000..7cebcb8
--- /dev/null
+++ b/test/CodeGen/X86/rot32.ll
@@ -0,0 +1,73 @@
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = shl i32 %x, %z
+ %1 = sub i32 32, %z
+ %2 = lshr i32 %x, %1
+ %3 = or i32 %2, %0
+ ret i32 %3
+}
+
+define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = shl i32 %y, %z
+ %1 = sub i32 32, %z
+ %2 = lshr i32 %x, %1
+ %3 = or i32 %2, %0
+ ret i32 %3
+}
+
+define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = lshr i32 %x, %z
+ %1 = sub i32 32, %z
+ %2 = shl i32 %x, %1
+ %3 = or i32 %2, %0
+ ret i32 %3
+}
+
+define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = lshr i32 %y, %z
+ %1 = sub i32 32, %z
+ %2 = shl i32 %x, %1
+ %3 = or i32 %2, %0
+ ret i32 %3
+}
+
+define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = lshr i32 %x, 25
+ %1 = shl i32 %x, 7
+ %2 = or i32 %0, %1
+ ret i32 %2
+}
+
+define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = shl i32 %y, 7
+ %1 = lshr i32 %x, 25
+ %2 = or i32 %0, %1
+ ret i32 %2
+}
+
+define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = lshr i32 %x, 7
+ %1 = shl i32 %x, 25
+ %2 = or i32 %0, %1
+ ret i32 %2
+}
+
+define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+ %0 = lshr i32 %y, 7
+ %1 = shl i32 %x, 25
+ %2 = or i32 %0, %1
+ ret i32 %2
+}
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
new file mode 100644
index 0000000..2408359
--- /dev/null
+++ b/test/CodeGen/X86/rot64.ll
@@ -0,0 +1,73 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = shl i64 %x, %z
+ %1 = sub i64 64, %z
+ %2 = lshr i64 %x, %1
+ %3 = or i64 %2, %0
+ ret i64 %3
+}
+
+define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = shl i64 %y, %z
+ %1 = sub i64 64, %z
+ %2 = lshr i64 %x, %1
+ %3 = or i64 %2, %0
+ ret i64 %3
+}
+
+define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = lshr i64 %x, %z
+ %1 = sub i64 64, %z
+ %2 = shl i64 %x, %1
+ %3 = or i64 %2, %0
+ ret i64 %3
+}
+
+define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = lshr i64 %y, %z
+ %1 = sub i64 64, %z
+ %2 = shl i64 %x, %1
+ %3 = or i64 %2, %0
+ ret i64 %3
+}
+
+define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = lshr i64 %x, 57
+ %1 = shl i64 %x, 7
+ %2 = or i64 %0, %1
+ ret i64 %2
+}
+
+define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = shl i64 %y, 7
+ %1 = lshr i64 %x, 57
+ %2 = or i64 %0, %1
+ ret i64 %2
+}
+
+define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = lshr i64 %x, 7
+ %1 = shl i64 %x, 57
+ %2 = or i64 %0, %1
+ ret i64 %2
+}
+
+define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %0 = lshr i64 %y, 7
+ %1 = shl i64 %x, 57
+ %2 = or i64 %0, %1
+ ret i64 %2
+}