Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/ARM/ARM.h | 109
-rw-r--r-- lib/Target/ARM/ARM.td | 119
-rw-r--r-- lib/Target/ARM/ARMAddressingModes.h | 394
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp | 1029
-rw-r--r-- lib/Target/ARM/ARMCodeEmitter.cpp | 92
-rw-r--r-- lib/Target/ARM/ARMConstantIslandPass.cpp | 1277
-rw-r--r-- lib/Target/ARM/ARMConstantPoolValue.cpp | 90
-rw-r--r-- lib/Target/ARM/ARMConstantPoolValue.h | 75
-rw-r--r-- lib/Target/ARM/ARMFrameInfo.h | 33
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 859
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 1859
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 144
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.cpp | 612
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.h | 133
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 1320
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td | 596
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td | 386
-rw-r--r-- lib/Target/ARM/ARMJITInfo.cpp | 131
-rw-r--r-- lib/Target/ARM/ARMJITInfo.h | 50
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 750
-rw-r--r-- lib/Target/ARM/ARMMachineFunctionInfo.h | 220
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.cpp | 1566
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.h | 108
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.td | 196
-rw-r--r-- lib/Target/ARM/ARMRelocations.h | 28
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp | 57
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 94
-rw-r--r-- lib/Target/ARM/ARMTargetAsmInfo.cpp | 276
-rw-r--r-- lib/Target/ARM/ARMTargetAsmInfo.h | 38
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 160
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.h | 81
-rw-r--r-- lib/Target/ARM/Makefile | 21
-rw-r--r-- lib/Target/ARM/README-Thumb.txt | 223
-rw-r--r-- lib/Target/ARM/README.txt | 530
-rw-r--r-- lib/Target/Alpha/Alpha.h | 48
-rw-r--r-- lib/Target/Alpha/Alpha.td | 66
-rw-r--r-- lib/Target/Alpha/AlphaAsmPrinter.cpp | 297
-rw-r--r-- lib/Target/Alpha/AlphaBranchSelector.cpp | 67
-rw-r--r-- lib/Target/Alpha/AlphaCodeEmitter.cpp | 222
-rw-r--r-- lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 563
-rw-r--r-- lib/Target/Alpha/AlphaISelLowering.cpp | 623
-rw-r--r-- lib/Target/Alpha/AlphaISelLowering.h | 94
-rw-r--r-- lib/Target/Alpha/AlphaInstrFormats.td | 249
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.cpp | 266
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.h | 57
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.td | 1088
-rw-r--r-- lib/Target/Alpha/AlphaJITInfo.cpp | 305
-rw-r--r-- lib/Target/Alpha/AlphaJITInfo.h | 49
-rw-r--r-- lib/Target/Alpha/AlphaLLRP.cpp | 162
-rw-r--r-- lib/Target/Alpha/AlphaRegisterInfo.cpp | 433
-rw-r--r-- lib/Target/Alpha/AlphaRegisterInfo.h | 85
-rw-r--r-- lib/Target/Alpha/AlphaRegisterInfo.td | 171
-rw-r--r-- lib/Target/Alpha/AlphaRelocations.h | 31
-rw-r--r-- lib/Target/Alpha/AlphaSchedule.td | 84
-rw-r--r-- lib/Target/Alpha/AlphaSubtarget.cpp | 25
-rw-r--r-- lib/Target/Alpha/AlphaSubtarget.h | 46
-rw-r--r-- lib/Target/Alpha/AlphaTargetAsmInfo.cpp | 24
-rw-r--r-- lib/Target/Alpha/AlphaTargetAsmInfo.h | 30
-rw-r--r-- lib/Target/Alpha/AlphaTargetMachine.cpp | 97
-rw-r--r-- lib/Target/Alpha/AlphaTargetMachine.h | 73
-rw-r--r-- lib/Target/Alpha/Makefile | 20
-rw-r--r-- lib/Target/Alpha/README.txt | 42
-rw-r--r-- lib/Target/CBackend/CBackend.cpp | 2930
-rw-r--r-- lib/Target/CBackend/CTargetMachine.h | 41
-rw-r--r-- lib/Target/CBackend/Makefile | 14
-rw-r--r-- lib/Target/CellSPU/README.txt | 10
-rw-r--r-- lib/Target/IA64/IA64.h | 54
-rw-r--r-- lib/Target/IA64/IA64.td | 39
-rw-r--r-- lib/Target/IA64/IA64AsmPrinter.cpp | 360
-rw-r--r-- lib/Target/IA64/IA64Bundling.cpp | 118
-rw-r--r-- lib/Target/IA64/IA64ISelDAGToDAG.cpp | 587
-rw-r--r-- lib/Target/IA64/IA64ISelLowering.cpp | 602
-rw-r--r-- lib/Target/IA64/IA64ISelLowering.h | 74
-rw-r--r-- lib/Target/IA64/IA64InstrBuilder.h | 52
-rw-r--r-- lib/Target/IA64/IA64InstrFormats.td | 79
-rw-r--r-- lib/Target/IA64/IA64InstrInfo.cpp | 58
-rw-r--r-- lib/Target/IA64/IA64InstrInfo.h | 49
-rw-r--r-- lib/Target/IA64/IA64InstrInfo.td | 744
-rw-r--r-- lib/Target/IA64/IA64MachineFunctionInfo.h | 34
-rw-r--r-- lib/Target/IA64/IA64RegisterInfo.cpp | 388
-rw-r--r-- lib/Target/IA64/IA64RegisterInfo.h | 81
-rw-r--r-- lib/Target/IA64/IA64RegisterInfo.td | 508
-rw-r--r-- lib/Target/IA64/IA64TargetAsmInfo.cpp | 34
-rw-r--r-- lib/Target/IA64/IA64TargetAsmInfo.h | 31
-rw-r--r-- lib/Target/IA64/IA64TargetMachine.cpp | 91
-rw-r--r-- lib/Target/IA64/IA64TargetMachine.h | 60
-rw-r--r-- lib/Target/IA64/Makefile | 18
-rw-r--r-- lib/Target/IA64/README | 106
-rw-r--r-- lib/Target/MRegisterInfo.cpp | 81
-rw-r--r-- lib/Target/MSIL/MSILWriter.cpp | 1657
-rw-r--r-- lib/Target/MSIL/MSILWriter.h | 255
-rw-r--r-- lib/Target/MSIL/Makefile | 14
-rw-r--r-- lib/Target/MSIL/README.TXT | 26
-rw-r--r-- lib/Target/Makefile | 20
-rw-r--r-- lib/Target/Mips/Makefile | 21
-rw-r--r-- lib/Target/Mips/Mips.h | 38
-rw-r--r-- lib/Target/Mips/Mips.td | 63
-rw-r--r-- lib/Target/Mips/MipsAsmPrinter.cpp | 433
-rw-r--r-- lib/Target/Mips/MipsCallingConv.td | 39
-rw-r--r-- lib/Target/Mips/MipsISelDAGToDAG.cpp | 272
-rw-r--r-- lib/Target/Mips/MipsISelLowering.cpp | 557
-rw-r--r-- lib/Target/Mips/MipsISelLowering.h | 84
-rw-r--r-- lib/Target/Mips/MipsInstrFormats.td | 96
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.cpp | 114
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.h | 63
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.td | 469
-rw-r--r-- lib/Target/Mips/MipsMachineFunction.h | 54
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.cpp | 422
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.h | 83
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.td | 80
-rw-r--r-- lib/Target/Mips/MipsSubtarget.cpp | 26
-rw-r--r-- lib/Target/Mips/MipsSubtarget.h | 43
-rw-r--r-- lib/Target/Mips/MipsTargetAsmInfo.cpp | 22
-rw-r--r-- lib/Target/Mips/MipsTargetAsmInfo.h | 30
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.cpp | 83
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.h | 65
-rw-r--r-- lib/Target/PowerPC/Makefile | 20
-rw-r--r-- lib/Target/PowerPC/PPC.h | 47
-rw-r--r-- lib/Target/PowerPC/PPC.td | 114
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 1101
-rw-r--r-- lib/Target/PowerPC/PPCBranchSelector.cpp | 199
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.td | 65
-rw-r--r-- lib/Target/PowerPC/PPCCodeEmitter.cpp | 237
-rw-r--r-- lib/Target/PowerPC/PPCFrameInfo.h | 93
-rw-r--r-- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 303
-rw-r--r-- lib/Target/PowerPC/PPCHazardRecognizers.h | 73
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1122
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 3451
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h | 263
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td | 590
-rw-r--r-- lib/Target/PowerPC/PPCInstrAltivec.td | 622
-rw-r--r-- lib/Target/PowerPC/PPCInstrBuilder.h | 55
-rw-r--r-- lib/Target/PowerPC/PPCInstrFormats.td | 800
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 303
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 112
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td | 1164
-rw-r--r-- lib/Target/PowerPC/PPCJITInfo.cpp | 429
-rw-r--r-- lib/Target/PowerPC/PPCJITInfo.h | 46
-rw-r--r-- lib/Target/PowerPC/PPCMachOWriterInfo.cpp | 150
-rw-r--r-- lib/Target/PowerPC/PPCMachOWriterInfo.h | 55
-rw-r--r-- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 50
-rw-r--r-- lib/Target/PowerPC/PPCPerfectShuffle.h | 6586
-rw-r--r-- lib/Target/PowerPC/PPCPredicates.cpp | 30
-rw-r--r-- lib/Target/PowerPC/PPCPredicates.h | 39
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 1153
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.h | 107
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.td | 333
-rw-r--r-- lib/Target/PowerPC/PPCRelocations.h | 56
-rw-r--r-- lib/Target/PowerPC/PPCSchedule.td | 508
-rw-r--r-- lib/Target/PowerPC/PPCScheduleG3.td | 63
-rw-r--r-- lib/Target/PowerPC/PPCScheduleG4.td | 73
-rw-r--r-- lib/Target/PowerPC/PPCScheduleG4Plus.td | 76
-rw-r--r-- lib/Target/PowerPC/PPCScheduleG5.td | 83
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.cpp | 141
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.h | 146
-rw-r--r-- lib/Target/PowerPC/PPCTargetAsmInfo.cpp | 96
-rw-r--r-- lib/Target/PowerPC/PPCTargetAsmInfo.h | 38
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp | 166
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h | 101
-rw-r--r-- lib/Target/PowerPC/README.txt | 664
-rw-r--r-- lib/Target/PowerPC/README_ALTIVEC.txt | 179
-rw-r--r-- lib/Target/README.txt | 451
-rw-r--r-- lib/Target/Sparc/DelaySlotFiller.cpp | 76
-rw-r--r-- lib/Target/Sparc/FPMover.cpp | 138
-rw-r--r-- lib/Target/Sparc/Makefile | 20
-rw-r--r-- lib/Target/Sparc/README.txt | 57
-rw-r--r-- lib/Target/Sparc/Sparc.h | 116
-rw-r--r-- lib/Target/Sparc/Sparc.td | 80
-rw-r--r-- lib/Target/Sparc/SparcAsmPrinter.cpp | 291
-rw-r--r-- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 1140
-rw-r--r-- lib/Target/Sparc/SparcInstrFormats.td | 113
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.cpp | 108
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.h | 73
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.td | 776
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.cpp | 276
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.h | 86
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.td | 158
-rw-r--r-- lib/Target/Sparc/SparcSubtarget.cpp | 43
-rw-r--r-- lib/Target/Sparc/SparcSubtarget.h | 42
-rw-r--r-- lib/Target/Sparc/SparcTargetAsmInfo.cpp | 25
-rw-r--r-- lib/Target/Sparc/SparcTargetAsmInfo.h | 31
-rw-r--r-- lib/Target/Sparc/SparcTargetMachine.cpp | 83
-rw-r--r-- lib/Target/Sparc/SparcTargetMachine.h | 57
-rw-r--r-- lib/Target/SubtargetFeature.cpp | 357
-rw-r--r-- lib/Target/Target.td | 413
-rw-r--r-- lib/Target/TargetAsmInfo.cpp | 131
-rw-r--r-- lib/Target/TargetCallingConv.td | 88
-rw-r--r-- lib/Target/TargetData.cpp | 595
-rw-r--r-- lib/Target/TargetFrameInfo.cpp | 19
-rw-r--r-- lib/Target/TargetInstrInfo.cpp | 99
-rw-r--r-- lib/Target/TargetMachOWriterInfo.cpp | 25
-rw-r--r-- lib/Target/TargetMachine.cpp | 164
-rw-r--r-- lib/Target/TargetMachineRegistry.cpp | 107
-rw-r--r-- lib/Target/TargetSchedule.td | 72
-rw-r--r-- lib/Target/TargetSelectionDAG.td | 763
-rw-r--r-- lib/Target/TargetSubtarget.cpp | 22
-rw-r--r-- lib/Target/X86/Makefile | 20
-rw-r--r-- lib/Target/X86/README-FPStack.txt | 99
-rw-r--r-- lib/Target/X86/README-MMX.txt | 69
-rw-r--r-- lib/Target/X86/README-SSE.txt | 629
-rw-r--r-- lib/Target/X86/README-X86-64.txt | 223
-rw-r--r-- lib/Target/X86/README.txt | 1150
-rw-r--r-- lib/Target/X86/X86.h | 66
-rw-r--r-- lib/Target/X86/X86.td | 150
-rwxr-xr-x lib/Target/X86/X86ATTAsmPrinter.cpp | 607
-rwxr-xr-x lib/Target/X86/X86ATTAsmPrinter.h | 87
-rw-r--r-- lib/Target/X86/X86AsmPrinter.cpp | 409
-rwxr-xr-x lib/Target/X86/X86AsmPrinter.h | 97
-rw-r--r-- lib/Target/X86/X86COFF.h | 95
-rw-r--r-- lib/Target/X86/X86CallingConv.td | 172
-rw-r--r-- lib/Target/X86/X86CodeEmitter.cpp | 824
-rw-r--r-- lib/Target/X86/X86ELFWriterInfo.cpp | 18
-rw-r--r-- lib/Target/X86/X86ELFWriterInfo.h | 29
-rw-r--r-- lib/Target/X86/X86FloatingPoint.cpp | 882
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 1342
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 5094
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 437
-rw-r--r-- lib/Target/X86/X86InstrBuilder.h | 125
-rw-r--r-- lib/Target/X86/X86InstrFPStack.td | 456
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 567
-rw-r--r-- lib/Target/X86/X86InstrInfo.h | 287
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 2674
-rw-r--r-- lib/Target/X86/X86InstrMMX.td | 645
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 2572
-rw-r--r-- lib/Target/X86/X86InstrX86-64.td | 1165
-rwxr-xr-x lib/Target/X86/X86IntelAsmPrinter.cpp | 533
-rwxr-xr-x lib/Target/X86/X86IntelAsmPrinter.h | 112
-rw-r--r-- lib/Target/X86/X86JITInfo.cpp | 372
-rw-r--r-- lib/Target/X86/X86JITInfo.h | 50
-rw-r--r-- lib/Target/X86/X86MachineFunctionInfo.h | 74
-rw-r--r-- lib/Target/X86/X86RegisterInfo.cpp | 1613
-rw-r--r-- lib/Target/X86/X86RegisterInfo.h | 130
-rw-r--r-- lib/Target/X86/X86RegisterInfo.td | 468
-rw-r--r-- lib/Target/X86/X86Relocations.h | 34
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 293
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 156
-rw-r--r-- lib/Target/X86/X86TargetAsmInfo.cpp | 280
-rw-r--r-- lib/Target/X86/X86TargetAsmInfo.h | 33
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 190
-rw-r--r-- lib/Target/X86/X86TargetMachine.h | 95
240 files changed, 85965 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..8134dcc
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,109 @@
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include <iosfwd>
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ enum CondCodes {
+ EQ,
+ NE,
+ HS,
+ LO,
+ MI,
+ PL,
+ VS,
+ VC,
+ HI,
+ LS,
+ GE,
+ LT,
+ GT,
+ LE,
+ AL
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC){
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+}
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
+
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+
+#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..0272004
--- /dev/null
+++ b/lib/Target/ARM/ARM.td
@@ -0,0 +1,119 @@
+//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true",
+ "Enable VFP2 instructions ">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic", []>;
+def : Proc<"arm8", []>;
+def : Proc<"arm810", []>;
+def : Proc<"strongarm", []>;
+def : Proc<"strongarm110", []>;
+def : Proc<"strongarm1100", []>;
+def : Proc<"strongarm1110", []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi", [ArchV4T]>;
+def : Proc<"arm7tdmi-s", [ArchV4T]>;
+def : Proc<"arm710t", [ArchV4T]>;
+def : Proc<"arm720t", [ArchV4T]>;
+def : Proc<"arm9", [ArchV4T]>;
+def : Proc<"arm9tdmi", [ArchV4T]>;
+def : Proc<"arm920", [ArchV4T]>;
+def : Proc<"arm920t", [ArchV4T]>;
+def : Proc<"arm922t", [ArchV4T]>;
+def : Proc<"arm940t", [ArchV4T]>;
+def : Proc<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi", [ArchV5T]>;
+def : Proc<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e", [ArchV5TE]>;
+def : Proc<"arm926ej-s", [ArchV5TE]>;
+def : Proc<"arm946e-s", [ArchV5TE]>;
+def : Proc<"arm966e-s", [ArchV5TE]>;
+def : Proc<"arm968e-s", [ArchV5TE]>;
+def : Proc<"arm10e", [ArchV5TE]>;
+def : Proc<"arm1020e", [ArchV5TE]>;
+def : Proc<"arm1022e", [ArchV5TE]>;
+def : Proc<"xscale", [ArchV5TE]>;
+def : Proc<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s", [ArchV6]>;
+def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s", [ArchV6]>;
+def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp", [ArchV6]>;
+def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
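+  // With the shifts below this packs, from the least significant bit upward:
+  // AddrModeBits in bits 0-3, SizeFlag in bits 4-6, IndexModeBits in bits 7-8,
+  // and Opcode from bit 9 up.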
+ let TSFlagsFields = ["AddrModeBits",
+ "SizeFlag",
+ "IndexModeBits",
+ "Opcode"];
+ let TSFlagsShifts = [0,
+ 4,
+ 7,
+ 9];
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+}
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 0000000..3f47a69
--- /dev/null
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,394 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDOperand N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return isLD ? "fd" : "ea";
+ case ARM_AM::ib: return isLD ? "ed" : "fa";
+ case ARM_AM::da: return isLD ? "fa" : "ed";
+ case ARM_AM::db: return isLD ? "ea" : "fd";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+  // This is stored as three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
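+
+  /// Illustrative only: a round trip through the so_reg encoding above, e.g.
+  /// the "lsl #3" part of "r5, lsl #3".  (exampleSORegRoundTrip is a
+  /// hypothetical helper added for exposition, not used by the backend.)
+  static inline bool exampleSORegRoundTrip() {
+    unsigned SO = getSORegOpc(lsl, 3);          // == lsl | (3 << 3)
+    return getSORegShOp(SO) == lsl && getSORegOffset(SO) == 3;
+  }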
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+  /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should skip the first run of ones, then
+ // retry the hunt.
+ if (Imm & 1) {
+ unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+ if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
+        // Restart the search for a high-order bit after the initial run of
+        // ones.
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
+
+ // Rotate amount must be even.
+ unsigned RotAmt2 = TZ2 & ~1;
+
+ // If this fits, use it.
+ if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
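+
+  /// Illustrative only: 0x200 can be produced by rotating the 8-bit value 2
+  /// right by 24 bits, so getSOImmVal encodes it as imm8=2 with a rot field of
+  /// 12 (the field stores rot/2), i.e. 0xC02.  (exampleSOImmEncoding is a
+  /// hypothetical helper for exposition only.)
+  static inline bool exampleSOImmEncoding() {
+    return getSOImmValRotate(0x200) == 24 && getSOImmVal(0x200) == 0xC02;
+  }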
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+    // Mask out the first chunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
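+
+  /// Illustrative only: 0xFF0000FF is not a single shifter_operand immediate,
+  /// but it splits into the two chunks 0x000000FF and 0xFF000000, each of
+  /// which is.  (exampleSOImmTwoPart is a hypothetical helper for exposition.)
+  static inline bool exampleSOImmTwoPart() {
+    return isSOImmTwoPartVal(0xFF0000FF) &&
+           getSOImmTwoPartFirst(0xFF0000FF) == 0x000000FF &&
+           getSOImmTwoPartSecond(0xFF0000FF) == 0xFF000000;
+  }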
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+    // If the value is an 8-bit immediate shifted left, masking off those
+    // bits must leave nothing behind.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
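+
+  /// Illustrative only: 0x500 is the 8-bit value 5 shifted left by 8, so it is
+  /// a valid Thumb shifted immediate.  (exampleThumbImm is a hypothetical
+  /// helper for exposition.)
+  static inline bool exampleThumbImm() {
+    return isThumbImmShiftedVal(0x500) && getThumbImmValShift(0x500) == 8 &&
+           getThumbImmNonShiftedVal(0x500) == 5;
+  }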
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 12, the immediate in bits 0-11, and the shift op in bits 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
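+
+  /// Illustrative only: encode the offset part of "[r1, -r2, lsl #4]" and
+  /// decode it again.  (exampleAM2RoundTrip is a hypothetical helper for
+  /// exposition.)
+  static inline bool exampleAM2RoundTrip() {
+    unsigned Opc = getAM2Opc(sub, 4, lsl);
+    return getAM2Offset(Opc) == 4 && getAM2Op(Opc) == sub &&
+           getAM2ShiftOpc(Opc) == lsl;
+  }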
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
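+
+  /// Illustrative only: an immediate offset of "-#4" in addrmode3 is the
+  /// 8-bit offset 4 with the sub bit set, i.e. 0x104.  (exampleAM3 is a
+  /// hypothetical helper for exposition.)
+  static inline bool exampleAM3() {
+    unsigned Opc = getAM3Opc(sub, 4);
+    return Opc == 0x104 && getAM3Offset(Opc) == 4 && getAM3Op(Opc) == sub;
+  }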
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+ // the memory transfer.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+ return (int)SubMode | ((int)WB << 3);
+ }
+
+ static inline bool getAM4WBFlag(unsigned Mode) {
+ return (Mode >> 3) & 1;
+ }
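+
+  /// Illustrative only: "increment after" with writeback encodes as
+  /// ia | (1 << 3).  (exampleAM4 is a hypothetical helper for exposition.)
+  static inline bool exampleAM4() {
+    unsigned ModeImm = getAM4ModeImm(ia, true);
+    return getAM4SubMode(ModeImm) == ia && getAM4WBFlag(ModeImm);
+  }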
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+ //
+ // This can also be used for FP load/store multiple ops. The third field encodes
+ // writeback mode in bit 8, the number of registers (or 2 times the number of
+  // registers for DPR ops) in bits 0-7. In addition, bits 9-11 encode one of the
+ // following two sub-modes:
+ //
+ // IA - Increment after
+ // DB - Decrement before
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+ /// FSTM instructions.
+ static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+ unsigned char Offset) {
+ assert((SubMode == ia || SubMode == db) &&
+ "Illegal addressing mode 5 sub-mode!");
+ return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+ }
+ static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+ return (AMSubMode)((AM5Opc >> 9) & 0x7);
+ }
+ static inline bool getAM5WBFlag(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1);
+ }
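+
+  /// Illustrative only: an FLDM/FSTM-style addrmode5 value with sub-mode db,
+  /// writeback, and a count of 4 registers round-trips through the accessors
+  /// above.  (exampleAM5MultiOpc is a hypothetical helper for exposition.)
+  static inline bool exampleAM5MultiOpc() {
+    unsigned Opc = getAM5Opc(db, true, 4);
+    return getAM5SubMode(Opc) == db && getAM5WBFlag(Opc) &&
+           getAM5Offset(Opc) == 4;
+  }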
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
new file mode 100644
index 0000000..5e65226
--- /dev/null
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -0,0 +1,1029 @@
+//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
+ ARMAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
+ : AsmPrinter(O, TM, T), DW(O, this, T), AFI(NULL), InCPMode(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ DwarfWriter DW;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction
+ ARMFunctionInfo *AFI;
+
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+
+ /// Keeps the set of GlobalValues that require non-lazy-pointers for
+ /// indirect access.
+ std::set<std::string> GVNonLazyPtrs;
+
+ /// Keeps the set of external function GlobalAddresses that the asm
+ /// printer should generate stubs for.
+ std::set<std::string> FnStubs;
+
+ /// True if asm printer is printing a series of CONSTPOOL_ENTRY.
+ bool InCPMode;
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSOImmOperand(const MachineInstr *MI, int opNum);
+ void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+ void printSORegOperand(const MachineInstr *MI, int opNum);
+ void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+ unsigned Scale);
+ void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+ void printPredicateOperand(const MachineInstr *MI, int opNum);
+ void printSBitModifierOperand(const MachineInstr *MI, int opNum);
+ void printPCLabel(const MachineInstr *MI, int opNum);
+ void printRegisterList(const MachineInstr *MI, int opNum);
+ void printCPInstOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier);
+ void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ printDataDirective(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MCPV;
+ GlobalValue *GV = ACPV->getGV();
+ std::string Name = GV ? Mang->getValueName(GV) : TAI->getGlobalPrefix();
+ if (!GV)
+ Name += ACPV->getSymbol();
+ if (ACPV->isNonLazyPointer()) {
+ GVNonLazyPtrs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$non_lazy_ptr";
+ } else if (ACPV->isStub()) {
+ FnStubs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ } else
+ O << Name;
+ if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
+ if (ACPV->getPCAdjustment() != 0) {
+ O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
+ << utostr(ACPV->getLabelId())
+ << "+" << (unsigned)ACPV->getPCAdjustment();
+ if (ACPV->mustAddCurrentAddress())
+ O << "-.";
+ O << ")";
+ }
+ O << "\n";
+
+      // If the constant pool value is an extern weak symbol, remember to emit
+ // the weak reference.
+ if (GV && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "ARMGenAsmWriter.inc"
+
+/// createARMCodePrinterPass - Returns a pass that prints the ARM
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createARMCodePrinterPass(std::ostream &o,
+ ARMTargetMachine &tm) {
+ return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ DW.SetModuleInfo(&getAnalysis<MachineModuleInfo>());
+
+ SetupMachineFunction(MF);
+ O << "\n";
+
+ // NOTE: we don't print out constant pools here, they are handled as
+ // instructions.
+
+ O << "\n";
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ break;
+ case Function::ExternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ break;
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToTextSection(
+ ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ } else {
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ }
+ break;
+ }
+
+ const char *VisibilityDirective = NULL;
+ if (F->hasHiddenVisibility())
+ VisibilityDirective = TAI->getHiddenDirective();
+ else if (F->hasProtectedVisibility())
+ VisibilityDirective = TAI->getProtectedDirective();
+
+ if (VisibilityDirective)
+ O << VisibilityDirective << CurrentFnName << "\n";
+
+ if (AFI->isThumbFunction()) {
+ EmitAlignment(1, F, AFI->getAlign());
+ O << "\t.code\t16\n";
+ O << "\t.thumb_func";
+ if (Subtarget->isTargetDarwin())
+ O << "\t" << CurrentFnName;
+ O << "\n";
+ InCPMode = false;
+ } else
+ EmitAlignment(2, F);
+
+ O << CurrentFnName << ":\n";
+ // Emit pre-function debug information.
+ DW.BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
+
+ // Emit post-function debug information.
+ DW.EndFunction();
+
+ return false;
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+ else
+ assert(0 && "not implemented");
+ break;
+ case MachineOperand::MO_Immediate: {
+ if (!Modifier || strcmp(Modifier, "no_hash") != 0)
+ O << "#";
+
+ O << (int)MO.getImmedValue();
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ if (isExt && isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+
+ if (MO.getOffset() > 0)
+ O << '+' << MO.getOffset();
+ else if (MO.getOffset() < 0)
+ O << MO.getOffset();
+
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getConstantPoolIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getJumpTableIndex();
+ break;
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+}
+
+static void printSOImm(std::ostream &O, int64_t V, const TargetAsmInfo *TAI) {
+ assert(V < (1 << 12) && "Not a valid so_imm value!");
+ unsigned Imm = ARM_AM::getSOImmValImm(V);
+ unsigned Rot = ARM_AM::getSOImmValRot(V);
+
+ // Print low-level immediate formation info, per
+ // A5.1.3: "Data-processing operands - Immediate".
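+  // For example, the encoding 0xC02 (imm8=2, rot=24) prints as "#2, 24" with
+  // the rotated value (512) appended as an assembler comment, while a plain
+  // 8-bit value such as 42 prints as just "#42".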
+ if (Rot) {
+ O << "#" << Imm << ", " << Rot;
+ // Pretty printed version.
+ O << ' ' << TAI->getCommentString() << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ } else {
+ O << "#" << Imm;
+ }
+}
+
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImmediate() && "Not a valid so_imm value!");
+ printSOImm(O, MO.getImmedValue(), TAI);
+}
+
+/// printSOImm2PartOperand - SOImm is broken into two pieces using a mov
+/// followed by an orr to materialize it.
+void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImmediate() && "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImmedValue());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImmedValue());
+ printSOImm(O, ARM_AM::getSOImmVal(V1), TAI);
+ O << "\n\torr";
+ printPredicateOperand(MI, 2);
+ O << " ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 0);
+ O << ", ";
+ printSOImm(O, ARM_AM::getSOImmVal(V2), TAI);
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ // Print the shift opc.
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImmedValue()))
+ << " ";
+
+ if (MO2.getReg()) {
+ assert(MRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ O << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImmedValue()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << ImmOffs;
+ return;
+ }
+
+ O << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImmedValue()))
+ << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (MO2.getReg()) {
+ O << ", "
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).Name
+ << "]";
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << ImmOffs;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << ImmOffs;
+}
+
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ if (MO1.getReg() == ARM::SP) {
+ bool isLDM = (MI->getOpcode() == ARM::LDM ||
+ MI->getOpcode() == ARM::LDM_RET);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ } else {
+ printOperand(MI, Op);
+ if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+ O << "!";
+ }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+ if (MO1.getReg() == ARM::SP) {
+ bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+ MI->getOpcode() == ARM::FLDMS);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ return;
+ } else if (Modifier && strcmp(Modifier, "base") == 0) {
+    // Used for FSTM{D|S} and FLDM{D|S} operations.
+ O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (ARM_AM::getAM5WBFlag(MO2.getImm()))
+ O << "!";
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ImmOffs*4;
+ }
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ if (Modifier && strcmp(Modifier, "label") == 0) {
+ printPCLabel(MI, Op+1);
+ return;
+ }
+
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
+ unsigned Scale) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (MO3.getReg())
+ O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).Name;
+ else if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", #" << ImmOffs;
+ if (Scale > 1)
+ O << " * " << Scale;
+ }
+ O << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs << " * 4";
+ O << "]";
+}
+
+void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImmedValue();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){
+ unsigned Reg = MI->getOperand(opNum).getReg();
+ if (Reg) {
+ assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
+ int Id = (int)MI->getOperand(opNum).getImmedValue();
+ O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+}
+
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+ O << "{";
+ for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ printOperand(MI, i);
+ if (i != e-1) O << ", ";
+ }
+ O << "}";
+}
+
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier) {
+ assert(Modifier && "This operand only works with a modifier!");
+ // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
+ // data itself.
+ if (!strcmp(Modifier, "label")) {
+ unsigned ID = MI->getOperand(OpNo).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << ID << ":\n";
+ } else {
+ assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
+ unsigned CPI = MI->getOperand(OpNo).getConstantPoolIndex();
+
+ const MachineConstantPoolEntry &MCPE = // Chasing pointers is fun?
+ MI->getParent()->getParent()->getConstantPool()->getConstants()[CPI];
+
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else {
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ // remember to emit the weak reference
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+ }
+}
+
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+ unsigned JTI = MO1.getJumpTableIndex();
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImmedValue() << ":\n";
+
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ if (!JTEntryDirective)
+ JTEntryDirective = TAI->getData32bitsDirective();
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+ std::set<MachineBasicBlock*> JTSets;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (UseSet && JTSets.insert(MBB).second)
+ printSetLabel(JTI, MO2.getImmedValue(), MBB);
+
+ O << JTEntryDirective << ' ';
+ if (UseSet)
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImmedValue()
+ << "_set_" << MBB->getNumber();
+ else if (TM.getRelocationModel() == Reloc::PIC_) {
+ printBasicBlockLabel(MBB, false, false);
+ // If the arch uses custom Jump Table directives, don't calc relative to JT
+ if (!TAI->getJumpTableDirective())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << JTI << '_' << MO2.getImmedValue();
+ } else
+ printBasicBlockLabel(MBB, false, false);
+ if (i != e-1)
+ O << '\n';
+ }
+}
+
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode){
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ case 'P': // Print a VFP double precision register.
+ printOperand(MI, OpNo);
+ return false;
+ case 'Q':
+ if (TM.getTargetData()->isLittleEndian())
+ break;
+ // Fallthrough
+ case 'R':
+ if (TM.getTargetData()->isBigEndian())
+ break;
+ // Fallthrough
+ case 'H': // Write second word of DI / DF reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isRegister() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isRegister())
+ return true;
+ ++OpNo; // Return the high-part.
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ int Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::CONSTPOOL_ENTRY:
+ if (!InCPMode && AFI->isThumbFunction()) {
+ EmitAlignment(2);
+ InCPMode = true;
+ }
+ break;
+ default: {
+ if (InCPMode && AFI->isThumbFunction())
+ InCPMode = false;
+ switch (Opc) {
+ case ARM::PICADD:
+ case ARM::PICLD:
+ case ARM::PICLDZH:
+ case ARM::PICLDZB:
+ case ARM::PICLDH:
+ case ARM::PICLDB:
+ case ARM::PICLDSH:
+ case ARM::PICLDSB:
+ case ARM::PICSTR:
+ case ARM::PICSTRH:
+ case ARM::PICSTRB:
+ case ARM::tPICADD:
+ break;
+ default:
+ O << "\t";
+ break;
+ }
+ }}
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool ARMAsmPrinter::doInitialization(Module &M) {
+ // Emit initial debug information.
+ DW.BeginModule(&M);
+
+ AsmPrinter::doInitialization(M);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ return false;
+}
+
+bool ARMAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+    if (!I->hasInitializer())   // External globals require no code
+ continue;
+
+ if (EmitSpecialLLVMGlobal(I)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (I->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (I->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ continue;
+ }
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+
+ const char *VisibilityDirective = NULL;
+ if (I->hasHiddenVisibility())
+ VisibilityDirective = TAI->getHiddenDirective();
+ else if (I->hasProtectedVisibility())
+ VisibilityDirective = TAI->getProtectedDirective();
+
+ if (VisibilityDirective)
+ O << VisibilityDirective << name << "\n";
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type " << name << ",%object\n";
+
+ if (C->isNullValue()) {
+ if (I->hasExternalLinkage()) {
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl\t" << name << "\n";
+ O << Directive << "__DATA__, __common, " << name << ", "
+ << Size << ", " << Align << "\n";
+ continue;
+ }
+ }
+
+ if (!I->hasSection() &&
+ (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+ I->hasLinkOnceLinkage())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (!NoZerosInBSS && TAI->getBSSSection())
+ SwitchToDataSection(TAI->getBSSSection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ if (TAI->getLCOMMDirective() != NULL) {
+ if (I->hasInternalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ if (Subtarget->isTargetDarwin())
+ O << "," << Align;
+ } else
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ } else {
+ if (I->hasInternalLinkage())
+ O << "\t.local\t" << name << "\n";
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ O << "\t\t" << TAI->getCommentString() << " " << I->getName() << "\n";
+ continue;
+ }
+ }
+
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << "\n"
+ << "\t.weak_definition " << name << "\n";
+ SwitchToDataSection("\t.section __DATA,__const_coal,coalesced", I);
+ } else {
+ std::string SectionName("\t.section\t.llvm.linkonce.d." +
+ name +
+ ",\"aw\",%progbits");
+ SwitchToDataSection(SectionName.c_str(), I);
+ O << "\t.weak " << name << "\n";
+ }
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage: {
+ if (I->isConstant()) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+ SwitchToDataSection(TAI->getCStringSection(), I);
+ break;
+ }
+ }
+ // FIXME: special handling for ".ctors" & ".dtors" sections
+ if (I->hasSection() &&
+ (I->getSection() == ".ctors" ||
+ I->getSection() == ".dtors")) {
+ assert(!Subtarget->isTargetDarwin());
+ std::string SectionName = ".section " + I->getSection();
+ SectionName += ",\"aw\",%progbits";
+ SwitchToDataSection(SectionName.c_str());
+ } else {
+ if (C->isNullValue() && !NoZerosInBSS && TAI->getBSSSection())
+ SwitchToDataSection(I->isThreadLocal() ? TAI->getTLSBSSSection() :
+ TAI->getBSSSection(), I);
+ else if (!I->isConstant())
+ SwitchToDataSection(I->isThreadLocal() ? TAI->getTLSDataSection() :
+ TAI->getDataSection(), I);
+ else if (I->isThreadLocal())
+ SwitchToDataSection(TAI->getTLSDataSection());
+ else {
+ // Read-only data.
+ bool HasReloc = C->ContainsRelocations();
+ if (HasReloc &&
+ Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static)
+ SwitchToDataSection("\t.const_data\n");
+ else if (!HasReloc && Size == 4 &&
+ TAI->getFourByteConstantSection())
+ SwitchToDataSection(TAI->getFourByteConstantSection(), I);
+ else if (!HasReloc && Size == 8 &&
+ TAI->getEightByteConstantSection())
+ SwitchToDataSection(TAI->getEightByteConstantSection(), I);
+ else if (!HasReloc && Size == 16 &&
+ TAI->getSixteenByteConstantSection())
+ SwitchToDataSection(TAI->getSixteenByteConstantSection(), I);
+ else if (TAI->getReadOnlySection())
+ SwitchToDataSection(TAI->getReadOnlySection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ }
+ }
+
+ break;
+ }
+ default:
+ assert(0 && "Unknown linkage type!");
+ break;
+ }
+
+ EmitAlignment(Align, I);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " " << I->getName()
+ << "\n";
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << name << ", " << Size << "\n";
+    // If the initializer is an extern weak symbol, remember to emit the weak
+    // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ unsigned j = 1;
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i, ++j) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
+ "none,16", 0);
+ else
+ SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
+ "none,12", 0);
+
+ EmitAlignment(2);
+ O << "\t.code\t32\n";
+
+ O << "L" << *i << "$stub:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\tldr ip, L" << *i << "$slp\n";
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ O << "L" << *i << "$scv:\n";
+ O << "\tadd ip, pc, ip\n";
+ }
+ O << "\tldr pc, [ip, #0]\n";
+ O << "L" << *i << "$slp:\n";
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "\t.long\tL" << *i << "$lazy_ptr-(L" << *i << "$scv+8)\n";
+ else
+ O << "\t.long\tL" << *i << "$lazy_ptr\n";
+ SwitchToDataSection(".lazy_symbol_pointer", 0);
+ O << "L" << *i << "$lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\t.long\tdyld_stub_binding_helper\n";
+ }
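+    // For a hypothetical stub symbol "foo" under the PIC relocation model, the
+    // loop above emits roughly the following (a sketch derived from the prints
+    // above, not verbatim assembler output):
+    //
+    //   Lfoo$stub:
+    //     .indirect_symbol foo
+    //     ldr ip, Lfoo$slp
+    //   Lfoo$scv:
+    //     add ip, pc, ip
+    //     ldr pc, [ip, #0]
+    //   Lfoo$slp:
+    //     .long Lfoo$lazy_ptr-(Lfoo$scv+8)
+    //   Lfoo$lazy_ptr:
+    //     .indirect_symbol foo
+    //     .long dyld_stub_binding_helper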
+ O << "\n";
+
+ // Output non-lazy-pointers for external and common global variables.
+ if (GVNonLazyPtrs.begin() != GVNonLazyPtrs.end())
+ SwitchToDataSection(".non_lazy_symbol_pointer", 0);
+ for (std::set<std::string>::iterator i = GVNonLazyPtrs.begin(),
+ e = GVNonLazyPtrs.end(); i != e; ++i) {
+ O << "L" << *i << "$non_lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\t.long\t0\n";
+ }
+
+ // Emit initial debug information.
+ DW.EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else {
+ // Emit final debug information for ELF.
+ DW.EndModule();
+ }
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 0000000..ed1d287
--- /dev/null
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,92 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Raul Herbster and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-emitter"
+#include "ARMInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARM.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
+ : MachineFunctionPass((intptr_t)&ID), II(0), TD(0), TM(tm),
+ MCE(mce) {}
+ Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
+ const ARMInstrInfo &ii, const TargetData &td)
+ : MachineFunctionPass((intptr_t)&ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ };
+ char Emitter::ID = 0;
+}
+
+/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code
+/// to the specified MCE object.
+FunctionPass *llvm::createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter(TM, MCE);
+}
+
+bool Emitter::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+ II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo();
+ TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData();
+
+ do {
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void Emitter::emitInstruction(const MachineInstr &MI) {
+ NumEmitted++; // Keep track of the # of mi's emitted
+}
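+
+// A fleshed-out emitter would presumably compute a 32-bit encoding for each
+// instruction and hand it to the MachineCodeEmitter; a minimal sketch, assuming
+// a hypothetical encoding helper getBinaryCodeForInstr and a little-endian word
+// emitter on MCE, might look like:
+//
+//   void Emitter::emitInstruction(const MachineInstr &MI) {
+//     NumEmitted++;                                  // statistics, as above
+//     unsigned Binary = getBinaryCodeForInstr(MI);   // hypothetical encoder
+//     MCE.emitWordLE(Binary);                        // assumed MCE interface
+//   }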
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000..1b93631
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1277 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ /// NextUID - Assign unique ID's to CPE's.
+ unsigned NextUID;
+
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ unsigned MaxDisp;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ const TargetInstrInfo *TII;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp,
+ bool DoDump);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &Fn);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &Fn) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (isThumb) {
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ assert((BBOffsets[MBB->getNumber()]%4 == 0 &&
+ BBSizes[MBB->getNumber()]%4 == 0) ||
+ (BBOffsets[MBB->getNumber()]%4 != 0 &&
+ BBSizes[MBB->getNumber()]%4 != 0));
+ }
+ }
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DOUT << "block " << J << " offset " << BBOffsets[J] <<
+ " size " << BBSizes[J] << "\n";
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+ MachineConstantPool &MCP = *Fn.getConstantPool();
+
+ TII = Fn.getTarget().getInstrInfo();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+
+ HasFarJump = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ Fn.RenumberBlocks();
+
+  /// Thumb functions get 2-byte alignment by default; those containing constant
+  /// pools are bumped to 4-byte alignment below so we can keep exact track of
+  /// where the alignment padding goes. Set the default here.
+ AFI->setAlign(isThumb ? 1U : 2U);
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(Fn, CPEMIs);
+ if (isThumb)
+ AFI->setAlign(2U);
+ }
+
+ /// The next UID to take is the first unused one.
+ NextUID = CPEMIs.size();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(Fn, CPEMIs);
+ CPEMIs.clear();
+
+ /// Remove dead constant pool entries.
+ RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ bool MadeChange = false;
+ while (true) {
+ bool Change = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ Change |= HandleConstantPoolUser(Fn, i);
+ DEBUG(dumpBBs());
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ Change |= FixUpImmediateBr(Fn, ImmBranches[i]);
+ DEBUG(dumpBBs());
+ if (!Change)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(Fn);
+
+  // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+  // undo the spill / restore of LR if possible.
+ if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb)
+ MadeChange |= UndoLRSpillRestore();
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs){
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = new MachineBasicBlock();
+ Fn.getBasicBlockList().push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ Fn.getConstantPool()->getConstants();
+
+ const TargetData &TD = *Fn.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ NumCPEs++;
+ DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n";
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ return false;
+
+ MachineBasicBlock *NextBB = next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // Add instruction size to MBBSize.
+ MBBSize += ARM::GetInstSize(I);
+
+ int Opc = I->getOpcode();
+ if (TII->isBranch(Opc)) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ case ARM::tBR_JTr:
+ // A Thumb table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ AFI->setAlign(2U);
+ if ((Offset+MBBSize)%4 != 0)
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ default:
+ continue; // Ignore other JT branches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isConstantPoolIndex()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
+ switch (TSFlags & ARMII::AddrModeMask) {
+ default:
+ // Constant pool entries can reach anything.
+ if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ continue;
+ if (I->getOpcode() == ARM::tLEApcrel) {
+ Bits = 8; // Taking the address of a CP entry.
+ break;
+ }
+ assert(0 && "Unknown addressing mode for CP reference!");
+ case ARMII::AddrMode1: // AM1: 8 bits << 2
+ Bits = 8;
+ Scale = 4; // Taking the address of a CP entry.
+ break;
+ case ARMII::AddrMode2:
+ Bits = 12; // +-offset_12
+ break;
+ case ARMII::AddrMode3:
+ Bits = 8; // +-offset_8
+ break;
+ // addrmode4 has no immediate offset.
+ case ARMII::AddrMode5:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ break;
+ case ARMII::AddrModeT1:
+ Bits = 5; // +offset_5
+ break;
+ case ARMII::AddrModeT2:
+ Bits = 5;
+ Scale = 2; // +(offset_5*2)
+ break;
+ case ARMII::AddrModeT4:
+ Bits = 5;
+ Scale = 4; // +(offset_5*4)
+ break;
+ case ARMII::AddrModeTs:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getConstantPoolIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+    // In Thumb mode, if this block is a constpool island, we may need padding
+    // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4) != 0)
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
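+
+// Worked example of the displacement bound above, MaxOffs = ((1 << Bits)-1) * Scale:
+// an AddrMode2 load (Bits=12, Scale=1) can reach a CP entry up to 4095 bytes away,
+// AddrMode3 (Bits=8) up to 255 bytes, AddrMode5 and AddrModeTs (Bits=8, Scale=4)
+// up to 1020 bytes, and AddrModeT1 (Bits=5) only 31 bytes.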
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ Offset%4 != 0)
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += ARM::GetInstSize(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+  // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList. Specifically, we need to add NewBB as having
+  // available water after it.
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ OrigBB->getParent()->getBasicBlockList().insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ BuildMI(OrigBB, TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB);
+ NumSplit++;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList. Specifically, we need to add OrigBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+
+  // Figure out how large the newly created NewBB is. (It cannot
+ // contain a constpool_entry or tablejump.)
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += ARM::GetInstSize(I);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+ // Set the size of NewBB in BBSizes.
+ BBSizes[NewBBI] = NewBBSize;
+
+  // We removed instructions from OrigBB; subtract that off from its size.
+ // Add 2 or 4 to the block to count the unconditional branch we added to it.
+ unsigned delta = isThumb ? 2 : 4;
+ BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // All BBOffsets following these blocks must be modified.
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ if (isThumb && UserOffset%4 !=0)
+ UserOffset -= 2;
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0)
+ TrialOffset += 2;
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset-UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset-TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
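+
+// Worked example of the Thumb compensation above: a user at offset 6 (==2 mod 4)
+// has its displacement computed by the hardware from offset 4, so UserOffset
+// becomes 4; a trial CPE location of 10 will be rounded up to 12 when the island
+// is laid out, so the range check uses TrialOffset = 12.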
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specified user's MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U)
+{
+ unsigned MaxDisp = U.MaxDisp;
+ MachineFunction::iterator I = next(MachineFunction::iterator(Water));
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. (Currently applies only to ARM, so
+ // no alignment compensation attempted here.)
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and the
+/// specified constant pool entry instruction fits in MI's displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned MaxDisp, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert(CPEOffset%4 == 0 && "Misaligned CPE");
+
+ if (DoDump) {
+ DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI;
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+ for(unsigned i=BB->getNumber()+1; i<BB->getParent()->getNumBlockIDs(); i++) {
+ BBOffsets[i] += delta;
+    // If some existing blocks have padding, adjust the padding as needed; this
+    // is a bit tricky. delta can be negative so don't use % on that.
+ if (isThumb) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty()) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned oldOffset = BBOffsets[i] - delta;
+ if (oldOffset%4==0 && BBOffsets[i]%4!=0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) {
+ // remove existing padding
+ BBSizes[i] -=2;
+ delta -= 2;
+ }
+ }
+ // Thumb jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ MachineInstr *ThumbJTMI = NULL;
+ if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+ ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI) {
+ unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned oldMIOffset = newMIOffset - delta;
+ if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = next(MBBI);
+ }
+ }
+}
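+
+// Example of the padding adjustment above: if a 2-byte insertion (delta == 2)
+// moves a Thumb constant island from offset 100 (0 mod 4) to 102, the island now
+// needs 2 bytes of alignment padding, so its recorded size grows by 2 and delta
+// becomes 4 for all of the following blocks.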
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ NumCPEs--;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) {
+ DOUT << "In range\n";
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) {
+ DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n";
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isConstantPoolIndex()) {
+ UserMI->getOperand(j).setConstantPoolIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4;
+}
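+
+// From the expression above: an unconditional Thumb branch (tB) can be displaced
+// by up to ((1<<10)-1)*2 = 2046 bytes, while an ARM B reaches ((1<<23)-1)*4 =
+// 33554428 bytes (roughly +-32MB), matching the Bits/Scale values recorded in
+// InitialFunctionScan.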
+
+/// AcceptWater - Small amount of common code factored out of the following.
+
+MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP) {
+ DOUT << "found water in range\n";
+ // Remove the original WaterList entry; we want subsequent
+ // insertions in this vicinity to go after the one we're
+ // about to insert. This considerably reduces the number
+ // of times we have to move the same CPE more than once.
+ WaterList.erase(IP);
+ // CPE goes before following block (NewMBB).
+ return next(MachineFunction::iterator(WaterBB));
+}
+
+/// LookForWater - look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, *NewMBB
+/// is set to the block following the chosen water.
+/// For ARM, we prefer the water that's farthest away. For Thumb, prefer
+/// water that will not introduce padding to water that will; within each
+/// group, prefer the water that's farthest away.
+
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB) {
+ std::vector<MachineBasicBlock*>::iterator IPThatWouldPad;
+ MachineBasicBlock* WaterBBThatWouldPad = NULL;
+ if (!WaterList.empty()) {
+ for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterIsInRange(UserOffset, WaterBB, U)) {
+ if (isThumb &&
+ (BBOffsets[WaterBB->getNumber()] +
+ BBSizes[WaterBB->getNumber()])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!WaterBBThatWouldPad) {
+ WaterBBThatWouldPad = WaterBB;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ *NewMBB = AcceptWater(WaterBB, IP);
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ }
+ if (isThumb && WaterBBThatWouldPad) {
+ *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case *NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset, MachineBasicBlock** NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the use is at the end of the block, or the end of the block
+ // is within range, make new water there. (The addition below is
+ // for the unconditional branch we will be adding: 4 bytes on ARM,
+  // 2 on Thumb. Possible Thumb alignment padding is accounted for
+ // inside OffsetIsInRange.
+ // If the block ends in an unconditional branch already, it is water,
+ // and is known to be out of range, so we'll always be adding a branch.)
+ if (&UserMBB->back() == UserMI ||
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2: 4),
+ U.MaxDisp, !isThumb)) {
+ DOUT << "Split at end of block\n";
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ *NewMBB = next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ARM::tB : ARM::B;
+ BuildMI(UserMBB, TII->get(UncondBr)).addMBB(*NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ for (unsigned Offset = UserOffset+ARM::GetInstSize(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += ARM::GetInstSize(MI),
+ MI = next(MI)) {
+ if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ CPUsers[CPUIndex].MaxDisp, !isThumb)) {
+ BaseInsertOffset -= (isThumb ? 2 : 4);
+ EndInsertOffset -= (isThumb ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+ DOUT << "Split in middle of big block\n";
+ *NewMBB = SplitBlockBeforeInstr(prior(MI));
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
+ unsigned CPUserIndex){
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ MachineBasicBlock *NewMBB;
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC (2 insns ahead of the reference).
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+  // Special case: tLEApcrel is really two instructions; the actual user is the
+  // second instruction.
+ if (UserMI->getOpcode() == ARM::tLEApcrel)
+ UserOffset += 2;
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = NextUID++;
+
+ // Look for water where we can place this CPE. We look for the farthest one
+ // away that will work. Forward references only for now (although later
+ // we might find some that are backwards).
+
+ if (!LookForWater(U, UserOffset, &NewMBB)) {
+ // No water found.
+ DOUT << "No water found\n";
+ CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
+ }
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MachineBasicBlock *NewIsland = new MachineBasicBlock();
+ Fn.getBasicBlockList().insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ NumCPEs++;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isConstantPoolIndex()) {
+ UserMI->getOperand(i).setConstantPoolIndex(ID);
+ break;
+ }
+
+ DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI;
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+    // In Thumb mode, the island's size may have been padded by two to satisfy
+    // the alignment requirement, so it can still be nonzero now that the block
+    // is empty; fold the remainder into Size and zero the recorded size.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+  bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and the
+/// specified BB fits in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DOUT << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI;
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMachineBasicBlock();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(Fn, Br);
+ return FixUpConditionalBr(Fn, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, add an intermediate branch instruction to a branch.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(isThumb && "Expected a Thumb function!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setInstrDescriptor(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ NumUBrFixed++;
+
+ DOUT << " Changed B to long jump " << *MI;
+
+ return true;
+}
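+
+// The far-jump range set above, (1 << 21) * 2 = 4194304 bytes, corresponds to
+// the roughly +-4MB reach of the Thumb BL sequence that tBfar expands to.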
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMachineBasicBlock();
+
+  // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImmedValue();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ NumCBrFixed++;
+ if (BMI != MI) {
+ if (next(MachineBasicBlock::iterator(MI)) == MBB->back() &&
+ BMI->getOpcode() == Br.UncondBr) {
+      // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMachineBasicBlock();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DOUT << " Invert Bcc condition and swap its destination with " << *BMI;
+ BMI->getOperand(0).setMachineBasicBlock(DestBB);
+ MI->getOperand(0).setMachineBasicBlock(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+    // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = ARM::GetInstSize(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+
+ DOUT << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n";
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, TII->get(MI->getOpcode())).addMBB(NextBB)
+ .addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += ARM::GetInstSize(&MBB->back());
+ BuildMI(MBB, TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += ARM::GetInstSize(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= ARM::GetInstSize(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = ARM::GetInstSize(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
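
The rewrite above hinges on inverting the condition so that the short-range branch falls through to a new unconditional branch covering the long distance. A minimal standalone sketch of the inversion, over a reduced condition-code set:

    #include <cassert>

    // A reduced set of ARM condition codes, enough to show the rewrite: the
    // original condition is inverted so the short branch skips over an
    // unconditional branch that covers the long distance.
    enum Cond { EQ, NE, GE, LT, GT, LE };

    static Cond opposite(Cond C) {
      switch (C) {
      case EQ: return NE;
      case NE: return EQ;
      case GE: return LT;
      case LT: return GE;
      case GT: return LE;
      case LE: return GT;
      }
      return EQ; // not reached
    }

    int main() {
      //   blt L1      becomes      bge L2
      //                            b   L1
      //                          L2:
      assert(opposite(LT) == GE);
      assert(opposite(opposite(EQ)) == EQ);
      return 0;
    }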
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
+/// LR / restore LR to pc.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(0).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 1) {
+ BuildMI(MI->getParent(), TII->get(ARM::tBX_RET));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000..30a8eaf
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,90 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)gv->getType()),
+ GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
+ ARMCP::ARMCPKind k,
+ const char *Modif)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0),
+    Modifier(Modif), AddCurrentAddress(false) {}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = (1 << Alignment)-1;
+ const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].Offset & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->GV == GV &&
+ CPV->S == S &&
+ CPV->LabelId == LabelId &&
+ CPV->Kind == Kind &&
+ CPV->PCAdjust == PCAdjust)
+ return i;
+ }
+ }
+
+ return -1;
+}
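
The lookup above reuses an existing entry only when every identity field matches and the entry's offset satisfies the requested alignment. A self-contained sketch of the same search over a hypothetical entry type:

    #include <cassert>
    #include <vector>

    // Hypothetical stand-in for a constant pool entry: an offset within the
    // pool plus the identity fields that must match for the entry to be reused.
    struct Entry {
      unsigned Offset;
      int Id;          // stands in for GV / S / LabelId / Kind / PCAdjust
    };

    // Return the index of a reusable entry whose offset is aligned to
    // 1 << AlignLog2 bytes, or -1 if none exists.
    static int findExisting(const std::vector<Entry> &Pool, int Id,
                            unsigned AlignLog2) {
      unsigned AlignMask = (1u << AlignLog2) - 1;
      for (unsigned i = 0, e = Pool.size(); i != e; ++i)
        if (Pool[i].Id == Id && (Pool[i].Offset & AlignMask) == 0)
          return (int)i;
      return -1;
    }

    int main() {
      std::vector<Entry> Pool = {{0, 7}, {6, 9}, {8, 9}};
      assert(findExisting(Pool, 9, 2) == 2);  // entry at offset 6 is misaligned
      assert(findExisting(Pool, 5, 2) == -1); // no matching identity
      return 0;
    }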
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger((unsigned)Kind);
+ ID.AddInteger(PCAdjust);
+}
+
+void ARMConstantPoolValue::print(std::ostream &O) const {
+ if (GV)
+ O << GV->getName();
+ else
+ O << S;
+ if (isNonLazyPointer()) O << "$non_lazy_ptr";
+ else if (isStub()) O << "$stub";
+ if (Modifier) O << "(" << Modifier << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPIC" << LabelId << "+"
+ << (unsigned)PCAdjust;
+ if (AddCurrentAddress)
+ O << "-.";
+ O << ")";
+ }
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000..d71bcf0
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,75 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+
+namespace llvm {
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPNonLazyPtr,
+ CPStub
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC relative displacement between the address of the load
+/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ GlobalValue *GV; // GlobalValue being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub?
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ // 8 for ARM, 4 for Thumb.
+ const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+public:
+ ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
+ const char *Modifier);
+
+
+ GlobalValue *getGV() const { return GV; }
+ const char *getSymbol() const { return S; }
+ const char *getModifier() const { return Modifier; }
+ bool hasModifier() const { return Modifier != NULL; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
+ bool isStub() const { return Kind == ARMCP::CPStub; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(std::ostream &O) const;
+};
+
+}
+
+#endif
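
The value the class describes, &GV-(LPIC+8), round-trips as shown in this standalone sketch; the addresses are made up, and the adjustment would be 4 rather than 8 in Thumb mode:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t GVAddr    = 0x00020010;  // hypothetical address of the global
      uint32_t LabelAddr = 0x00010004;  // hypothetical address of the LPIC label
      uint32_t PCAdjust  = 8;           // 8 in ARM mode, 4 in Thumb mode

      // What the constant pool entry holds ...
      uint32_t PoolEntry = GVAddr - (LabelAddr + PCAdjust);
      // ... and what the pc-relative add at the label recovers at run time.
      uint32_t Reloaded  = PoolEntry + (LabelAddr + PCAdjust);
      assert(Reloaded == GVAddr);
      return 0;
    }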
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
new file mode 100644
index 0000000..c56640a
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -0,0 +1,33 @@
+//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetFrameInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMSubtarget.h"
+
+namespace llvm {
+
+class ARMFrameInfo : public TargetFrameInfo {
+public:
+ ARMFrameInfo(const ARMSubtarget &ST)
+ : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 0000000..933651d
--- /dev/null
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,859 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMTargetLowering Lowering;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ ARMDAGToDAGISel(ARMTargetMachine &TM)
+ : SelectionDAGISel(Lowering), Lowering(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ SDNode *Select(SDOperand Op);
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+ bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode3(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+
+ bool SelectAddrModePC(SDOperand Op, SDOperand N, SDOperand &Offset,
+ SDOperand &Label);
+
+ bool SelectThumbAddrModeRR(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeRI5(SDOperand Op, SDOperand N, unsigned Scale,
+ SDOperand &Base, SDOperand &OffImm,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeS1(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &OffImm, SDOperand &Offset);
+ bool SelectThumbAddrModeS2(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &OffImm, SDOperand &Offset);
+ bool SelectThumbAddrModeS4(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &OffImm, SDOperand &Offset);
+ bool SelectThumbAddrModeSP(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &OffImm);
+
+ bool SelectShifterOperandReg(SDOperand Op, SDOperand N, SDOperand &A,
+ SDOperand &B, SDOperand &C);
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+};
+}
+
+void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
+
+ ScheduleAndEmitDAG(DAG);
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset,
+ SDOperand &Opc) {
+ if (N.getOpcode() == ISD::MUL) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() == ISD::ADD)
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC >= 0 && RHSC < 0x1000) ||
+ (RHSC < 0 && RHSC > -0x1000)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(1).getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
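
The MUL fold at the top of this routine (X * [3,5,9] -> X + X * [2,4,8]) and the ISD::MUL case later in Select rest on the same identity; a standalone check:

    #include <cassert>
    #include <cstdint>

    // A multiply by 2^n +/- 1 becomes a single add or reverse subtract with a
    // shifted operand.
    static uint32_t mulPow2Plus1(uint32_t X, unsigned N)  { return X + (X << N); }
    static uint32_t mulPow2Minus1(uint32_t X, unsigned N) { return (X << N) - X; }

    int main() {
      const uint32_t Vals[] = {0, 1, 7, 123456};
      for (uint32_t X : Vals) {
        assert(mulPow2Plus1(X, 3) == X * 9);   // x*9 -> add rd, rx, rx, lsl #3
        assert(mulPow2Minus1(X, 3) == X * 7);  // x*7 -> rsb rd, rx, rx, lsl #3
        assert(mulPow2Plus1(X, 2) == X * 5);   // x*5 -> add rd, rx, rx, lsl #2
      }
      return 0;
    }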
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getValue();
+ if (Val >= 0 && Val < 0x1000) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant, if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset,
+ SDOperand &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+    // X - C is canonicalized to X + -C, no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getValue();
+ if (Val >= 0 && Val < 256) {
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset) {
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4.
+ RHSC >>= 2;
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDOperand N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getValue(),
+ MVT::i32);
+ return true;
+ }
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset){
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+    // We must materialize a zero in a reg! Returning a constant here won't
+    // work since its node is -1 so it won't get added to the selection queue.
+    // Explicitly issue a tMOVi8 node!
+ Offset = SDOperand(CurDAG->getTargetNode(ARM::tMOVi8, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDOperand Op, SDOperand N,
+ unsigned Scale, SDOperand &Base,
+ SDOperand &OffImm, SDOperand &Offset) {
+ if (Scale == 4) {
+ SDOperand TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied.
+ RHSC /= Scale;
+ if (RHSC >= 0 && RHSC < 32) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &OffImm,
+ SDOperand &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &OffImm,
+ SDOperand &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &OffImm,
+ SDOperand &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied.
+ RHSC >>= 2;
+ if (RHSC >= 0 && RHSC < 256) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
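
The SP-relative form preferred here encodes an unsigned 8-bit word offset, so only byte offsets from 0 to 1020 that are multiples of 4 can be folded. A standalone sketch of that test:

    #include <cassert>

    // tLDRspi / tSTRspi hold an unsigned 8-bit word count, so only byte
    // offsets 0, 4, ..., 1020 can be folded into the instruction.
    static bool fitsThumbSPOffset(int ByteOffset) {
      return ByteOffset >= 0 && (ByteOffset & 3) == 0 && (ByteOffset >> 2) < 256;
    }

    int main() {
      assert(fitsThumbSPOffset(0));
      assert(fitsThumbSPOffset(1020));
      assert(!fitsThumbSPOffset(1024));  // word count 256 does not fit in 8 bits
      assert(!fitsThumbSPOffset(-4));    // offsets from SP are unsigned here
      assert(!fitsThumbSPOffset(10));    // not word-aligned
      return 0;
    }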
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDOperand Op,
+ SDOperand N,
+ SDOperand &BaseReg,
+ SDOperand &ShReg,
+ SDOperand &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+/// getAL - Returns a ARMCC::AL immediate node.
+static inline SDOperand getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+
+SDNode *ARMDAGToDAGISel::Select(SDOperand Op) {
+ SDNode *N = Op.Val;
+ unsigned Opcode = N->getOpcode();
+
+ if (Opcode >= ISD::BUILTIN_OP_END && Opcode < ARMISD::FIRST_NUMBER)
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getValue();
+ bool UseCP = true;
+ if (Subtarget->isThumb())
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ if (UseCP) {
+ SDOperand CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb())
+ ResNode = CurDAG->getTargetNode(ARM::tLDRcp, MVT::i32, MVT::Other,
+ CPIdx, CurDAG->getEntryNode());
+ else {
+ SDOperand Ops[] = {
+ CPIdx,
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getTargetNode(ARM::LDRcp, MVT::i32, MVT::Other, Ops, 6);
+ }
+ ReplaceUses(Op, SDOperand(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDOperand TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb())
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ else {
+ SDOperand Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::ADD: {
+ // Select add sp, c to tADDhirr.
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
+ if (LHSR && LHSR->getReg() == ARM::SP) {
+ std::swap(N0, N1);
+ std::swap(LHSR, RHSR);
+ }
+ if (RHSR && RHSR->getReg() == ARM::SP) {
+ AddToISelQueue(N0);
+ AddToISelQueue(N1);
+ return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), N0, N1);
+ }
+ break;
+ }
+ case ISD::MUL:
+ if (Subtarget->isThumb())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RHSV = C->getValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ SDOperand V = Op.getOperand(0);
+ AddToISelQueue(V);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
+ SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ SDOperand V = Op.getOperand(0);
+ AddToISelQueue(V);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
+ SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ break;
+ case ARMISD::FMRRD:
+ AddToISelQueue(Op.getOperand(0));
+ return CurDAG->getTargetNode(ARM::FMRRD, MVT::i32, MVT::i32,
+ Op.getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ARMISD::MULHILOU: {
+ AddToISelQueue(Op.getOperand(0));
+ AddToISelQueue(Op.getOperand(1));
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::UMULL, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ARMISD::MULHILOS: {
+ AddToISelQueue(Op.getOperand(0));
+ AddToISelQueue(Op.getOperand(1));
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::SMULL, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ MVT::ValueType LoadedVT = LD->getLoadedVT();
+ if (AM != ISD::UNINDEXED) {
+ SDOperand Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDOperand Chain = LD->getChain();
+ SDOperand Base = LD->getBasePtr();
+ AddToISelQueue(Chain);
+ AddToISelQueue(Base);
+ AddToISelQueue(Offset);
+ SDOperand Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getTargetNode(Opcode, MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc;
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+ SDOperand N3 = Op.getOperand(3);
+ SDOperand InFlag = Op.getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ AddToISelQueue(Chain);
+ AddToISelQueue(N1);
+ AddToISelQueue(InFlag);
+ SDOperand Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getValue()), MVT::i32);
+ SDOperand Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getTargetNode(Opc, MVT::Other, MVT::Flag, Ops, 5);
+ Chain = SDOperand(ResNode, 0);
+ InFlag = SDOperand(ResNode, 1);
+ ReplaceUses(SDOperand(Op.Val, 1), InFlag);
+ ReplaceUses(SDOperand(Op.Val, 0), SDOperand(Chain.Val, Chain.ResNo));
+ return NULL;
+ }
+ case ARMISD::CMOV: {
+ bool isThumb = Subtarget->isThumb();
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+ SDOperand N3 = Op.getOperand(3);
+ SDOperand InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDOperand CPTmp0;
+ SDOperand CPTmp1;
+ SDOperand CPTmp2;
+ if (!isThumb && VT == MVT::i32 &&
+ SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
+ AddToISelQueue(N0);
+ AddToISelQueue(CPTmp0);
+ AddToISelQueue(CPTmp1);
+ AddToISelQueue(CPTmp2);
+ AddToISelQueue(InFlag);
+ SDOperand Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getValue()), MVT::i32);
+ SDOperand Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.Val, ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (VT == MVT::i32 &&
+ N3.getOpcode() == ISD::Constant &&
+ Predicate_so_imm(N3.Val)) {
+ AddToISelQueue(N0);
+ AddToISelQueue(InFlag);
+ SDOperand Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getValue()), MVT::i32);
+ Tmp1 = Transform_so_imm_XFORM(Tmp1.Val);
+ SDOperand Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getValue()), MVT::i32);
+ SDOperand Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.Val, ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ AddToISelQueue(N0);
+ AddToISelQueue(N1);
+ AddToISelQueue(InFlag);
+ SDOperand Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getValue()), MVT::i32);
+ SDOperand Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tMOVCCr : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::FCPYScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FCPYDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.Val, Opc, VT, Ops, 5);
+ }
+ case ARMISD::CNEG: {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+ SDOperand N3 = Op.getOperand(3);
+ SDOperand InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ AddToISelQueue(N0);
+ AddToISelQueue(N1);
+ AddToISelQueue(InFlag);
+ SDOperand Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getValue()), MVT::i32);
+ SDOperand Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::f32:
+ Opc = ARM::FNEGScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FNEGDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.Val, Opc, VT, Ops, 5);
+ }
+ }
+ return SelectCode(Op);
+}
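
The ISD::Constant case above keeps a value out of the constant pool when it (or its complement) fits an ARM so_imm operand, an 8-bit value rotated right by an even amount (the real check is ARM_AM::getSOImmVal). A standalone sketch of that encodability test:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned N) {
      N &= 31;
      return N ? (V << N) | (V >> (32 - N)) : V;
    }

    // A value encodable as "imm8 rotated right by 2*k" becomes <= 0xFF once
    // rotated left by the same amount.
    static bool isSOImmediate(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if (rotl32(V, Rot) <= 0xFF)
          return true;
      return false;
    }

    int main() {
      assert(isSOImmediate(0xFF));         // plain 8-bit value
      assert(isSOImmediate(0xFF000000));   // 0xFF rotated right by 8
      assert(isSOImmediate(0x3FC));        // 0xFF rotated right by 30
      assert(!isSOImmediate(0x12345678));  // needs two instrs or a pool load
      return 0;
    }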
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
+ return new ARMDAGToDAGISel(TM);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000..6f63fbd
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,1859 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Instruction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM), ARMPCLabelIndex(0) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Don't have these.
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, NULL);
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, NULL);
+
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc: e.g.
+ // __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ }
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+
+ // i64 operation support.
+ if (Subtarget->isThumb()) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ } else {
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ }
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+  // Expand mem operations generically.
+ setOperationAction(ISD::MEMSET , MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
+ setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART , MVT::Other, Expand);
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb())
+ // Turn f64->i64 into FMRRD iff target supports vfp2.
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+
+ setOperationAction(ISD::SETCC , MVT::i32, Expand);
+ setOperationAction(ISD::SETCC , MVT::f32, Expand);
+ setOperationAction(ISD::SETCC , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT , MVT::i32, Expand);
+ setOperationAction(ISD::SELECT , MVT::f32, Expand);
+ setOperationAction(ISD::SELECT , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC , MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC , MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT , MVT::Other, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // FP Constants can't be immediates.
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+
+ // We don't support sin/cos/fmod/copysign
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
+ setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
+
+  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
+}
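
The SETNE/SETEQ settings above describe how the results of the VFP comparison libcalls are interpreted. A standalone sketch, assuming the helpers return nonzero exactly when their predicate holds; the model functions below are hypothetical stand-ins, not the real routines:

    #include <cassert>
    #include <limits>

    // Hypothetical models of two comparison helpers, under the assumed
    // convention: each returns nonzero exactly when its predicate holds.
    static int eq_model(float A, float B)    { return A == B ? 1 : 0; }
    static int unord_model(float A, float B) { return (A != A || B != B) ? 1 : 0; }

    int main() {
      float QNaN = std::numeric_limits<float>::quiet_NaN();

      // OEQ_F32 maps to the "eq" helper with SETNE: the DAG tests the
      // helper's result against zero.
      assert(eq_model(1.0f, 1.0f) != 0);
      assert(!(eq_model(1.0f, 2.0f) != 0));

      // O_F32 ("ordered") reuses the "unord" helper with SETEQ: ordered means
      // the unordered test returned zero.
      assert(unord_model(1.0f, 2.0f) == 0);
      assert(!(unord_model(QNaN, 2.0f) == 0));
      return 0;
    }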
+
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPNZ: return "ARMISD::CMPNZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CNEG: return "ARMISD::CNEG";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+ case ARMISD::MULHILOU: return "ARMISD::MULHILOU";
+ case ARMISD::MULHILOS: return "ARMISD::MULHILOS";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::FMRRD: return "ARMISD::FMRRD";
+ case ARMISD::FMDRR: return "ARMISD::FMDRR";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
+/// returns true if the operands should be inverted to form the proper
+/// comparison.
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ bool Invert = false;
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: assert(0 && "Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+ return Invert;
+}
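
Some predicates above need two ARM condition codes, for example SETONE maps to MI with a second check of GT. A standalone model of why that decomposition is the right one:

    #include <cassert>
    #include <limits>

    // ONE ("ordered and not equal") holds iff the operands compare less-than
    // or greater-than; both sub-tests are false when either input is NaN,
    // which is why two conditional checks (MI, then GT) are emitted after the
    // FP compare.
    static bool setONE(double A, double B) { return (A < B) || (A > B); }

    int main() {
      double QNaN = std::numeric_limits<double>::quiet_NaN();
      assert(setONE(1.0, 2.0));
      assert(setONE(2.0, 1.0));
      assert(!setONE(1.0, 1.0));
      assert(!setONE(QNaN, 1.0));   // unordered: neither check fires
      return 0;
    }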
+
+static void
+HowToPassArgument(MVT::ValueType ObjectVT, unsigned NumGPRs,
+ unsigned StackOffset, unsigned &NeededGPRs,
+ unsigned &NeededStackSize, unsigned &GPRPad,
+ unsigned &StackPad, unsigned Flags) {
+ NeededStackSize = 0;
+ NeededGPRs = 0;
+ StackPad = 0;
+ GPRPad = 0;
+ unsigned align = (Flags >> ISD::ParamFlags::OrigAlignmentOffs);
+ GPRPad = NumGPRs % ((align + 3)/4);
+ StackPad = StackOffset % align;
+ unsigned firstGPR = NumGPRs + GPRPad;
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ if (firstGPR < 4)
+ NeededGPRs = 1;
+ else
+ NeededStackSize = 4;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ if (firstGPR < 3)
+ NeededGPRs = 2;
+ else if (firstGPR == 3) {
+ NeededGPRs = 1;
+ NeededStackSize = 4;
+ } else
+ NeededStackSize = 8;
+ }
+}
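
A standalone model of the register/stack split computed above: four argument registers, with 8-byte-aligned 64-bit values padded to an even register index and 4-byte-aligned ones possibly split between r3 and the stack. This is a simplification for illustration, not a full calling-convention implementation:

    #include <cassert>
    #include <utility>

    struct ArgState { unsigned NumGPRs = 0; unsigned StackBytes = 0; };

    static std::pair<unsigned, unsigned>    // {registers used, stack bytes used}
    allocate(ArgState &S, unsigned Size, unsigned Align) {
      S.NumGPRs += S.NumGPRs % (Align / 4);   // register alignment padding
      S.StackBytes += S.StackBytes % Align;   // stack slot padding
      unsigned Regs = 0, Stack = 0;
      if (Size == 4) {
        if (S.NumGPRs < 4) Regs = 1; else Stack = 4;
      } else {                                // Size == 8
        if (S.NumGPRs < 3)       Regs = 2;
        else if (S.NumGPRs == 3) { Regs = 1; Stack = 4; }   // split r3 + stack
        else                      Stack = 8;
      }
      S.NumGPRs += Regs;
      S.StackBytes += Stack;
      return {Regs, Stack};
    }

    int main() {
      ArgState A;
      assert(allocate(A, 4, 4) == std::make_pair(1u, 0u)); // i32 -> r0
      assert(allocate(A, 8, 8) == std::make_pair(2u, 0u)); // f64 -> r2:r3
      assert(allocate(A, 4, 4) == std::make_pair(0u, 4u)); // i32 -> stack

      ArgState B;
      allocate(B, 4, 4);                                   // r0
      allocate(B, 4, 4);                                   // r1
      allocate(B, 4, 4);                                   // r2
      assert(allocate(B, 8, 4) == std::make_pair(1u, 4u)); // i64 -> r3 + stack
      return 0;
    }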
+
+/// LowerCALL - Lower an ISD::CALL node into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType RetVT= Op.Val->getValueType(0);
+ SDOperand Chain = Op.getOperand(0);
+ unsigned CallConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "unknown calling convention");
+ SDOperand Callee = Op.getOperand(4);
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+ unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
+ unsigned NumGPRs = 0; // GPRs used for parameter passing.
+
+ // Count how many bytes are to be pushed on the stack.
+ unsigned NumBytes = 0;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i < NumOps; ++i) {
+ unsigned ObjSize;
+ unsigned ObjGPRs;
+ unsigned StackPad;
+ unsigned GPRPad;
+ MVT::ValueType ObjectVT = Op.getOperand(5+2*i).getValueType();
+ unsigned Flags = Op.getConstantOperandVal(5+2*i+1);
+ HowToPassArgument(ObjectVT, NumGPRs, NumBytes, ObjGPRs, ObjSize,
+ GPRPad, StackPad, Flags);
+ NumBytes += ObjSize + StackPad;
+ NumGPRs += ObjGPRs + GPRPad;
+ }
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, MVT::i32));
+
+ SDOperand StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ NumGPRs = 0;
+ std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ std::vector<SDOperand> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Op.getOperand(5+2*i);
+ unsigned Flags = Op.getConstantOperandVal(5+2*i+1);
+ MVT::ValueType ArgVT = Arg.getValueType();
+
+ unsigned ObjSize;
+ unsigned ObjGPRs;
+ unsigned GPRPad;
+ unsigned StackPad;
+ HowToPassArgument(ArgVT, NumGPRs, ArgOffset, ObjGPRs,
+ ObjSize, GPRPad, StackPad, Flags);
+ NumGPRs += GPRPad;
+ ArgOffset += StackPad;
+ if (ObjGPRs > 0) {
+ switch (ArgVT) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i32:
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg));
+ break;
+ case MVT::f32:
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs],
+ DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg)));
+ break;
+ case MVT::i64: {
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+ DAG.getConstant(0, getPointerTy()));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+ DAG.getConstant(1, getPointerTy()));
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo));
+ if (ObjGPRs == 2)
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi));
+ else {
+ SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Hi, PtrOff, NULL, 0));
+ }
+ break;
+ }
+ case MVT::f64: {
+ SDOperand Cvt = DAG.getNode(ARMISD::FMRRD,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ &Arg, 1);
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt));
+ if (ObjGPRs == 2)
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1],
+ Cvt.getValue(1)));
+ else {
+ SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Cvt.getValue(1), PtrOff,
+ NULL, 0));
+ }
+ break;
+ }
+ }
+ } else {
+ assert(ObjSize != 0);
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+
+ NumGPRs += ObjGPRs;
+ ArgOffset += ObjSize;
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDOperand CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDOperand CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // FIXME: handle tail calls differently.
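+ // Rough rationale for the opcode choice below: BLX only exists from ARMv5T
+ // on, so an indirect call (or an interworking call from Thumb) on older
+ // subtargets cannot use branch-and-link and falls back to CALL_NOLINK with
+ // LR set up explicitly.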
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+ if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) {
+ // Implicitly define LR - LR must not be allocated as GPR:$dst of CALL_NOLINK.
+ Chain = DAG.getCopyToReg(Chain, ARM::LR,
+ DAG.getNode(ISD::UNDEF, MVT::i32), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ std::vector<SDOperand> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ SDOperand CSOps[] = { Chain, DAG.getConstant(NumBytes, MVT::i32), InFlag };
+ Chain = DAG.getNode(ISD::CALLSEQ_END,
+ DAG.getNodeValueTypes(MVT::Other, MVT::Flag),
+ ((RetVT != MVT::Other) ? 2 : 1), CSOps, 3);
+ if (RetVT != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> ResultVals;
+ NodeTys.clear();
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (RetVT) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other:
+ break;
+ case MVT::i32:
+ Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ if (Op.Val->getValueType(1) == MVT::i32) {
+ // Returns an i64 value.
+ Chain = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i32);
+ }
+ NodeTys.push_back(MVT::i32);
+ break;
+ case MVT::f32:
+ Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, MVT::f32,
+ Chain.getValue(0)));
+ NodeTys.push_back(MVT::f32);
+ break;
+ case MVT::f64: {
+ SDOperand Lo = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
+ SDOperand Hi = DAG.getCopyFromReg(Lo, ARM::R1, MVT::i32, Lo.getValue(2));
+ ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, MVT::f64, Lo, Hi));
+ NodeTys.push_back(MVT::f64);
+ break;
+ }
+ }
+
+ NodeTys.push_back(MVT::Other);
+
+ if (ResultVals.empty())
+ return Chain;
+
+ ResultVals.push_back(Chain);
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
+ ResultVals.size());
+ return Res.getValue(Op.ResNo);
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Copy;
+ SDOperand Chain = Op.getOperand(0);
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1: {
+ SDOperand LR = DAG.getRegister(ARM::LR, MVT::i32);
+ return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
+ }
+ case 3:
+ Op = Op.getOperand(1);
+ if (Op.getValueType() == MVT::f32) {
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+ } else if (Op.getValueType() == MVT::f64) {
+ // Recursively legalize f64 -> i64.
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Op);
+ return DAG.getNode(ISD::RET, MVT::Other, Chain, Op,
+ DAG.getConstant(0, MVT::i32));
+ }
+ Copy = DAG.getCopyToReg(Chain, ARM::R0, Op, SDOperand());
+ if (DAG.getMachineFunction().liveout_empty())
+ DAG.getMachineFunction().addLiveOut(ARM::R0);
+ break;
+ case 5:
+ Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
+ Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
+ // If we haven't noted that R0 and R1 are live out, do so now.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ DAG.getMachineFunction().addLiveOut(ARM::R0);
+ DAG.getMachineFunction().addLiveOut(ARM::R1);
+ }
+ break;
+ }
+
+ // We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag.
+ return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOVi.
+static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDOperand Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, MVT::i32, Res);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDOperand
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "tlsgd", true);
+ SDOperand Argument = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ Argument = DAG.getNode(ARMISD::Wrapper, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, DAG.getEntryNode(), Argument, NULL, 0);
+ SDOperand Chain = Argument.getValue(1);
+
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::Int32Ty;
+ Args.push_back(Entry);
+ std::pair<SDOperand, SDOperand> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDOperand
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ GlobalValue *GV = GA->getGlobal();
+ SDOperand Offset;
+ SDOperand Chain = DAG.getEntryNode();
+ MVT::ValueType PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDOperand ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, PtrVT);
+
+ if (GV->isDeclaration()){
+ // initial exec model
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "gottpoff", true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ Offset = DAG.getNode(ARMISD::Wrapper, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0);
+ Chain = Offset.getValue(1);
+
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ Offset = DAG.getNode(ARMISD::Wrapper, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
+}
+
+SDOperand
+ARMTargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
+SDOperand ARMTargetLowering::LowerGlobalAddressELF(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = getPointerTy();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasInternalLinkage() || GV->hasHiddenVisibility();
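+ // Symbols known to be local (internal linkage or hidden visibility) can use
+ // a GOT-relative offset (GOTOFF) directly; everything else goes through a
+ // GOT entry, which requires the extra load below.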
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
+ SDOperand CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+ SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand Chain = Result.getValue(1);
+ SDOperand GOT = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PtrVT);
+ Result = DAG.getNode(ISD::ADD, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0);
+ return Result;
+ } else {
+ SDOperand CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+ }
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+/// even in non-static mode.
+static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
+ return RelocM != Reloc::Static &&
+ (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ (GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode()));
+}
+
+SDOperand ARMTargetLowering::LowerGlobalAddressDarwin(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = getPointerTy();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
+ SDOperand CPAddr;
+ if (RelocM == Reloc::Static)
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2);
+ else {
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
+ : ARMCP::CPValue;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ Kind, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+
+ SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel);
+ }
+ if (IsIndirect)
+ Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0);
+
+ return Result;
+}
+
+SDOperand ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDOperand Op,
+ SelectionDAG &DAG){
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MVT::ValueType PtrVT = getPointerTy();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj);
+ SDOperand CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+ SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel);
+}
+
+static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
+ unsigned VarArgsFrameIndex) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
+ SV->getOffset());
+}
+
+static SDOperand LowerFORMAL_ARGUMENT(SDOperand Op, SelectionDAG &DAG,
+ unsigned *vRegs, unsigned ArgNo,
+ unsigned &NumGPRs, unsigned &ArgOffset) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
+ SDOperand Root = Op.getOperand(0);
+ std::vector<SDOperand> ArgValues;
+ SSARegMap *RegMap = MF.getSSARegMap();
+
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ unsigned ObjSize;
+ unsigned ObjGPRs;
+ unsigned GPRPad;
+ unsigned StackPad;
+ unsigned Flags = Op.getConstantOperandVal(ArgNo + 3);
+ HowToPassArgument(ObjectVT, NumGPRs, ArgOffset, ObjGPRs,
+ ObjSize, GPRPad, StackPad, Flags);
+ NumGPRs += GPRPad;
+ ArgOffset += StackPad;
+
+ SDOperand ArgValue;
+ if (ObjGPRs == 1) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ vRegs[NumGPRs] = VReg;
+ ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ if (ObjectVT == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, ArgValue);
+ } else if (ObjGPRs == 2) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ vRegs[NumGPRs] = VReg;
+ ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+
+ VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs+1], VReg);
+ vRegs[NumGPRs+1] = VReg;
+ SDOperand ArgValue2 = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i64)
+ ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+ else
+ ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+ }
+ NumGPRs += ObjGPRs;
+
+ if (ObjSize) {
+ // If the argument is actually used, emit a load from the right stack
+ // slot.
+ if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
+ if (ObjGPRs == 0)
+ ArgValue = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+ else {
+ SDOperand ArgValue2 =
+ DAG.getLoad(MVT::i32, Root, FIN, NULL, 0);
+ if (ObjectVT == MVT::i64)
+ ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+ else
+ ArgValue= DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+ }
+ } else {
+ // Don't emit a dead load.
+ ArgValue = DAG.getNode(ISD::UNDEF, ObjectVT);
+ }
+
+ ArgOffset += ObjSize; // Move on to the next argument.
+ }
+
+ return ArgValue;
+}
+
+SDOperand
+ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
+ std::vector<SDOperand> ArgValues;
+ SDOperand Root = Op.getOperand(0);
+ unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
+ unsigned NumGPRs = 0; // GPRs used for parameter passing.
+ unsigned VRegs[4];
+
+ unsigned NumArgs = Op.Val->getNumValues()-1;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+ ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, VRegs, ArgNo,
+ NumGPRs, ArgOffset));
+
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (isVarArg) {
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SSARegMap *RegMap = MF.getSSARegMap();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned VARegSize = (4 - NumGPRs) * 4;
+ unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
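+ // (For example, with a single fixed i32 argument, r1-r3 are spilled here so
+ // va_arg can walk them as if they had been passed on the stack.)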
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
+ VARegSaveSize - VARegSize);
+ SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+ SmallVector<SDOperand, 4> MemOps;
+ for (; NumGPRs < 4; ++NumGPRs) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed on the stack.
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+ Op.Val->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDOperand Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->isExactlyValue(0.0);
+ else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDOperand WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->isExactlyValue(0.0);
+ }
+ }
+ return false;
+}
+
+static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
+ return ( isThumb && (C & ~255U) == 0) ||
+ (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
+
+/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
+ SDOperand &ARMCC, SelectionDAG &DAG, bool isThumb) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.Val)) {
+ unsigned C = RHSC->getValue();
+ if (!isLegalCmpImmediate(C, isThumb)) {
+ // Constant does not fit, try adjusting it by one?
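+ // (For example, 0x101 is not a valid so_imm, but a signed "lt 0x101" can be
+ // rewritten as "le 0x100", and 0x100 is encodable.)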
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ case ARMCC::MI:
+ case ARMCC::PL:
+ // Uses only N and Z Flags
+ CompareType = ARMISD::CMPNZ;
+ break;
+ }
+ ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, MVT::Flag, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+static SDOperand getVFPCmp(SDOperand LHS, SDOperand RHS, SelectionDAG &DAG) {
+ SDOperand Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, MVT::Flag, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, MVT::Flag, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp);
+}
+
+static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand LHS = Op.getOperand(0);
+ SDOperand RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDOperand TrueVal = Op.getOperand(2);
+ SDOperand FalseVal = Op.getOperand(3);
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDOperand ARMCC;
+ SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb());
+ return DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ std::swap(TrueVal, FalseVal);
+
+ SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDOperand Cmp = getVFPCmp(LHS, RHS, DAG);
+ SDOperand Result = DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal,
+ ARMCC, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDOperand ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDOperand Cmp2 = getVFPCmp(LHS, RHS, DAG);
+ Result = DAG.getNode(ARMISD::CMOV, VT, Result, TrueVal, ARMCC2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDOperand Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDOperand LHS = Op.getOperand(2);
+ SDOperand RHS = Op.getOperand(3);
+ SDOperand Dest = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDOperand ARMCC;
+ SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb());
+ return DAG.getNode(ARMISD::BRCOND, MVT::Other, Chain, Dest, ARMCC, CCR,Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ // Swap the LHS/RHS of the comparison if needed.
+ std::swap(LHS, RHS);
+
+ SDOperand Cmp = getVFPCmp(LHS, RHS, DAG);
+ SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDOperand Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMCC = DAG.getConstant(CondCode2, MVT::i32);
+ SDOperand Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDOperand ARMTargetLowering::LowerBR_JT(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Table = Op.getOperand(1);
+ SDOperand Index = Op.getOperand(2);
+
+ MVT::ValueType PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDOperand UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(4, PTy));
+ SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table);
+ bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ Addr = DAG.getLoad(isPIC ? (MVT::ValueType)MVT::i32 : PTy,
+ Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
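+ // In PIC mode the jump table entries hold offsets relative to the table
+ // itself, so add the table address back to form the branch destination.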
+ if (isPIC)
+ Addr = DAG.getNode(ISD::ADD, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, MVT::Other, Chain, Addr, JTI, UId);
+}
+
+static SDOperand LowerFP_TO_INT(SDOperand Op, SelectionDAG &DAG) {
+ unsigned Opc =
+ Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
+ Op = DAG.getNode(Opc, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+}
+
+static SDOperand LowerINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned Opc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
+
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, VT, Op);
+}
+
+static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
+ // Implement fcopysign with a fabs and a conditional fneg.
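+ // (copysign takes the magnitude of the first operand and the sign of the
+ // second, so take fabs of Tmp0 and conditionally negate it when Tmp1
+ // compares less than zero.)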
+ SDOperand Tmp0 = Op.getOperand(0);
+ SDOperand Tmp1 = Op.getOperand(1);
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType SrcVT = Tmp1.getValueType();
+ SDOperand AbsVal = DAG.getNode(ISD::FABS, VT, Tmp0);
+ SDOperand Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG);
+ SDOperand ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+}
+
+static SDOperand LowerBIT_CONVERT(SDOperand Op, SelectionDAG &DAG) {
+ // Turn f64->i64 into FMRRD.
+ assert(Op.getValueType() == MVT::i64 &&
+ Op.getOperand(0).getValueType() == MVT::f64);
+
+ Op = Op.getOperand(0);
+ SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, DAG.getVTList(MVT::i32, MVT::i32),
+ &Op, 1);
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Cvt, Cvt.getValue(1));
+}
+
+static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+ // FIXME: All this code is target-independent. Create a new target-indep
+ // MULHILO node and move this code to the legalizer.
+ //
+ assert(Op.getValueType() == MVT::i64 && "Only handles i64 expand right now!");
+
+ SDOperand LL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDOperand RL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32));
+
+ unsigned LHSSB = DAG.ComputeNumSignBits(Op.getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(Op.getOperand(1));
+
+ SDOperand Lo, Hi;
+ // Figure out how to lower this multiply.
+ if (LHSSB >= 33 && RHSSB >= 33) {
+ // If the input values are both sign extended, we can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, MVT::i32, LL, RL);
+ } else if (LHSSB == 32 && RHSSB == 32 &&
+ DAG.MaskedValueIsZero(Op.getOperand(0), 0xFFFFFFFF00000000ULL) &&
+ DAG.MaskedValueIsZero(Op.getOperand(1), 0xFFFFFFFF00000000ULL)) {
+ // If the inputs are zero extended, use mulhu.
+ Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, MVT::i32, LL, RL);
+ } else {
+ SDOperand LH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+ SDOperand RH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, MVT::i32));
+
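+ // General case, a sketch of the algebra: with the operands split as LH:LL
+ // and RH:RL, the low 64 bits of the product are LL*RL + 2^32*(LL*RH + LH*RL).
+ // The MULHILOU node below yields the full 64-bit LL*RL (Lo,Hi); the two
+ // 32-bit cross products are then added into Hi.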
+ // Lo,Hi = umul LHS, RHS.
+ SDOperand Ops[] = { LL, RL };
+ SDOperand UMul64 = DAG.getNode(ARMISD::MULHILOU,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2);
+ Lo = UMul64;
+ Hi = UMul64.getValue(1);
+ RH = DAG.getNode(ISD::MUL, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, LH);
+ }
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+}
+
+static SDOperand LowerMULHU(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ return DAG.getNode(ARMISD::MULHILOU,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1);
+}
+
+static SDOperand LowerMULHS(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ return DAG.getNode(ARMISD::MULHILOS,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1);
+}
+
+static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(Op.getValueType() == MVT::i64 &&
+ (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA/SRL by 1 here; all others use generic lowering.
+ if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
+ cast<ConstantSDNode>(Op.getOperand(1))->getValue() != 1)
+ return SDOperand();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb()) return SDOperand();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
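+ // (Roughly: MOVS Hi, Hi, LSR #1 for SRL, or ASR #1 for SRA, followed by
+ // MOV Lo, Lo, RRX to shift the saved carry into the low word.)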
+ unsigned Opc = Op.getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+}
+
+SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Dest = Op.getOperand(1);
+ SDOperand Src = Op.getOperand(2);
+ SDOperand Count = Op.getOperand(3);
+ unsigned Align =
+ (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+ if (Align == 0) Align = 1;
+
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Count);
+ // Just call memcpy if:
+ // the copy is not 4-byte aligned,
+ // the size is unknown or not a multiple of 4, or
+ // the size is >= the threshold (64 bytes).
+ if ((Align & 3) != 0 ||
+ !I ||
+ I->getValue() >= 64 ||
+ (I->getValue() & 3) != 0) {
+ MVT::ValueType IntPtr = getPointerTy();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getTargetData()->getIntPtrType();
+ Entry.Node = Op.getOperand(1); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(2); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(3); Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
+ // Otherwise do repeated 4-byte loads and stores. To be improved.
+ assert((I->getValue() & 3) == 0);
+ assert((Align & 3) == 0);
+ unsigned NumMemOps = I->getValue() >> 2;
+ unsigned EmittedNumMemOps = 0;
+ unsigned SrcOff = 0, DstOff = 0;
+ MVT::ValueType VT = MVT::i32;
+ unsigned VTSize = 4;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDOperand LoadChains[MAX_LOADS_IN_LDM];
+ SDOperand Loads[MAX_LOADS_IN_LDM];
+
+ // Emit up to MAX_LOADS_IN_LDM (currently 6) loads, then a TokenFactor
+ // barrier, then the same number of stores. The loads and stores will get
+ // combined into ldm/stm later on.
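+ // (For example, a 32-byte copy emits 6 loads and 6 stores on the first
+ // pass and 2 of each on the second.)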
+ while(EmittedNumMemOps < NumMemOps) {
+ unsigned i;
+ for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+ Loads[i] = DAG.getLoad(VT, Chain,
+ DAG.getNode(ISD::ADD, VT, Src,
+ DAG.getConstant(SrcOff, VT)),
+ NULL, 0);
+ LoadChains[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &LoadChains[0], i);
+
+ for (i=0; i<MAX_LOADS_IN_LDM && EmittedNumMemOps+i < NumMemOps; i++) {
+ Chain = DAG.getStore(Chain, Loads[i],
+ DAG.getNode(ISD::ADD, VT, Dest,
+ DAG.getConstant(DstOff, VT)),
+ NULL, 0);
+ DstOff += VTSize;
+ }
+ EmittedNumMemOps += i;
+ }
+
+ return Chain;
+}
+
+SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Don't know how to custom lower this!"); abort();
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::MULHU: return LowerMULHU(Op, DAG);
+ case ISD::MULHS: return LowerMULHS(Op, DAG);
+ case ISD::SRL:
+ case ISD::SRA: return LowerSRx(Op, DAG, Subtarget);
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: break;
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
+ }
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case ARM::tMOVCCr: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ BuildMI(BB, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, MVT::ValueType VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
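+ // Rough summary of the checks below: Thumb offsets are unsigned, scaled by
+ // the access size, and limited to a 5-bit field (e.g. 0..124 bytes for i32);
+ // ARM mode allows +/-4095 for byte/word accesses, +/-255 for halfword, and
+ // a multiple of 4 up to +/-1020 for VFP accesses.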
+ if (Subtarget->isThumb()) {
+ if (V < 0)
+ return false;
+
+ unsigned Scale = 1;
+ switch (VT) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+ }
+
+ if (V < 0)
+ V = -V;
+ switch (VT) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ if (!isLegalAddressImmediate(AM.BaseOffs, getValueType(Ty), Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ int Scale = AM.Scale;
+ switch (getValueType(Ty)) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ case MVT::i64:
+ // This assumes i64 is legalized to a pair of i32s. If not (i.e. ldrd /
+ // strd are used), then its address mode is the same as for i16.
+ // r + r
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (AM.Scale & 1) return false;
+ return isPowerOf2_32(AM.Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+
+static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT,
+ bool isSEXTLoad, SDOperand &Base,
+ SDOperand &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
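+ // (ldrh / ldrsb and friends: base +/- reg or +/- 8-bit immediate, hence the
+ // -256 bound below.)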
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC < 0 && RHSC > -256) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
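+ // (ldr / ldrb / str: base +/- 12-bit immediate or +/- shifted register,
+ // hence the -0x1000 bound below.)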
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as a pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT::ValueType VT;
+ SDOperand Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getLoadedVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getStoredVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Ptr.Val, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+ }
+ return false;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT::ValueType VT;
+ SDOperand Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getLoadedVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getStoredVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+ return false;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = 0;
+ KnownOne = 0;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ uint64_t KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l':
+ // FIXME: in thumb mode, 'l' is only low-regs.
+ // FALL THROUGH.
+ case 'r':
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, ARM::SPRRegisterClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, ARM::DPRRegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) { // GCC ARM Constraint Letters
+ default: break;
+ case 'l':
+ case 'r':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::LR, 0);
+ case 'w':
+ if (VT == MVT::f32)
+ return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12,ARM::S13,ARM::S14,ARM::S15,
+ ARM::S16,ARM::S17,ARM::S18,ARM::S19,
+ ARM::S20,ARM::S21,ARM::S22,ARM::S23,
+ ARM::S24,ARM::S25,ARM::S26,ARM::S27,
+ ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
+ if (VT == MVT::f64)
+ return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10,ARM::D11,
+ ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ break;
+ }
+
+ return std::vector<unsigned>();
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000..2b66f23
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,144 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+ class ARMSubtarget;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPNZ, // ARM compare that uses only N or Z flags.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+ CNEG, // ARM conditional negate instructions.
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ MULHILOU, // Lo,Hi = umul LHS, RHS.
+ MULHILOS, // Lo,Hi = smul LHS, RHS.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ FMRRD, // Double to two GPRs.
+ FMDRR, // Two GPRs to double.
+
+ THREAD_POINTER
+ };
+ }
+
+ //===----------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ public:
+ ARMTargetLowering(TargetMachine &TM);
+
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG);
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDOperand &Base, SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG);
+
+ virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalAddressDarwin(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalAddressELF(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDOperand LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 0000000..b404ec0
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,612 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])),
+ RI(*this, STI) {
+}
+
+const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const {
+ return &ARM::GPRRegClass;
+}
+
+/// Return true if the instruction is a register-to-register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const {
+ MachineOpCode oc = MI.getOpcode();
+ switch (oc) {
+ default:
+ return false;
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ assert(MI.getInstrDescriptor()->numOperands >= 2 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ "Invalid ARM MOV instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
+unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImmediate() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FLDD:
+ case ARM::FLDS:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tRestore:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STR:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImmediate() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FSTD:
+ case ARM::FSTS:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tSpill:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDR;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRB;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STR;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRB;
+ }
+ return 0;
+}
+
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const {
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+  // Try splitting an indexed load / store into an un-indexed one plus an
+  // add/sub operation.
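+  // Illustrative example (comment added for clarity, not from the original
+  // patch): a post-indexed load such as "ldr r0, [r1], #4" becomes the
+  // two-instruction sequence "ldr r0, [r1]" followed by "add r1, r1, #4";
+  // a pre-indexed form emits the add/sub before the memory operation instead.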
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ unsigned NumOps = TID->numOperands;
+ bool isLoad = (TID->Flags & M_LOAD_FLAG) != 0;
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ int SOImmVal = ARM_AM::getSOImmVal(Amt);
+ if (SOImmVal == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(SOImmVal)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+  case ARMII::AddrMode3: {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+      // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV.addVirtualRegisterDead(Reg, NewMI);
+ // Update the defining instruction.
+ if (VI.DefInst == MI)
+ VI.DefInst = NewMI;
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ int NIdx = NewMI->findRegisterUseOperandIdx(Reg);
+ if (NIdx == -1)
+ continue;
+ LV.addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
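+// Informal summary (added comment): a block ending in "bcc LBB1; b LBB2" is
+// reported as TBB = LBB1, FBB = LBB2 with the condition in Cond, a lone
+// unconditional or conditional branch fills only TBB, and anything more
+// exotic makes the function return true ("cannot analyze").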
+bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+ if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+ (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ I = LastInst;
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ // The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm ||
+ SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ I = LastInst;
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::BX_RET: // Return.
+ case ARM::LDM_RET:
+ case ARM::tBX_RET:
+ case ARM::tBX_RET_vararg:
+ case ARM::tPOP_RET:
+ case ARM::B:
+ case ARM::tB: // Uncond branch.
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: // Jumptable branch.
+ case ARM::BR_JTm: // Jumptable branch through mem.
+ case ARM::BR_JTadd: // Jumptable branch add to pc.
+ return true;
+ default: return false;
+ }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImmedValue() != ARMCC::AL;
+}
+
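+// Readability note (added comment): predicating an unconditional "b LBB"
+// rewrites it to the Bcc/tBcc form and appends the condition and CC-register
+// operands; instructions that already carry a predicate operand simply have
+// that operand pair overwritten.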
+bool ARMInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const std::vector<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::B || Opc == ARM::tB) {
+ MI->setInstrDescriptor(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
+ MI->addImmOperand(Pred[0].getImmedValue());
+ MI->addRegOperand(Pred[1].getReg(), false);
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImmedValue());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
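+// Readability note (added comment): "Pred1 subsumes Pred2" means Pred1 holds
+// whenever Pred2 does, e.g. AL subsumes every condition and LS (unsigned <=)
+// subsumes both LO (unsigned <) and EQ.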
+bool
+ARMInstrInfo::SubsumesPredicate(const std::vector<MachineOperand> &Pred1,
+ const std::vector<MachineOperand> &Pred2) const{
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImmedValue();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImmedValue();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ if (!TID->ImplicitDefs && (TID->Flags & M_HAS_OPTIONAL_DEF) == 0)
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARM::GetInstSize(MachineInstr *MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ unsigned TSFlags = TID->TSFlags;
+
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default:
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+ if (MI->getOpcode() == ARM::LABEL)
+ return 0;
+ assert(0 && "Unknown or unset size field for instr!");
+ break;
+ case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
+ case ARMII::Size4Bytes: return 4; // Arm instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb instruction.
+ case ARMII::SizeSpecial: {
+ switch (MI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries.
+ unsigned NumOps = TID->numOperands;
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - ((TID->Flags & M_PREDICABLE) ? 3 : 2));
+ unsigned JTI = JTOP.getJumpTableIndex();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+      // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
+      // aligned. The assembler / linker may add 2 bytes of padding just
+      // before the JT entries. The size does not include this padding; the
+      // constant islands pass does separate bookkeeping for it.
+      // FIXME: If we know the size of the function is less than (1 << 16) * 2
+      // bytes, we can use 16-bit entries instead. Then there won't be an
+      // alignment issue.
+ return getNumJTEntries(JT, JTI) * 4 +
+ (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+}
+
+/// GetFunctionSize - Returns the size of the specified MachineFunction.
+///
+unsigned ARM::GetFunctionSize(MachineFunction &MF) {
+ unsigned FnSize = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(),E = MBB.end(); I != E; ++I)
+ FnSize += ARM::GetInstSize(I);
+ }
+ return FnSize;
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 0000000..2c158b8
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,133 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+  // This four-bit field describes the addressing mode used. Zero is unused
+ // so that we can tell if we forgot to set a value.
+
+ AddrModeMask = 0xf,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrModeT1 = 6,
+ AddrModeT2 = 7,
+ AddrModeT4 = 8,
+ AddrModeTs = 9, // i8 * 4 for pc and sp relative data
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 4,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
+
+  // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
+  // and store ops.
+ IndexModeShift = 7,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+
+ // Opcode
+ OpcodeShift = 9,
+ OpcodeMask = 0xf << OpcodeShift
+ };
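+  // Illustrative example (added comment): an AddrMode2, pre-indexed, 4-byte
+  // instruction would carry
+  //   TSFlags = AddrMode2 | (Size4Bytes << SizeShift) | (IndexModePre << IndexModeShift)
+  // i.e. 0x2 | 0x30 | 0x80 = 0xB2, and clients recover each field with the
+  // corresponding mask/shift pair.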
+}
+
+class ARMInstrInfo : public TargetInstrInfo {
+ const ARMRegisterInfo RI;
+public:
+ ARMInstrInfo(const ARMSubtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ /// Return true if the instruction is a register to register move and
+ /// leave the source and dest operands in the passed parameters.
+ ///
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const;
+ virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+ virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+ virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
+
+ // Predication support.
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const std::vector<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const std::vector<MachineOperand> &Pred1,
+                         const std::vector<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+};
+
+ // Utility routines
+ namespace ARM {
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ unsigned GetInstSize(MachineInstr *MI);
+
+ /// GetFunctionSize - Returns the size of the specified MachineFunction.
+ ///
+ unsigned GetFunctionSize(MachineFunction &MF);
+ }
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 0000000..adc203b
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,1320 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeq,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInFlag]>;
+def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+ [SDNPInFlag]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getValue()),
+ MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getValue();
+ return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getValue() >= 1 && (int32_t)N->getValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getValue() >= 16 && (int32_t)N->getValue() < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getValue()) != -1; }],
+ so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(int)N->getValue()) != -1; }],
+ so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDOperand(N,0)) >= 17;
+}]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+def brtarget : Operand<OtherVT>;
+
+// A list of registers separated by comma. Used by load/store multiple.
+def reglist : Operand<i32> {
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+def jtblock_operand : Operand<i32> {
+ let PrintMethod = "printJTBlockOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+def so_imm : Operand<i32>,
+ PatLeaf<(imm),
+ [{ return ARM_AM::getSOImmVal(N->getValue()) != -1; }],
+ so_imm_XFORM> {
+ let PrintMethod = "printSOImmOperand";
+}
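+// Worked example (added comment, assuming the rotate-right-by-twice-the-field
+// rule that ARM_AM::getSOImmVal implements): 0xFF00 is encodable as
+// imm8 = 0xFF with rotate field 12 (rotate right by 24), i.e. the packed
+// 12-bit value 0xCFF.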
+
+// Break so_imm's up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : Operand<i32>,
+ PatLeaf<(imm),
+ [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getValue()); }]> {
+ let PrintMethod = "printSOImm2PartOperand";
+}
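+// Illustrative example (added comment; the constant is made up): 0x12003400
+// has no single rotated 8-bit encoding, but it splits into the so_imm pieces
+// 0x3400 and 0x12000000, so it can be materialized in two instructions
+// (e.g. a mov of one piece followed by an orr of the other).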
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+// Define ARM specific addressing modes.
+
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
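+// In assembly these correspond to operands such as [r1, #-8] or
+// [r1, r2, lsl #2] (concrete examples added for illustration).
+//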
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let PrintMethod = "printAddrMode3Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode4 := reg, <mode|W>
+//
+def addrmode4 : Operand<i32>,
+ ComplexPattern<i32, 2, "", []> {
+ let PrintMethod = "printAddrMode4Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+//
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let PrintMethod = "printSBitModifierOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+ bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrModeT1 : AddrMode<6>;
+def AddrModeT2 : AddrMode<7>;
+def AddrModeT4 : AddrMode<8>;
+def AddrModeTs : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+ bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes : SizeFlagVal<2>;
+def Size4Bytes : SizeFlagVal<3>;
+def Size2Bytes : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+class InstARM<bits<4> opcod, AddrMode am, SizeFlagVal sz, IndexMode im,
+ string cstr>
+ : Instruction {
+ let Namespace = "ARM";
+
+ bits<4> Opcode = opcod;
+ AddrMode AM = am;
+ bits<4> AddrModeBits = AM.Value;
+
+ SizeFlagVal SZ = sz;
+ bits<3> SizeFlag = SZ.Value;
+
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+
+ let Constraints = cstr;
+}
+
+class PseudoInst<dag ops, string asm, list<dag> pattern>
+ : InstARM<0, AddrModeNone, SizeSpecial, IndexModeNone, ""> {
+ let OperandList = ops;
+ let AsmString = asm;
+ let Pattern = pattern;
+}
+
+// Almost all ARM instructions are predicable.
+class I<dag oprnds, AddrMode am, SizeFlagVal sz, IndexMode im,
+ string opc, string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, im, cstr> {
+ let OperandList = !con(oprnds, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR.
+class sI<dag oprnds, AddrMode am, SizeFlagVal sz, IndexMode im,
+ string opc, string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, im, cstr> {
+ let OperandList = !con(oprnds, (ops pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrModeNone, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AsI<dag ops, string opc, string asm, list<dag> pattern>
+ : sI<ops, AddrModeNone, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AI1<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode1, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AsI1<dag ops, string opc, string asm, list<dag> pattern>
+ : sI<ops, AddrMode1, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AI2<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AI3<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AI4<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode4, Size4Bytes, IndexModeNone, opc, asm, "", pattern>;
+class AI1x2<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode1, Size8Bytes, IndexModeNone, opc, asm, "", pattern>;
+
+// Pre-indexed ops
+class AI2pr<dag ops, string opc, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModePre, opc, asm, cstr, pattern>;
+class AI3pr<dag ops, string opc, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModePre, opc, asm, cstr, pattern>;
+
+// Post-indexed ops
+class AI2po<dag ops, string opc, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModePost, opc, asm, cstr, pattern>;
+class AI3po<dag ops, string opc, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModePost, opc, asm, cstr, pattern>;
+
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<string opc, PatFrag opnode> {
+ def ri : AsI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AsI1<(ops GPR:$dst, GPR:$a, GPR:$b),
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AsI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+
+/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+multiclass ASI1_bin_s_irs<string opc, PatFrag opnode> {
+ def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Imp<[], [CPSR]>;
+ def rr : AI1<(ops GPR:$dst, GPR:$a, GPR:$b),
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Imp<[], [CPSR]>;
+ def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Imp<[], [CPSR]>;
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+multiclass AI1_cmp_irs<string opc, PatFrag opnode> {
+ def ri : AI1<(ops GPR:$a, so_imm:$b),
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_imm:$b)]>, Imp<[], [CPSR]>;
+ def rr : AI1<(ops GPR:$a, GPR:$b),
+ opc, " $a, $b",
+ [(opnode GPR:$a, GPR:$b)]>, Imp<[], [CPSR]>;
+ def rs : AI1<(ops GPR:$a, so_reg:$b),
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_reg:$b)]>, Imp<[], [CPSR]>;
+}
+
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_unary_rrot<string opc, PatFrag opnode> {
+ def r : AI<(ops GPR:$dst, GPR:$Src),
+ opc, " $dst, $Src",
+ [(set GPR:$dst, (opnode GPR:$Src))]>, Requires<[IsARM, HasV6]>;
+ def r_rot : AI<(ops GPR:$dst, GPR:$Src, i32imm:$rot),
+ opc, " $dst, $Src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
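+// For instance (added comment), "uxtb r0, r1, ror #8" zero-extends byte 1 of
+// r1, so a DAG like (and (rotr r1, 8), 0xFF) can be selected as a single
+// instruction by the rotated form above.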
+
+/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<string opc, PatFrag opnode> {
+ def rr : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS),
+ opc, " $dst, $LHS, $RHS",
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[IsARM, HasV6]>;
+ def rr_rot : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ opc, " $dst, $LHS, $RHS, ror $rot",
+ [(set GPR:$dst, (opnode GPR:$LHS,
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+// Special cases.
+class XI<dag oprnds, AddrMode am, SizeFlagVal sz, IndexMode im,
+ string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, im, cstr> {
+ let OperandList = oprnds;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AXI<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AXI1<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode1, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AXI2<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode2, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AXI3<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode3, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AXI4<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode4, Size4Bytes, IndexModeNone, asm, "", pattern>;
+
+class AXIx2<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrModeNone, Size8Bytes, IndexModeNone, asm, "", pattern>;
+
+// BR_JT instructions
+class JTI<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrModeNone, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI1<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode1, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI2<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode2, SizeSpecial, IndexModeNone, asm, "", pattern>;
+
+/// AsXI1_bin_c_irs - Same as AsI1_bin_irs, but without the predicate operand;
+/// these read the carry bit. They can optionally set CPSR.
+multiclass AsXI1_bin_c_irs<string opc, PatFrag opnode> {
+ def ri : AXI1<(ops GPR:$dst, GPR:$a, so_imm:$b, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Imp<[CPSR], []>;
+ def rr : AXI1<(ops GPR:$dst, GPR:$a, GPR:$b, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Imp<[CPSR], []>;
+ def rs : AXI1<(ops GPR:$dst, GPR:$a, so_reg:$b, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Imp<[CPSR], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+def IMPLICIT_DEF_GPR :
+PseudoInst<(ops GPR:$rD, pred:$p),
+ "@ IMPLICIT_DEF_GPR $rD",
+ [(set GPR:$rD, (undef))]>;
+
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+let isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(ops cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size),
+ "${instid:label} ${cpidx:cpentry}", []>;
+
+def ADJCALLSTACKUP :
+PseudoInst<(ops i32imm:$amt, pred:$p),
+ "@ ADJCALLSTACKUP $amt",
+ [(ARMcallseq_end imm:$amt)]>, Imp<[SP],[SP]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(ops i32imm:$amt, pred:$p),
+ "@ ADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>;
+
+def DWARF_LOC :
+PseudoInst<(ops i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+let isNotDuplicable = 1 in {
+def PICADD : AXI1<(ops GPR:$dst, GPR:$a, pclabel:$cp, pred:$p),
+ "$cp:\n\tadd$p $dst, pc, $a",
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let isLoad = 1, AddedComplexity = 10 in {
+def PICLD : AXI2<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr$p $dst, $addr",
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDZH : AXI3<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDZB : AXI2<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDH : AXI3<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}h $dst, $addr",
+ [(set GPR:$dst, (extloadi16 addrmodepc:$addr))]>;
+
+def PICLDB : AXI2<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}b $dst, $addr",
+ [(set GPR:$dst, (extloadi8 addrmodepc:$addr))]>;
+
+def PICLDSH : AXI3<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDSB : AXI3<(ops GPR:$dst, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tldr${p}sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+}
+let isStore = 1, AddedComplexity = 10 in {
+def PICSTR : AXI2<(ops GPR:$src, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tstr$p $src, $addr",
+ [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : AXI3<(ops GPR:$src, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tstr${p}h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRB : AXI2<(ops GPR:$src, addrmodepc:$addr, pred:$p),
+ "${addr:label}:\n\tstr${p}b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+ def BX_RET : AI<(ops), "bx", " lr", [(ARMretflag)]>;
+
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+ def LDM_RET : AXI4<(ops addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let isCall = 1, noResults = 1,
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
+ def BL : AXI<(ops i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>;
+
+ def BL_pred : AI<(ops i32imm:$func, variable_ops),
+ "bl", " ${func:call}",
+ [(ARMcall_pred tglobaladdr:$func)]>;
+
+ // ARMv5T and above
+ def BLX : AXI<(ops GPR:$dst, variable_ops),
+ "blx $dst",
+ [(ARMcall GPR:$dst)]>, Requires<[IsARM, HasV5T]>;
+ let Uses = [LR] in {
+ // ARMv4T
+ def BX : AXIx2<(ops GPR:$dst, variable_ops),
+ "mov lr, pc\n\tbx $dst",
+ [(ARMcall_nolink GPR:$dst)]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1, noResults = 1 in {
+ // B is "predicable" since it can be xformed into a Bcc.
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def B : AXI<(ops brtarget:$dst), "b $dst",
+ [(br bb:$dst)]>;
+
+ let isNotDuplicable = 1 in {
+ def BR_JTr : JTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id),
+ "mov pc, $dst \n$jt",
+ [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>;
+ def BR_JTm : JTI2<(ops addrmode2:$dst, jtblock_operand:$jt, i32imm:$id),
+ "ldr pc, $dst \n$jt",
+ [(ARMbrjt (i32 (load addrmode2:$dst)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : JTI1<(ops GPR:$dst, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+ "add pc, $dst, $idx \n$jt",
+ [(ARMbrjt (add GPR:$dst, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ }
+ }
+
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : AI<(ops brtarget:$dst), "b", " $dst",
+ [/*(ARMbrcond bb:$dst, imm:$cc, CCR:$ccr)*/]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let isLoad = 1 in {
+def LDR : AI2<(ops GPR:$dst, addrmode2:$addr),
+ "ldr", " $dst, $addr",
+ [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let isReMaterializable = 1 in
+def LDRcp : AI2<(ops GPR:$dst, addrmode2:$addr),
+ "ldr", " $dst, $addr", []>;
+
+// Loads with zero extension
+def LDRH : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldr", "h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+
+def LDRB : AI2<(ops GPR:$dst, addrmode2:$addr),
+ "ldr", "b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldr", "sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldr", "sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+
+// Load doubleword
+def LDRD : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldr", "d $dst, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed loads
+def LDR_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+ "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDR_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base, am2offset:$offset),
+ "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRH_POST : AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRB_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+ "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRB_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am2offset:$offset),
+ "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSH_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSB_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSB_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
+} // isLoad
+
+// Store
+let isStore = 1 in {
+def STR : AI2<(ops GPR:$src, addrmode2:$addr),
+ "str", " $src, $addr",
+ [(store GPR:$src, addrmode2:$addr)]>;
+
+// Stores with truncate
+def STRH : AI3<(ops GPR:$src, addrmode3:$addr),
+ "str", "h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
+
+def STRB : AI2<(ops GPR:$src, addrmode2:$addr),
+ "str", "b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+
+// Store doubleword
+def STRD : AI3<(ops GPR:$src, addrmode3:$addr),
+ "str", "d $src, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed stores
+def STR_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base, am2offset:$offset),
+ "str", " $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STR_POST : AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "str", " $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STRH_PRE : AI3pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+ "str", "h $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
+
+def STRH_POST: AI3po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+ "str", "h $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
+ GPR:$base, am3offset:$offset))]>;
+
+def STRB_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "str", "b $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+def STRB_POST: AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "str", "b $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+} // isStore
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+let isLoad = 1 in
+def LDM : AXI4<(ops addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let isStore = 1 in
+def STM : AXI4<(ops addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
+ "stm${p}${addr:submode} $addr, $src1",
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+def MOVr : AsI1<(ops GPR:$dst, GPR:$src),
+ "mov", " $dst, $src", []>;
+def MOVs : AsI1<(ops GPR:$dst, so_reg:$src),
+ "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>;
+
+let isReMaterializable = 1 in
+def MOVi : AsI1<(ops GPR:$dst, so_imm:$src),
+ "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>;
+
+def MOVrx : AsI1<(ops GPR:$dst, GPR:$src),
+ "mov", " $dst, $src, rrx",
+ [(set GPR:$dst, (ARMrrx GPR:$src))]>;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+def MOVsrl_flag : AI1<(ops GPR:$dst, GPR:$src),
+ "mov", "s $dst, $src, lsr #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, Imp<[], [CPSR]>;
+def MOVsra_flag : AI1<(ops GPR:$dst, GPR:$src),
+ "mov", "s $dst, $src, asr #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, Imp<[], [CPSR]>;
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_bin_rrot<"sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_bin_rrot<"sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// TODO: SXT(A){B|H}16
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_bin_rrot<"uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_bin_rrot<"uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general; the add is two 16-bit units, not a 32-bit add.
+//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
+
+// TODO: UXT(A){B|H}16
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<"add", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm SUB : AsI1_bin_irs<"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : ASI1_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm SUBS : ASI1_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// FIXME: Do not allow ADC / SBC to be predicated for now.
+defm ADC : AsXI1_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm SBC : AsXI1_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// These don't define reg/reg forms, because they are handled above.
+def RSBri : AsI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>;
+
+def RSBrs : AsI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>;
+
+// RSB with 's' bit set.
+def RSBSri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>, Imp<[], [CPSR]>;
+def RSBSrs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>, Imp<[], [CPSR]>;
+
+// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR.
+def RSCri : AXI1<(ops GPR:$dst, GPR:$a, so_imm:$b, cc_out:$s),
+ "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, Imp<[CPSR], []>;
+def RSCrs : AXI1<(ops GPR:$dst, GPR:$a, so_reg:$b, cc_out:$s),
+ "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Imp<[CPSR], []>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+
+//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+// (SUBSri GPR:$src, so_imm_neg:$imm)>;
+//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm),
+// (SBCri GPR:$src, so_imm_neg:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
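+// For example (added comment): (mul X, 9) can be selected as
+//   add Rd, X, X, lsl #3
+// and (mul X, 7) as
+//   rsb Rd, X, X, lsl #3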
+
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm ORR : AsI1_bin_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>>;
+defm EOR : AsI1_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm BIC : AsI1_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+def MVNr : AsI<(ops GPR:$dst, GPR:$src),
+ "mvn", " $dst, $src", [(set GPR:$dst, (not GPR:$src))]>;
+def MVNs : AsI<(ops GPR:$dst, so_reg:$src),
+ "mvn", " $dst, $src", [(set GPR:$dst, (not so_reg:$src))]>;
+let isReMaterializable = 1 in
+def MVNi : AsI<(ops GPR:$dst, so_imm:$imm),
+ "mvn", " $dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>;
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+
+def MUL : AsI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ "mul", " $dst, $a, $b",
+ [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
+
+def MLA : AsI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+ "mla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+
+// Extra precision multiplies with low / high results
+def SMULL : AsI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "smull", " $ldst, $hdst, $a, $b", []>;
+
+def UMULL : AsI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umull", " $ldst, $hdst, $a, $b", []>;
+
+// Multiply + accumulate
+def SMLAL : AsI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "smlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMLAL : AsI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMAAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umaal", " $ldst, $hdst, $a, $b", []>,
+ Requires<[IsARM, HasV6]>;
+
+// Most significant word multiply
+def SMMUL : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ "smmul", " $dst, $a, $b",
+ [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLA : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+ "smmla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
+ Requires<[IsARM, HasV6]>;
+
+
+def SMMLS : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+ "smmls", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
+ Requires<[IsARM, HasV6]>;
+
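+// In the smul<x><y> / smla<x><y> forms below, "b" selects the bottom halfword
+// of an operand, "t" selects the top halfword, and "w" uses the whole word.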
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WB : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wb"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wt"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
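+// The defm's above expand to SMUL{BB,BT,TB,TT,WB,WT} and SMLA{BB,BT,TB,TT,WB,WT},
+// which are selected by the ARMV5TEPat patterns later in this file.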
+
+// TODO: Halfword multiply accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AI<(ops GPR:$dst, GPR:$src),
+ "clz", " $dst, $src",
+ [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]>;
+
+def REV : AI<(ops GPR:$dst, GPR:$src),
+ "rev", " $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AI<(ops GPR:$dst, GPR:$src),
+ "rev16", " $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]>;
+
+def REVSH : AI<(ops GPR:$dst, GPR:$src),
+ "revsh", " $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFF00), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsARM, HasV6]>;
+
+def PKHBT : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+ (and (shl GPR:$src2, (i32 imm:$shamt)),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+ (PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+ (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+
+
+def PKHTB : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+ (and (sra GPR:$src2, imm16_31:$shamt),
+ 0xFFFF)))]>, Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)),
+ (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm CMN : AI1_cmp_irs<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<"tst", BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<"teq", BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<"cmp", BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<"cmn", BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI<(ops GPR:$dst, GPR:$false, GPR:$true),
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def MOVCCs : AI<(ops GPR:$dst, GPR:$false, so_reg:$true),
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def MOVCCi : AI<(ops GPR:$dst, GPR:$false, so_imm:$true),
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<(ops GPR:$dst, i32imm:$label, pred:$p),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def LEApcrelJT : AXI1<(ops GPR:$dst, i32imm:$label, i32imm:$id, pred:$p),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def TPsoft : AXI<(ops),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Large immediate handling.
+
+// Two piece so_imms.
+let isReMaterializable = 1 in
+def MOVi2pieces : AI1x2<(ops GPR:$dst, so_imm2part:$src),
+ "mov", " $dst, $src",
+ [(set GPR:$dst, so_imm2part:$src)]>;
+
+def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
+ (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
+ (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
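+// For example, 0x00AB00CD is not a valid shifter-operand immediate, but its
+// two pieces 0x00AB0000 and 0x000000CD are, so the constant can be applied
+// with two orr (or eor) instructions.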
+
+// TODO: add,sub,and, 3-instr forms?
+
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+// truncstore i1 -> truncstore i8
+def : ARMPat<(truncstorei1 GPR:$src, addrmode2:$dst),
+ (STRB GPR:$src, addrmode2:$dst)>;
+def : ARMPat<(pre_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
+ (STRB_PRE GPR:$src, GPR:$base, am2offset:$offset)>;
+def : ARMPat<(post_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
+ (STRB_POST GPR:$src, GPR:$base, am2offset:$offset)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16),
+ (sra (shl GPR:$b, 16), 16))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000..27231da
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,596 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// TI - Thumb instruction.
+
+// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, HasV5T];
+}
+
+class ThumbI<dag ops, AddrMode am, SizeFlagVal sz,
+ string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, IndexModeNone, cstr> {
+ let OperandList = ops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class TI<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class TI1<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>;
+class TI2<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>;
+class TI4<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>;
+class TIs<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+// BL, BLX(1) are translated by the assembler into two instructions
+class TIx2<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>;
+
+// BR_JT instructions
+class TJTI<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, SizeSpecial, asm, "", pattern>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32);
+}]>;
+
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getValue()) < 256;
+}]>;
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break immediates up into two pieces: an immediate plus a left shift.
+// This uses thumb_immshifted to match and thumb_immshifted_val and
+// thumb_immshifted_shamt to get the val/shift pieces.
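+// For example, 0x0003FC00 is 0xFF shifted left by 10, so it can be built with
+// a mov of 255 followed by a lsl of 10 (see the two-piece pattern near the
+// end of this file).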
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// Define Thumb specific addressing modes.
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
+}
+
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+def t_addrmode_s4 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> {
+ let PrintMethod = "printThumbAddrModeS4Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm, GPR:$offsreg);
+}
+
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+def t_addrmode_s2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> {
+ let PrintMethod = "printThumbAddrModeS2Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm, GPR:$offsreg);
+}
+
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+def t_addrmode_s1 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> {
+ let PrintMethod = "printThumbAddrModeS1Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm, GPR:$offsreg);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+def tADJCALLSTACKUP :
+PseudoInst<(ops i32imm:$amt),
+ "@ tADJCALLSTACKUP $amt",
+ [(ARMcallseq_end imm:$amt)]>, Imp<[SP],[SP]>, Requires<[IsThumb]>;
+
+def tADJCALLSTACKDOWN :
+PseudoInst<(ops i32imm:$amt),
+ "@ tADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>, Requires<[IsThumb]>;
+
+let isNotDuplicable = 1 in
+def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
+ "$cp:\n\tadd $dst, pc",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in {
+ def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : TI<(ops GPR:$dst), "bx $dst", []>;
+}
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isCall = 1, noResults = 1,
+ Defs = [R0, R1, R2, R3, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def tBL : TIx2<(ops i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
+ def tBLXr : TI<(ops GPR:$dst, variable_ops),
+ "blx $dst",
+ [(ARMtcall GPR:$dst)]>, Requires<[HasV5T]>;
+ // ARMv4T
+ def tBX : TIx2<(ops GPR:$dst, variable_ops),
+ "cpy lr, pc\n\tbx $dst",
+ [(ARMcall_nolink GPR:$dst)]>;
+}
+
+let isBranch = 1, isTerminator = 1, noResults = 1 in {
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
+
+ // Far jump
+ def tBfar : TIx2<(ops brtarget:$dst), "bl $dst\t@ far jump", []>;
+
+ def tBR_JTr : TJTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id),
+ "cpy pc, $dst \n\t.align\t2\n$jt",
+ [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>;
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1, noResults = 1 in
+ def tBcc : TI<(ops brtarget:$dst, pred:$cc), "b$cc $dst",
+ [/*(ARMbrcond bb:$dst, imm:$cc)*/]>;
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+let isLoad = 1 in {
+def tLDR : TI4<(ops GPR:$dst, t_addrmode_s4:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_s4:$addr))]>;
+
+def tLDRB : TI1<(ops GPR:$dst, t_addrmode_s1:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
+
+def tLDRH : TI2<(ops GPR:$dst, t_addrmode_s2:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
+
+def tLDRSB : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRSH : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
+
+// Special instruction for restore. It cannot clobber the condition register
+// when it's expanded by eliminateCallFramePseudoInstr().
+def tRestore : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
+ "ldr $dst, $addr", []>;
+
+// Load tconstpool
+def tLDRpci : TIs<(ops GPR:$dst, i32imm:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let isReMaterializable = 1 in
+def tLDRcp : TIs<(ops GPR:$dst, i32imm:$addr),
+ "ldr $dst, $addr", []>;
+} // isLoad
+
+let isStore = 1 in {
+def tSTR : TI4<(ops GPR:$src, t_addrmode_s4:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_s4:$addr)]>;
+
+def tSTRB : TI1<(ops GPR:$src, t_addrmode_s1:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, t_addrmode_s1:$addr)]>;
+
+def tSTRH : TI2<(ops GPR:$src, t_addrmode_s2:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, t_addrmode_s2:$addr)]>;
+
+def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_sp:$addr)]>;
+
+// Special instruction for spill. It cannot clobber the condition register
+// when it's expanded by eliminateCallFramePseudoInstr().
+def tSpill : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// TODO: A7-44: LDMIA - load multiple
+
+let isLoad = 1 in
+def tPOP : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isStore = 1 in
+def tPUSH : TI<(ops reglist:$src1, variable_ops),
+ "push $src1", []>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Add with carry
+def tADC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "adc $dst, $rhs",
+ [(set GPR:$dst, (adde GPR:$lhs, GPR:$rhs))]>;
+
+def tADDS : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (addc GPR:$lhs, GPR:$rhs))]>;
+
+
+def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>;
+
+def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>;
+
+def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>;
+
+def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "add $dst, $rhs", []>;
+
+def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs),
+ "add $dst, pc, $rhs * 4", []>;
+def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs),
+ "add $dst, $sp, $rhs * 4", []>;
+def tADDspi : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs * 4", []>;
+
+def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "and $dst, $rhs",
+ [(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>;
+
+def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "asr $dst, $lhs, $rhs",
+ [(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>;
+
+def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "asr $dst, $rhs",
+ [(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>;
+
+def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "bic $dst, $rhs",
+ [(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>;
+
+
+def tCMN : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>;
+
+def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, GPR:$rhs)]>;
+
+def tTST : TI<(ops GPR:$lhs, GPR:$rhs),
+ "tst $lhs, $rhs",
+ [(ARMcmpNZ (and GPR:$lhs, GPR:$rhs), 0)]>;
+
+def tCMNNZ : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmpNZ GPR:$lhs, (ineg GPR:$rhs))]>;
+
+def tCMPNZi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ GPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPNZr : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ GPR:$lhs, GPR:$rhs)]>;
+
+// TODO: A7-37: CMP(3) - cmp hi regs
+
+def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "eor $dst, $rhs",
+ [(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>;
+
+def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "lsl $dst, $lhs, $rhs",
+ [(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>;
+
+def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "lsl $dst, $rhs",
+ [(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>;
+
+def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "lsr $dst, $lhs, $rhs",
+ [(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>;
+
+def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "lsr $dst, $rhs",
+ [(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>;
+
+// FIXME: This is not rematerializable because mov changes the condition code.
+def tMOVi8 : TI<(ops GPR:$dst, i32imm:$src),
+ "mov $dst, $src",
+ [(set GPR:$dst, imm0_255:$src)]>;
+
+// TODO: A7-73: MOV(2) - mov setting flag.
+
+
+// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
+// which is MOV(3). This also supports high registers.
+def tMOVr : TI<(ops GPR:$dst, GPR:$src),
+ "cpy $dst, $src", []>;
+
+def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "mul $dst, $rhs",
+ [(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>;
+
+def tMVN : TI<(ops GPR:$dst, GPR:$src),
+ "mvn $dst, $src",
+ [(set GPR:$dst, (not GPR:$src))]>;
+
+def tNEG : TI<(ops GPR:$dst, GPR:$src),
+ "neg $dst, $src",
+ [(set GPR:$dst, (ineg GPR:$src))]>;
+
+def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "orr $dst, $rhs",
+ [(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>;
+
+
+def tREV : TI<(ops GPR:$dst, GPR:$src),
+ "rev $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREV16 : TI<(ops GPR:$dst, GPR:$src),
+ "rev16 $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREVSH : TI<(ops GPR:$dst, GPR:$src),
+ "revsh $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFFFF), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "ror $dst, $rhs",
+ [(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>;
+
+
+// Subtract with carry
+def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "sbc $dst, $rhs",
+ [(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>;
+
+def tSUBS : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (subc GPR:$lhs, GPR:$rhs))]>;
+
+
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>;
+
+def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>;
+
+def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>;
+
+def tSUBspi : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs * 4", []>;
+
+def tSXTB : TI<(ops GPR:$dst, GPR:$src),
+ "sxtb $dst, $src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>,
+ Requires<[IsThumb, HasV6]>;
+def tSXTH : TI<(ops GPR:$dst, GPR:$src),
+ "sxth $dst, $src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+def tUXTB : TI<(ops GPR:$dst, GPR:$src),
+ "uxtb $dst, $src",
+ [(set GPR:$dst, (and GPR:$src, 0xFF))]>,
+ Requires<[IsThumb, HasV6]>;
+def tUXTH : TI<(ops GPR:$dst, GPR:$src),
+ "uxth $dst, $src",
+ [(set GPR:$dst, (and GPR:$src, 0xFFFF))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+// tMOVCCr - Conditional move, used to implement the Thumb SELECT_CC DAG
+// operation. Expanded by the scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
+ def tMOVCCr :
+ PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, pred:$cc),
+ "@ tMOVCCr $cc",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def tLEApcrel : TIx2<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+def tLEApcrelJT : TIx2<(ops GPR:$dst, i32imm:$label, i32imm:$id),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, LR] in {
+ def tTPsoft : TIx2<(ops),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress
+def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+
+// Indirect calls to ARM routines
+def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
+
+// zextload i1 -> zextload i8
+def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr),
+ (tLDRB t_addrmode_s1:$addr)>;
+
+// extload -> zextload
+def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
+
+// truncstore i1 -> truncstore i8
+def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_s1:$dst),
+ (tSTRB GPR:$src, t_addrmode_s1:$dst)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : ThumbPat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : ThumbPat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000..4bb9f04
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,386 @@
+//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// ARM Float Instruction
+class ASI<dag ops, string opc, string asm, list<dag> pattern>
+ : AI<ops, opc, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ASI5<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+// ARM Double Instruction
+class ADI<dag ops, string opc, string asm, list<dag> pattern>
+ : AI<ops, opc, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ADI5<dag ops, string opc, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+// Special cases.
+class AXSI<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class AXSI5<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class AXDI<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class AXDI5<dag ops, string asm, list<dag> pattern>
+ : XI<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+
+def SDT_FTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 :
+SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_FMDRR :
+SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTRet, [SDNPInFlag,SDNPOutFlag]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
+def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let isLoad = 1 in {
+def FLDD : ADI5<(ops DPR:$dst, addrmode5:$addr),
+ "fldd", " $dst, $addr",
+ [(set DPR:$dst, (load addrmode5:$addr))]>;
+
+def FLDS : ASI5<(ops SPR:$dst, addrmode5:$addr),
+ "flds", " $dst, $addr",
+ [(set SPR:$dst, (load addrmode5:$addr))]>;
+} // isLoad
+
+let isStore = 1 in {
+def FSTD : ADI5<(ops DPR:$src, addrmode5:$addr),
+ "fstd", " $src, $addr",
+ [(store DPR:$src, addrmode5:$addr)]>;
+
+def FSTS : ASI5<(ops SPR:$src, addrmode5:$addr),
+ "fsts", " $src, $addr",
+ [(store SPR:$src, addrmode5:$addr)]>;
+} // isStore
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+let isLoad = 1 in {
+def FLDMD : AXDI5<(ops addrmode5:$addr, pred:$p, reglist:$dst1, variable_ops),
+ "fldm${addr:submode}d${p} ${addr:base}, $dst1",
+ []>;
+
+def FLDMS : AXSI5<(ops addrmode5:$addr, pred:$p, reglist:$dst1, variable_ops),
+ "fldm${addr:submode}s${p} ${addr:base}, $dst1",
+ []>;
+} // isLoad
+
+let isStore = 1 in {
+def FSTMD : AXDI5<(ops addrmode5:$addr, pred:$p, reglist:$src1, variable_ops),
+ "fstm${addr:submode}d${p} ${addr:base}, $src1",
+ []>;
+
+def FSTMS : AXSI5<(ops addrmode5:$addr, pred:$p, reglist:$src1, variable_ops),
+ "fstm${addr:submode}s${p} ${addr:base}, $src1",
+ []>;
+} // isStore
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def FADDD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "faddd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
+
+def FADDS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fadds", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+
+def FCMPED : ADI<(ops DPR:$a, DPR:$b),
+ "fcmped", " $a, $b",
+ [(arm_cmpfp DPR:$a, DPR:$b)]>;
+
+def FCMPES : ASI<(ops SPR:$a, SPR:$b),
+ "fcmpes", " $a, $b",
+ [(arm_cmpfp SPR:$a, SPR:$b)]>;
+
+def FDIVD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fdivd", " $dst, $a, $b",
+ [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
+
+def FDIVS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fdivs", " $dst, $a, $b",
+ [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
+
+def FMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
+
+def FMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+
+def FNMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fnmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>;
+
+def FNMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fnmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), DPR:$b),
+ (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+
+def FSUBD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fsubd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>;
+
+def FSUBS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fsubs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def FABSD : ADI<(ops DPR:$dst, DPR:$a),
+ "fabsd", " $dst, $a",
+ [(set DPR:$dst, (fabs DPR:$a))]>;
+
+def FABSS : ASI<(ops SPR:$dst, SPR:$a),
+ "fabss", " $dst, $a",
+ [(set SPR:$dst, (fabs SPR:$a))]>;
+
+def FCMPEZD : ADI<(ops DPR:$a),
+ "fcmpezd", " $a",
+ [(arm_cmpfp0 DPR:$a)]>;
+
+def FCMPEZS : ASI<(ops SPR:$a),
+ "fcmpezs", " $a",
+ [(arm_cmpfp0 SPR:$a)]>;
+
+def FCVTDS : ADI<(ops DPR:$dst, SPR:$a),
+ "fcvtds", " $dst, $a",
+ [(set DPR:$dst, (fextend SPR:$a))]>;
+
+def FCVTSD : ADI<(ops SPR:$dst, DPR:$a),
+ "fcvtsd", " $dst, $a",
+ [(set SPR:$dst, (fround DPR:$a))]>;
+
+def FCPYD : ADI<(ops DPR:$dst, DPR:$a),
+ "fcpyd", " $dst, $a", []>;
+
+def FCPYS : ASI<(ops SPR:$dst, SPR:$a),
+ "fcpys", " $dst, $a", []>;
+
+def FNEGD : ADI<(ops DPR:$dst, DPR:$a),
+ "fnegd", " $dst, $a",
+ [(set DPR:$dst, (fneg DPR:$a))]>;
+
+def FNEGS : ASI<(ops SPR:$dst, SPR:$a),
+ "fnegs", " $dst, $a",
+ [(set SPR:$dst, (fneg SPR:$a))]>;
+
+def FSQRTD : ADI<(ops DPR:$dst, DPR:$a),
+ "fsqrtd", " $dst, $a",
+ [(set DPR:$dst, (fsqrt DPR:$a))]>;
+
+def FSQRTS : ASI<(ops SPR:$dst, SPR:$a),
+ "fsqrts", " $dst, $a",
+ [(set SPR:$dst, (fsqrt SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD, pred:$p),
+ "@ IMPLICIT_DEF_SPR $rD",
+ [(set SPR:$rD, (undef))]>;
+def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD, pred:$p),
+ "@ IMPLICIT_DEF_DPR $rD",
+ [(set DPR:$rD, (undef))]>;
+
+def FMRS : ASI<(ops GPR:$dst, SPR:$src),
+ "fmrs", " $dst, $src",
+ [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : ASI<(ops SPR:$dst, GPR:$src),
+ "fmsr", " $dst, $src",
+ [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+
+def FMRRD : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
+ "fmrrd", " $dst1, $dst2, $src",
+ [/* FIXME: Can't write pattern for multiple result instr*/]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
+ "fmdrr", " $dst, $src1, $src2",
+ [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+def FMSTAT : ASI<(ops), "fmstat", "", [(arm_fmstat)]>, Imp<[], [CPSR]>;
+
+// FMXR: GPR -> VFP system reg
+
+
+// Int to FP:
+
+def FSITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fsitod", " $dst, $a",
+ [(set DPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FSITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fsitos", " $dst, $a",
+ [(set SPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FUITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fuitod", " $dst, $a",
+ [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fuitos", " $dst, $a",
+ [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+
+def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftosizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi DPR:$a))]>;
+
+def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftosizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi SPR:$a))]>;
+
+def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftouizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui DPR:$a))]>;
+
+def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftouizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+//
+
+def FMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+def FCPYDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true),
+ "fcpyd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true),
+ "fcpys", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true),
+ "fnegd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true),
+ "fnegs", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..294a12b
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,131 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Raul Herbster and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xEA; // Emit B opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl ARMCompilationCallback\n"
+ "ARMCompilationCallback:\n"
+ // save main registers
+ "mov ip, sp\n"
+ "stmfd sp!, {fp, ip, lr, pc}\n"
+ "sub fp, ip, #4\n"
+ // arguments to Compilation Callback
+ // r0 - our lr (address of the call instruction in stub plus 4)
+ // r1 - stub's lr (address of instruction that called the stub plus 4)
+ "mov r0, fp\n" // stub's frame
+ "mov r1, lr\n" // stub's lr
+ "bl ARMCompilationCallbackC\n"
+ // restore main registers
+ "ldmfd sp, {fp, sp, pc}\n");
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n");
+ abort();
+ }
+#endif
+}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[-1];
+
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+#if 0
+ DOUT << "In callback! Addr=" << (void*)RetAddr
+ << " FP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+#endif
+
+ // Sanity check to make sure this really is a branch and link instruction.
+ assert(((unsigned char*)RetAddr-1)[3] == 0xEB && "Not a branch and link instr!");
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+
+ // Change the return address to reexecute the branch and link instruction...
+ *RetAddrLoc -= 1;
+}
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
+ unsigned addr = (intptr_t)Fn-MCE.getCurrentPCValue()-4;
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ MCE.startFunctionStub(4, 2);
+ MCE.emitByte(0xEA); // branch to the corresponding function addr
+ MCE.emitByte((unsigned char)(addr >> 0));
+ MCE.emitByte((unsigned char)(addr >> 8));
+ MCE.emitByte((unsigned char)(addr >> 16));
+ return MCE.finishFunctionStub(0);
+ } else {
+ MCE.startFunctionStub(5, 2);
+ MCE.emitByte(0xEB); // branch and link to the corresponding function addr
+ }
+ MCE.emitByte((unsigned char)(addr >> 0));
+ MCE.emitByte((unsigned char)(addr >> 8));
+ MCE.emitByte((unsigned char)(addr >> 16));
+
+ return MCE.finishFunctionStub(0);
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 0000000..bd0ea84
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,50 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Raul Herbster and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ ARMTargetMachine &TM;
+ public:
+ ARMJITInfo(ARMTargetMachine &tm) : TM(tm) {useGOT = 0;}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitFunctionStub - Use the specified MachineCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(void *Fn, MachineCodeEmitter &MCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+ };
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000..7562c5b
--- /dev/null
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,750 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass((intptr_t)&ID) {}
+
+ const TargetInstrInfo *TII;
+ const MRegisterInfo *MRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
+ : Offset(o), Position(p), MBBI(i), Merged(false) {};
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ SmallVector<MachineBasicBlock::iterator, 4>
+ MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+ return new ARMLoadStoreOpt();
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode) {
+ switch (Opcode) {
+ case ARM::LDR:
+ NumLDMGened++;
+ return ARM::LDM;
+ case ARM::STR:
+ NumSTMGened++;
+ return ARM::STM;
+ case ARM::FLDS:
+ NumFLDMGened++;
+ return ARM::FLDMS;
+ case ARM::FSTS:
+ NumFSTMGened++;
+ return ARM::FSTMS;
+ case ARM::FLDD:
+ NumFLDMGened++;
+ return ARM::FLDMD;
+ case ARM::FSTD:
+ NumFSTMGened++;
+ return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeOps - Create and insert a LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
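+/// For example, "ldr r1, [r0]" followed by "ldr r2, [r0, #4]" can be merged
+/// into "ldmia r0, {r1, r2}".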
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs,
+ const TargetInstrInfo *TII) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if (isAM4 && Offset == 4)
+ Mode = ARM_AM::ib;
+ else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+ Mode = ARM_AM::da;
+ else if (isAM4 && Offset == -4 * (int)NumRegs)
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (Opcode == ARM::LDR)
+ // If it is a load, then just use one of the destination registers as
+ // the new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Use the scratch register as the new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = ARM::ADDri;
+ if (Offset < 0) {
+ BaseOpc = ARM::SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase)
+ .addReg(Base, false, false, BaseKill).addImm(ImmedOffset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // The new base is always killed right after its use.
+ }
+
+ bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+ bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ Opcode = getLoadStoreMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = (isAM4)
+ ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
+ : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
+ .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second);
+
+ return true;
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps) {
+ SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned Pos = MemOps[SIndex].Position;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+ bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+
+ SmallVector<std::pair<unsigned,bool>, 8> Regs;
+ Regs.push_back(std::make_pair(PReg, isKill));
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ isKill = MemOps[i].MBBI->getOperand(0).isKill();
+ // AM4 - register numbers in ascending order.
+ // AM5 - consecutive register numbers in ascending order.
+ if (NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ Offset += Size;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ PRegNum = RegNum;
+ } else {
+ // Can't merge this in. Try to merge the earlier ones first.
+ if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned j = SIndex; j < i; ++j) {
+ MBB.erase(MemOps[j].MBBI);
+ MemOps[j].Merged = true;
+ }
+ }
+ SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
+ Merges.append(Merges2.begin(), Merges2.end());
+ return Merges;
+ }
+
+ if (MemOps[i].Position > Pos) {
+ Pos = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+ MBB.erase(MemOps[i].MBBI);
+ MemOps[i].Merged = true;
+ }
+ }
+
+ return Merges;
+}
+
+/// getInstrPredicate - If instruction is predicated, returns its predicate
+/// condition, otherwise returns AL. It also returns the condition code
+/// register by reference.
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImmedValue();
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::SUBri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::ADDri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDR:
+ case ARM::STR:
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return 4;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return 8;
+ case ARM::LDM:
+ case ARM::STM:
+ return (MI->getNumOperands() - 4) * 4;
+ case ARM::FLDMS:
+ case ARM::FSTMS:
+ case ARM::FLDMD:
+ case ARM::FSTMD:
+ return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+ if (isAM4) {
+ if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ // Can't use the updating AM4 sub-mode if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ } else {
+ // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::STR: return ARM::STR_PRE;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_POST;
+ case ARM::STR: return ARM::STR_POST;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
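+///
+/// For example:
+/// rn := rn - 4;
+/// ldr rd, [rn]
+/// =>
+/// ldr rd, [rn, #-4]!
+///
+/// ldr rd, [rn]
+/// rn := rn + 4;
+/// =>
+/// ldr rd, [rn], #4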
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+ (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ return false;
+
+ bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
+ Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(NextMBBI);
+ }
+
+ if (!DoMerge)
+ return false;
+
+ bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
+ : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
+ true, isDPR ? 2 : 1);
+ if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST;
+ BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, true)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FLDMS, FLDMD
+ BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base, false, false, BaseKill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg)
+ .addReg(MI->getOperand(0).getReg(), true);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST;
+ BuildMI(MBB, MBBI, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), false, false, MO.isKill())
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FSTMS, FSTMD
+ BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base).addImm(Offset)
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), false, false, MO.isKill());
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if the instruction is a memory operation that
+/// this pass is capable of operating on.
+static bool isMemoryOp(MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::LDR:
+ case ARM::STR:
+ return MI->getOperand(1).isRegister() && MI->getOperand(2).getReg() == 0;
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return MI->getOperand(1).isRegister();
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return MI->getOperand(1).isRegister();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ const TargetInstrDescriptor *TID = MBBI->getInstrDescriptor();
+ unsigned OffField = MBBI->getOperand(TID->numOperands-3).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (Advance) {
+ ++Position;
+ ++MBBI;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register. Make sure it's a call clobbered register or
+ // a spilled callee-saved register.
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
+ if (!Scratch)
+ Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
+ AFI->getSpilledCSRegisters());
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ SmallVector<MachineBasicBlock::iterator,4> MBBII =
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+ if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
+ NumMerges++;
+ NumMerges += MBBII.size();
+
+ // Try folding preceding/trailing base inc/dec into those loads / stores
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
+ NumMerges++;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so it directly restores the value
+/// of LR into pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+ MachineInstr *PrevMI = prior(MBBI);
+ if (PrevMI->getOpcode() == ARM::LDM) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() == ARM::LR) {
+ PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
+ MO.setReg(ARM::PC);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ MRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000..665c5e3
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,220 @@
+//===- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialize Align, so it must precede it.
+ bool isThumb;
+
+ /// Align - required alignment. ARM functions and Thumb functions with
+ /// constant pools require 4-byte alignment; other Thumb functions
+ /// require only 2-byte alignment.
+ unsigned Align;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// LRSpilledForFarJump - True if the LR register has been spilled to
+ /// enable a far jump.
+ bool LRSpilledForFarJump;
+
+ /// R3IsLiveIn - True if R3 is live in to this function.
+ /// FIXME: Remove when register scavenger for Thumb is done.
+ bool R3IsLiveIn;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offsets of the callee
+ /// saved register spill areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keep track of the frame
+ /// indices which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
+ ///
+ BitVector SpilledCSRegs;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ Align(2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0) {}
+
+ ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ Align(isThumb ? 1U : 2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
+ JumpTableUId(0) {}
+
+ bool isThumbFunction() const { return isThumb; }
+
+ unsigned getAlign() const { return Align; }
+ void setAlign(unsigned a) { Align = a; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ // FIXME: Remove when register scavenger for Thumb is done.
+ bool isR3LiveIn() const { return R3IsLiveIn; }
+ void setR3IsLiveIn(bool l) { R3IsLiveIn = l; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ void setCSRegisterIsSpilled(unsigned Reg) {
+ SpilledCSRegs.set(Reg);
+ }
+
+ bool isCSRegisterSpilled(unsigned Reg) {
+ return SpilledCSRegs[Reg];
+ }
+
+ const BitVector &getSpilledCSRegisters() const {
+ return SpilledCSRegs;
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 0000000..f8e10de
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,1566 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging",
+ cl::Hidden,
+ cl::desc("Enable register scavenging on Thumb"));
+
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace ARM;
+ switch (RegEnum) {
+ case R0: case S0: case D0: return 0;
+ case R1: case S1: case D1: return 1;
+ case R2: case S2: case D2: return 2;
+ case R3: case S3: case D3: return 3;
+ case R4: case S4: case D4: return 4;
+ case R5: case S5: case D5: return 5;
+ case R6: case S6: case D6: return 6;
+ case R7: case S7: case D7: return 7;
+ case R8: case S8: case D8: return 8;
+ case R9: case S9: case D9: return 9;
+ case R10: case S10: case D10: return 10;
+ case R11: case S11: case D11: return 11;
+ case R12: case S12: case D12: return 12;
+ case SP: case S13: case D13: return 13;
+ case LR: case S14: case D14: return 14;
+ case PC: case S15: case D15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ default:
+ assert(0 && "Unknown ARM register!");
+ abort();
+ }
+}
+
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ TII(tii), STI(sti),
+ FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+}
+
+bool ARMRegisterInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, TII.get(ARM::tPUSH));
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ MIB.addReg(Reg, false/*isDef*/,false/*isImp*/,true/*isKill*/);
+ }
+ return true;
+}
+
+bool ARMRegisterInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ MachineInstr *PopMI = new MachineInstr(TII.get(ARM::tPOP));
+ MBB.insert(MI, PopMI);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ PopMI->setInstrDescriptor(TII.get(ARM::tPOP_RET));
+ MBB.erase(MI);
+ }
+ PopMI->addRegOperand(Reg, true);
+ }
+ return true;
+}
+
+void ARMRegisterInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, int FI,
+ const TargetRegisterClass *RC) const {
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction())
+ BuildMI(MBB, I, TII.get(ARM::tSpill)).addReg(SrcReg, false, false, true)
+ .addFrameIndex(FI).addImm(0);
+ else
+ BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg, false, false, true)
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm((int64_t)ARMCC::AL)
+ .addReg(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ BuildMI(MBB, I, TII.get(ARM::FSTD)).addReg(SrcReg, false, false, true)
+ .addFrameIndex(FI).addImm(0).addImm((int64_t)ARMCC::AL).addReg(0);
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ BuildMI(MBB, I, TII.get(ARM::FSTS)).addReg(SrcReg, false, false, true)
+ .addFrameIndex(FI).addImm(0).addImm((int64_t)ARMCC::AL).addReg(0);
+ }
+}
+
+void ARMRegisterInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction())
+ BuildMI(MBB, I, TII.get(ARM::tRestore), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ else
+ BuildMI(MBB, I, TII.get(ARM::LDR), DestReg)
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm((int64_t)ARMCC::AL)
+ .addReg(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ BuildMI(MBB, I, TII.get(ARM::FLDD), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm((int64_t)ARMCC::AL).addReg(0);
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ BuildMI(MBB, I, TII.get(ARM::FLDS), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm((int64_t)ARMCC::AL).addReg(0);
+ }
+}
+
+void ARMRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction())
+ BuildMI(MBB, I, TII.get(ARM::tMOVr), DestReg).addReg(SrcReg);
+ else
+ BuildMI(MBB, I, TII.get(ARM::MOVr), DestReg).addReg(SrcReg)
+ .addImm((int64_t)ARMCC::AL).addReg(0).addReg(0);
+ } else if (RC == ARM::SPRRegisterClass)
+ BuildMI(MBB, I, TII.get(ARM::FCPYS), DestReg).addReg(SrcReg)
+ .addImm((int64_t)ARMCC::AL).addReg(0);
+ else if (RC == ARM::DPRRegisterClass)
+ BuildMI(MBB, I, TII.get(ARM::FCPYD), DestReg).addReg(SrcReg)
+ .addImm((int64_t)ARMCC::AL).addReg(0);
+ else
+ abort();
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
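+///
+/// For illustration (constpool label name assumed), the emitted sequence is
+/// roughly:
+/// ldr rd, .LCPI_n (where .LCPI_n holds Val)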
+static void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo &TII, bool isThumb) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 2);
+ if (isThumb)
+ BuildMI(MBB, MBBI, TII.get(ARM::tLDRcp), DestReg).addConstantPoolIndex(Idx);
+ else
+ BuildMI(MBB, MBBI, TII.get(ARM::LDRcp), DestReg).addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0).addImm((unsigned)Pred).addReg(PredReg);
+}
+
+void ARMRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ emitLoadConstPool(MBB, I, DestReg,
+ Orig->getOperand(1).getImmedValue(),
+ (ARMCC::CondCodes)Orig->getOperand(2).getImmedValue(),
+ Orig->getOperand(3).getReg(),
+ TII, false);
+ return;
+ }
+
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+/// isLowRegister - Returns true if the register is a low register (r0-r7).
+///
+static bool isLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr *MI,
+ unsigned OpNum, int FI) const {
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = NULL;
+ switch (Opc) {
+ default: break;
+ case ARM::MOVr: {
+ if (MI->getOperand(4).getReg() == ARM::CPSR)
+ // If it is updating CPSR, then it cannot be folded.
+ break;
+ unsigned Pred = MI->getOperand(2).getImmedValue();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI)
+ .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::tMOVr: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (isPhysicalRegister(SrcReg) && !isLowRegister(SrcReg))
+ // tSpill cannot take a high register operand.
+ break;
+ NewMI = BuildMI(TII.get(ARM::tSpill)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (isPhysicalRegister(DstReg) && !isLowRegister(DstReg))
+ // tRestore cannot target a high register operand.
+ break;
+ NewMI = BuildMI(TII.get(ARM::tRestore), DstReg).addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ }
+ case ARM::FCPYS: {
+ unsigned Pred = MI->getOperand(2).getImmedValue();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::FLDS), DstReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::FCPYD: {
+ unsigned Pred = MI->getOperand(2).getImmedValue();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::FLDD), DstReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ }
+
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+}
+
+const unsigned* ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const *
+ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(FramePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ return Reserved;
+}
+
+bool
+ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+bool
+ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ return ThumbRegScavenging || !AFI->isThumbFunction();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+// not required, we reserve argument space for call sites in the function
+// immediately on entry to the current function. This eliminates the need for
+// add/sub sp brackets around call sites. Returns true if the call frame is
+// included as part of the stack frame.
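+// For example, with Thumb's scaled 8-bit offsets the cutoff below works out
+// to 255 * 4 / 2 = 510 bytes; with ARM's 12-bit offsets it is 4095 / 2 = 2047.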
+bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
+ if (AFI->isThumbFunction()) {
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+ } else {
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+ }
+ return !hasFP(MF);
+}
+
+/// emitARMRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in ARM code.
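+///
+/// For example (values illustrative), rd = rn + 0x101000 is not a valid
+/// so_imm, so it is split into rotatable 8-bit chunks:
+/// add rd, rn, #0x1000
+/// add rd, rd, #0x100000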
+static
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ // Get the properly encoded SOImmVal field.
+ int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
+ assert(SOImmVal != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ BuildMI(MBB, MBBI, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
+ .addReg(BaseReg, false, false, true).addImm(SOImmVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ BaseReg = DestReg;
+ }
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
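+///
+/// For example (registers, value and constpool label illustrative),
+/// r0 = r7 + 1000 becomes roughly:
+/// ldr r0, .LCPI_n (where .LCPI_n holds the value 1000)
+/// add r0, r0, r7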
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII) {
+ bool isHigh = !isLowRegister(DestReg) ||
+ (BaseReg != 0 && !isLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have a high register version. Load the negative value
+ // if either the base or dest register is a high register. Also, do not
+ // issue sub as part of the sequence if the condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = ARM::R3;
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVr), ARM::R12)
+ .addReg(ARM::R3, false, false, true);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ BuildMI(MBB, MBBI, TII.get(ARM::tNEG), LdReg)
+ .addReg(LdReg, false, false, true);
+ } else
+ emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, TII, true);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, TII.get(Opc), DestReg);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, false, false, true);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, false, false, true);
+ if (DestReg == ARM::SP)
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVr), ARM::R3)
+ .addReg(ARM::R12, false, false, true);
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (isLowRegister(DestReg) && isLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ BuildMI(MBB, MBBI, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+ .addReg(BaseReg, false, false, true).addImm(ThisVal);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, false, false, true);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr)
+ BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addReg(DestReg).addImm(ThisVal);
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ BuildMI(MBB, MBBI, TII.get(Opc), DestReg)
+ .addReg(BaseReg, false, false, isKill).addImm(ThisVal);
+ BaseReg = DestReg;
+
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc)
+ BuildMI(MBB, MBBI, TII.get(ExtraOpc), DestReg)
+ .addReg(DestReg, false, false, true)
+ .addImm(((unsigned)NumBytes) & 3);
+}
+
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg,
+ bool isThumb, const TargetInstrInfo &TII) {
+ if (isThumb)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+ else
+ emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+void ARMRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ unsigned Amount = Old->getOperand(0).getImmedValue();
+ if (Amount != 0) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ bool isThumb = AFI->isThumbFunction();
+ ARMCC::CondCodes Pred = isThumb
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImmedValue();
+ unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
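+///
+/// For example (value illustrative), materializing -300 into rd:
+/// mov rd, #255
+/// add rd, rd, #45
+/// neg rd, rd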
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII);
+ if (isSub)
+ BuildMI(MBB, MBBI, TII.get(ARM::tNEG), DestReg)
+ .addReg(DestReg, false, false, true);
+}
+
+/// findScratchRegister - Find a 'free' ARM register. If register scavenger
+/// is not being used, R12 is available. Otherwise, try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
+ ARMFunctionInfo *AFI) {
+ unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12;
+ if (Reg == 0)
+ // Try an already spilled CS register.
+ Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters());
+
+ return Reg;
+}
+
+void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (hasFP(MF)) {
+ assert(SPAdj == 0 && "Unexpected");
+ // There are alloca()'s in this function, so we must reference off the
+ // frame pointer instead.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDescriptor &Desc = TII.get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(i+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setInstrDescriptor(TII.get(ARM::MOVr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setInstrDescriptor(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ // Otherwise, we fall back to the common code below to form the imm offset
+ // with a sequence of ADDri instructions. First though, pull as much of the
+ // imm into this ADDri as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
+ assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
+ MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
+ } else if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(i+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setInstrDescriptor(TII.get(ARM::tADDi3));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setInstrDescriptor(TII.get(ARM::tMOVr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ return;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII);
+ MBB.erase(II);
+ return;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Mask);
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = next(II);
+ emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII);
+ MI.setInstrDescriptor(TII.get(ARM::tADDhirr));
+ MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ }
+ return;
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode2: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode5: {
+ ImmIdx = i+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrModeTs: {
+ ImmIdx = i+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ Scale = 4;
+ break;
+ }
+ default:
+ assert(0 && "Unsupported addressing mode!");
+ abort();
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0 && !isThumb) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ bool isThumbSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+ if (AddrMode == ARMII::AddrModeTs) {
+ // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+ // a different base register.
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+ }
+ // If this is a Thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (AddrMode == ARMII::AddrModeTs && isThumbSpillRestore)
+ ImmOp.ChangeToImmediate(0);
+ else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above; handle the rest by providing a register that
+ // holds SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ if (isThumb) {
+ if (TII.isLoad(Opcode)) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tRestore) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,Offset,false,TII);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, TII, true);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII);
+ MI.setInstrDescriptor(TII.get(ARM::tLDR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ MI.addRegOperand(FrameReg, false); // Use [reg, reg] addrmode.
+ else
+ MI.addRegOperand(0, false); // tLDR has an extra register operand.
+ } else if (TII.isStore(Opcode)) {
+ // FIXME! This is horrific!!! We need register scavenging.
+ // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+ // also an ABI register so it's possible that it is the register being
+ // stored here. If that's the case, we do the following:
+ // r12 = r2
+ // Use r2 to materialize sp + offset
+ // str r3, r2
+ // r2 = r12
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = ARM::R3;
+ bool UseRR = false;
+ if (ValReg == ARM::R3) {
+ BuildMI(MBB, II, TII.get(ARM::tMOVr), ARM::R12)
+ .addReg(ARM::R2, false, false, true);
+ TmpReg = ARM::R2;
+ }
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, II, TII.get(ARM::tMOVr), ARM::R12)
+ .addReg(ARM::R3, false, false, true);
+ if (Opcode == ARM::tSpill) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,Offset,false,TII);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, TII, true);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII);
+ MI.setInstrDescriptor(TII.get(ARM::tSTR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ MI.addRegOperand(FrameReg, false); // Use [reg, reg] addrmode.
+ else
+ MI.addRegOperand(0, false); // tSTR has an extra register operand.
+
+ MachineBasicBlock::iterator NII = next(II);
+ if (ValReg == ARM::R3)
+ BuildMI(MBB, NII, TII.get(ARM::tMOVr), ARM::R2)
+ .addReg(ARM::R12, false, false, true);
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, NII, TII.get(ARM::tMOVr), ARM::R3)
+ .addReg(ARM::R12, false, false, true);
+ } else
+ assert(false && "Unexpected opcode!");
+ } else {
+ // Materialize the full address in a scratch register: scratch = sp + offset.
+ // If the offset we have is too large to fit into the instruction, we need
+ // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+ // out of 'Offset'.
+ unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+ if (ScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImmedValue();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+ isSub ? -Offset : Offset, Pred, PredReg, TII);
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
+
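+/// estimateStackSize - Walk the fixed and ordinary stack objects of the
+/// function and return a rough estimate of the stack frame size in bytes.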
+static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
+
+void
+ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.isPhysRegUsed(Reg)) {
+ AFI->setCSRegisterIsSpilled(Reg);
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (CSRegClasses[i] == &ARM::GPRRegClass) {
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumbFunction()) {
+ unsigned FnSize = ARM::GetFunctionSize(MF);
+ // Force LR to be spilled if the Thumb function size is >= 2048. This enables
+ // the use of BL to implement a far jump. If it turns out that it's not
+ // needed, the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ bool ExtraCSSpill = false;
+ if (!CanEliminateFrame || hasFP(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MF.setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If the stack and doubles are 8-byte aligned and we are spilling an odd
+ // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+ // between the integer and double callee-save areas.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb.
+ if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+ MF.setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() &&
+ !AFI->isThumbFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging.
+ if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = estimateStackSize(MF, MFI);
+ unsigned Limit = (1 << 12) - 1;
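+ // Default to the 12-bit immediate range of AddrMode2 loads / stores; the
+ // scan below tightens the limit for more restrictive addressing modes.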
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB)
+ for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isFrameIndex()) {
+ unsigned Opcode = I->getOpcode();
+ const TargetInstrDescriptor &Desc = TII.get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ if (AddrMode == ARMII::AddrMode3) {
+ Limit = (1 << 8) - 1;
+ goto DoneEstimating;
+ } else if (AddrMode == ARMII::AddrMode5) {
+ unsigned ThisLimit = ((1 << 8) - 1) * 4;
+ if (ThisLimit < Limit)
+ Limit = ThisLimit;
+ }
+ }
+ }
+ DoneEstimating:
+ if (Size >= Limit) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
+ }
+ } else {
+ // Reserve a slot closest to SP or frame pointer.
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+/// Move the iterator past the next batch of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Opc, unsigned Area,
+ const ARMSubtarget &STI) {
+ while (MBBI != MBB.end() &&
+ MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFrameIndex()) {
+ if (Area != 0) {
+ bool Done = false;
+ unsigned Category = 0;
+ switch (MBBI->getOperand(0).getReg()) {
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ Category = 1;
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ Category = STI.isTargetDarwin() ? 2 : 1;
+ break;
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ Category = 3;
+ break;
+ default:
+ Done = true;
+ break;
+ }
+ if (Done || Category != Area)
+ break;
+ }
+
+ ++MBBI;
+ }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (isThumb) {
+ // Check if R3 is live in. It might have to be used as a scratch register.
+ for (MachineFunction::livein_iterator I=MF.livein_begin(),E=MF.livein_end();
+ I != E; ++I) {
+ if ((*I).first == ARM::R3) {
+ AFI->setR3IsLiveIn(true);
+ break;
+ }
+ }
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+ }
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+ emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+ } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH)
+ ++MBBI;
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri),FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (!isThumb) MIB.addImm(ARMCC::AL).addReg(0).addReg(0);
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+ emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII);
+
+ // Build the new SUBri to adjust SP for FP callee-save spill area.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+ emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII);
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Insert it after all the callee-save spills.
+ if (!isThumb)
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII);
+ }
+
+ if(STI.isTargetELF() && hasFP(MF)) {
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+ }
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
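+/// isCalleeSavedRegister - Return true if the specified register is in the
+/// null-terminated list of callee-saved registers.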
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
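+/// isCSRestore - Return true if this instruction reloads a callee-saved
+/// register from a frame index, i.e. it is part of the epilogue restores.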
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ return ((MI->getOpcode() == ARM::FLDD ||
+ MI->getOpcode() == ARM::LDR ||
+ MI->getOpcode() == ARM::tRestore) &&
+ MI->getOperand(1).isFrameIndex() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert((MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+ } else {
+ // Unwind MBBI to point to first LDR / FLDD.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+ if (isThumb) {
+ if (hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (NumBytes)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, TII);
+ else
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVr), ARM::SP).addReg(FramePtr);
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+ } else
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+ }
+ } else {
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->getGPRCalleeSavedArea2Size() ||
+ AFI->getDPRCalleeSavedAreaSize() ||
+ AFI->getDPRCalleeSavedAreaOffset()||
+ hasFP(MF))
+ if (NumBytes)
+ BuildMI(MBB, MBBI, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+ .addImm(NumBytes)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ } else if (NumBytes) {
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII);
+ }
+
+ // Move SP to start of integer callee save spill area 2.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
+ false, TII);
+
+ // Move SP to start of integer callee save spill area 1.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
+ false, TII);
+
+ // Move SP to SP upon entry to the function.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
+ false, TII);
+ }
+ }
+
+ if (VARegSaveSize) {
+ if (isThumb)
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ // FIXME: Verify this is still ok when R3 is no longer being reserved.
+ BuildMI(MBB, MBBI, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+ emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII);
+
+ if (isThumb) {
+ BuildMI(MBB, MBBI, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+ MBB.erase(MBBI);
+ }
+ }
+}
+
+unsigned ARMRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (STI.isTargetDarwin() || hasFP(MF))
+ return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
+ else
+ return ARM::SP;
+}
+
+unsigned ARMRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned ARMRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#include "ARMGenRegisterInfo.inc"
+
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 0000000..3db1d89
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,108 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMGenRegisterInfo {
+ const TargetInstrInfo &TII;
+ const ARMSubtarget &STI;
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// ARM::LR, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Code Generation virtual methods...
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ int FrameIndex) const;
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 0000000..3d2646e
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,196 @@
+//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<5> num, string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "ARM";
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<0>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<1>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<2>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<3>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<4>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<5>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<6>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<7>;
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<8>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<9>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<10>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<11>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<12>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<13>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<14>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<15>;
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the S* registers used to hold 64-bit fp values (doubles).
+def D0 : ARMReg< 0, "d0", [S0, S1]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R12, R11,
+ LR, SP, PC]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ // FIXME: We are reserving r12 in case the PEI needs to use it to
+ // generate a large stack offset. Make it available once we have register
+ // scavenging. Similarly r3 is reserved in Thumb mode for now.
+ let MethodBodies = [{
+ // FP is R11, R9 is available.
+ static const unsigned ARM_GPR_AO_1[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+ // FP is R11, R9 is not available.
+ static const unsigned ARM_GPR_AO_2[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R10,
+ ARM::R11 };
+ // FP is R7, R9 is available.
+ static const unsigned ARM_GPR_AO_3[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R9, ARM::R10,ARM::R11,
+ ARM::R7 };
+ // FP is R7, R9 is not available.
+ static const unsigned ARM_GPR_AO_4[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R10,ARM::R11,
+ ARM::R7 };
+
+ // FP is R7, only low registers available.
+ static const unsigned THUMB_GPR_AO[] = {
+ ARM::R2, ARM::R1, ARM::R0,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ GPRClass::iterator
+ GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb())
+ return THUMB_GPR_AO;
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_4;
+ else
+ return ARM_GPR_AO_3;
+ } else {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_2;
+ else
+ return ARM_GPR_AO_1;
+ }
+ }
+
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+ if (Subtarget.isThumb())
+ I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ else if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+ }
+ } else {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+ }
+ }
+
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+ }];
+}
+
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
+ S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+
+// ARM requires only word alignment for doubles, though they perform better
+// when double-word aligned.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15]>;
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 0000000..beea52b
--- /dev/null
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,28 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Raul Herbster and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+
+ };
+ }
+}
+
+#endif
+
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000..6db36df
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,57 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+using namespace llvm;
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, bool thumb)
+ : ARMArchVersion(V4T)
+ , HasVFP2(false)
+ , IsThumb(thumb)
+ , UseThumbBacktraces(false)
+ , IsR9Reserved(false)
+ , stackAlignment(4)
+ , TargetType(isELF) // Default to ELF unless otherwise specified.
+ , TargetABI(ARM_ABI_APCS) {
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ if (TT.find("-darwin") != std::string::npos)
+ TargetType = isDarwin;
+ } else if (TT.empty()) {
+#if defined(__APPLE__)
+ TargetType = isDarwin;
+#endif
+ }
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (isTargetDarwin()) {
+ UseThumbBacktraces = true;
+ IsR9Reserved = true;
+ }
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000..62367ca
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,94 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+ enum ARMArchEnum {
+ V4T, V5T, V5TE, V6
+ };
+
+ /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+ /// and V6.
+ ARMArchEnum ARMArchVersion;
+
+ /// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
+ /// instructions.
+ bool HasVFP2;
+
+ /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+ bool IsThumb;
+
+ /// UseThumbBacktraces - True if we use thumb style backtraces.
+ bool UseThumbBacktraces;
+
+ /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+ /// stackAlignment - The minimum alignment known to hold for the stack frame
+ /// on entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ bool hasV4TOps() const { return ARMArchVersion >= V4T; }
+ bool hasV5TOps() const { return ARMArchVersion >= V5T; }
+ bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+ bool hasV6Ops() const { return ARMArchVersion >= V6; }
+
+ bool hasVFP2() const { return HasVFP2; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const { return TargetType == isELF; }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return IsThumb; }
+
+ bool useThumbBacktraces() const { return UseThumbBacktraces; }
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold for the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
new file mode 100644
index 0000000..1dea1c1
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -0,0 +1,276 @@
+//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetAsmInfo.h"
+#include "ARMTargetMachine.h"
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+static const char* arm_asm_table[] = {"{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0};
+
+ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AsmTransCBE = arm_asm_table;
+ if (Subtarget->isTargetDarwin()) {
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ BSSSection = 0; // no BSS section.
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ SetDirective = "\t.set";
+ WeakRefDirective = "\t.weak_reference\t";
+ HiddenDirective = "\t.private_extern\t";
+ ProtectedDirective = NULL;
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ FourByteConstantSection = "\t.literal4\n";
+ EightByteConstantSection = "\t.literal8\n";
+ ReadOnlySection = "\t.const\n";
+ HasDotTypeDotSizeDirective = false;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ } else {
+ NeedsSet = false;
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+ ReadOnlySection = "\t.section\t.rodata\n";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ DwarfRequiresFrameSection = false;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",%progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",%progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+
+ if (Subtarget->isAAPCS_ABI()) {
+ StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
+ StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array";
+ } else {
+ StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+ }
+ TLSDataSection = "\t.section .tdata,\"awT\",%progbits";
+ TLSBSSSection = "\t.section .tbss,\"awT\",%nobits";
+ }
+
+ ZeroDirective = "\t.space\t";
+ AlignmentIsInBytes = false;
+ Data64bitsDirective = 0;
+ CommentString = "@";
+ DataSection = "\t.data";
+ ConstantPoolSection = "\t.text\n";
+ COMMDirectiveTakesAlignment = false;
+ InlineAsmStart = "@ InlineAsm Start";
+ InlineAsmEnd = "@ InlineAsm End";
+ LCOMMDirective = "\t.lcomm\t";
+}
+
+/// Count the number of comma-separated arguments.
+/// Do not try to detect errors.
+unsigned ARMTargetAsmInfo::countArguments(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p != '\n')
+ p++;
+ count++;
+ while (*p && *p!='\n' &&
+ strncmp(p, CommentString, strlen(CommentString))!=0) {
+ if (*p==',')
+ count++;
+ p++;
+ }
+ return count;
+}
+
+/// Count the length of a string enclosed in quote characters.
+/// Do not try to detect errors.
+unsigned ARMTargetAsmInfo::countString(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p!='\n')
+ p++;
+ if (!*p || *p != '\"')
+ return count;
+ while (*++p && *p != '\"')
+ count++;
+ return count;
+}
+
+/// ARM-specific version of TargetAsmInfo::getInlineAsmLength.
+unsigned ARMTargetAsmInfo::getInlineAsmLength(const char *Str) const {
+ // Count the number of bytes in the asm.
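+ // Walk the string one statement at a time: data directives add an estimate
+ // of their operand sizes, Thumb instructions count as 2 bytes (4 for BL and
+ // for BLX to a non-register operand), and ARM instructions count as 4.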
+ bool atInsnStart = true;
+ bool inTextSection = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (atInsnStart) {
+ // Skip whitespace
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ // Skip label
+ for (const char* p = Str; *p && !isspace(*p); p++)
+ if (*p == ':') {
+ Str = p+1;
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ break;
+ }
+ // Ignore everything from comment char(s) to EOL
+ if (strncmp(Str, CommentString, strlen(CommentString))==0)
+ atInsnStart = false;
+ // FIXME do something like the following for non-Darwin
+ else if (*Str == '.' && Subtarget->isTargetDarwin()) {
+ // Directive.
+ atInsnStart = false;
+ // Some change the section, but don't generate code.
+ if (strncasecmp(Str, ".literal4", strlen(".literal4"))==0 ||
+ strncasecmp(Str, ".literal8", strlen(".literal8"))==0 ||
+ strncasecmp(Str, ".const", strlen(".const"))==0 ||
+ strncasecmp(Str, ".constructor", strlen(".constructor"))==0 ||
+ strncasecmp(Str, ".cstring", strlen(".cstring"))==0 ||
+ strncasecmp(Str, ".data", strlen(".data"))==0 ||
+ strncasecmp(Str, ".destructor", strlen(".destructor"))==0 ||
+ strncasecmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 ||
+ strncasecmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 ||
+ strncasecmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 ||
+ strncasecmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 ||
+ strncasecmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 ||
+ strncasecmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 ||
+ strncasecmp(Str, ".static_data", strlen(".static_data"))==0 ||
+ strncasecmp(Str, ".section", strlen(".section"))==0 ||
+ strncasecmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 ||
+ strncasecmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 ||
+ strncasecmp(Str, ".dyld", strlen(".dyld"))==0 ||
+ strncasecmp(Str, ".const_data", strlen(".const_data"))==0 ||
+ strncasecmp(Str, ".objc", strlen(".objc"))==0 || //// many directives
+ strncasecmp(Str, ".static_const", strlen(".static_const"))==0)
+ inTextSection=false;
+ else if (strncasecmp(Str, ".text", strlen(".text"))==0)
+ inTextSection = true;
+ // Some can't really be handled without implementing significant pieces
+ // of an assembler. Others require dynamic adjustment of block sizes in
+ // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every
+ // instruction in there, and none of these are currently used in the kernel.
+ else if (strncasecmp(Str, ".macro", strlen(".macro"))==0 ||
+ strncasecmp(Str, ".if", strlen(".if"))==0 ||
+ strncasecmp(Str, ".align", strlen(".align"))==0 ||
+ strncasecmp(Str, ".fill", strlen(".fill"))==0 ||
+ strncasecmp(Str, ".space", strlen(".space"))==0 ||
+ strncasecmp(Str, ".zerofill", strlen(".zerofill"))==0 ||
+ strncasecmp(Str, ".p2align", strlen(".p2align"))==0 ||
+ strncasecmp(Str, ".p2alignw", strlen(".p2alignw"))==0 ||
+ strncasecmp(Str, ".p2alignl", strlen(".p2alignl"))==0 ||
+ strncasecmp(Str, ".align32", strlen(".p2align32"))==0 ||
+ strncasecmp(Str, ".include", strlen(".include"))==0)
+ cerr << "Directive " << Str << " in asm may lead to invalid offsets for" <<
+ " constant pools (the assembler will tell you if this happens).\n";
+ // Some generate code, but this is only interesting in the text section.
+ else if (inTextSection) {
+ if (strncasecmp(Str, ".long", strlen(".long"))==0)
+ Length += 4*countArguments(Str+strlen(".long"));
+ else if (strncasecmp(Str, ".short", strlen(".short"))==0)
+ Length += 2*countArguments(Str+strlen(".short"));
+ else if (strncasecmp(Str, ".byte", strlen(".byte"))==0)
+ Length += 1*countArguments(Str+strlen(".byte"));
+ else if (strncasecmp(Str, ".single", strlen(".single"))==0)
+ Length += 4*countArguments(Str+strlen(".single"));
+ else if (strncasecmp(Str, ".double", strlen(".double"))==0)
+ Length += 8*countArguments(Str+strlen(".double"));
+ else if (strncasecmp(Str, ".quad", strlen(".quad"))==0)
+ Length += 16*countArguments(Str+strlen(".quad"));
+ else if (strncasecmp(Str, ".ascii", strlen(".ascii"))==0)
+ Length += countString(Str+strlen(".ascii"));
+ else if (strncasecmp(Str, ".asciz", strlen(".asciz"))==0)
+ Length += countString(Str+strlen(".asciz"))+1;
+ }
+ } else if (inTextSection) {
+ // An instruction
+ atInsnStart = false;
+ if (Subtarget->isThumb()) {
+ // BL and BLX <non-reg> are 4 bytes, all others 2.
+ if (strncasecmp(Str, "blx", strlen("blx"))==0) {
+ const char* p = Str+3;
+ while (*p && isspace(*p))
+ p++;
+ if (*p == 'r' || *p=='R')
+ Length += 2; // BLX reg
+ else
+ Length += 4; // BLX non-reg
+ } else if (strncasecmp(Str, "bl", strlen("bl"))==0)
+ Length += 4; // BL
+ else
+ Length += 2; // Thumb anything else
+ }
+ else
+ Length += 4; // ARM
+ }
+ }
+ if (*Str == '\n' || *Str == SeparatorChar)
+ atInsnStart = true;
+ }
+ return Length;
+}
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
new file mode 100644
index 0000000..9dd45e5
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -0,0 +1,38 @@
+//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETASMINFO_H
+#define ARMTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "ARMSubtarget.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class ARMTargetMachine;
+
+ struct ARMTargetAsmInfo : public TargetAsmInfo {
+ ARMTargetAsmInfo(const ARMTargetMachine &TM);
+
+ const ARMSubtarget *Subtarget;
+
+ virtual unsigned getInlineAsmLength(const char *Str) const;
+ unsigned countArguments(const char *p) const;
+ unsigned countString(const char *p) const;
+ };
+
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 0000000..58b3ab9
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,160 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+ cl::desc("Disable load store optimization pass"));
+static cl::opt<bool> EnableIfConversion("enable-arm-if-conversion", cl::Hidden,
+ cl::desc("Enable if-conversion pass"));
+
+namespace {
+ // Register the target.
+ RegisterTarget<ARMTargetMachine> X("arm", " ARM");
+ RegisterTarget<ThumbTargetMachine> Y("thumb", " Thumb");
+}
+
+/// ThumbTargetMachine - Create a Thumb architecture model.
+///
+unsigned ThumbTargetMachine::getJITMatchQuality() {
+#if defined(__thumb__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "thumb-")
+ return 20;
+
+ // If the target triple is something non-thumb, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+ : ARMTargetMachine(M, FS, true) {
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
+ bool isThumb)
+ : Subtarget(M, FS, isThumb),
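+ // Data layout: "e" = little-endian, "p:32:32" = 32-bit pointers. Doubles
+ // and 64-bit integers are 32-bit aligned under APCS but 64-bit aligned
+ // under AAPCS.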
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ // APCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:32:32-i64:32:32")) :
+ // AAPCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64"))),
+ InstrInfo(Subtarget),
+ FrameInfo(Subtarget),
+ JITInfo(*this),
+ TLInfo(*this) {}
+
+unsigned ARMTargetMachine::getJITMatchQuality() {
+#if defined(__arm__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "arm-")
+ return 20;
+ // If the target triple is something non-arm, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+ return new ARMTargetAsmInfo(*this);
+}
+
+
+// Pass Pipeline Configuration
+bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ PM.add(createARMISelDag(*this));
+ return false;
+}
+
+bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (!Fast && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass());
+
+ if (!Fast && EnableIfConversion && !Subtarget.isThumb())
+ PM.add(createIfConverterPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMTargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ // Output assembly language.
+ PM.add(createARMCodePrinterPass(Out, *this));
+ return false;
+}
+
+
+bool ARMTargetMachine::addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ return false;
+}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 0000000..183a582
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,81 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the "Instituto Nokia de Tecnologia" and
+// is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMTargetMachine : public LLVMTargetMachine {
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
+ ARMJITInfo JITInfo;
+ ARMTargetLowering TLInfo;
+
+public:
+ ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual TargetJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual ARMTargetLowering *getTargetLowering() const {
+ return const_cast<ARMTargetLowering*>(&TLInfo);
+ }
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+ virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+///
+class ThumbTargetMachine : public ARMTargetMachine {
+public:
+ ThumbTargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
new file mode 100644
index 0000000..77300a1
--- /dev/null
+++ b/lib/Target/ARM/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the "Instituto Nokia de Tecnologia" and
+# is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMARM
+TARGET = ARM
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
+ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenDAGISel.inc ARMGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
new file mode 100644
index 0000000..380097d
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -0,0 +1,223 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb specific).
+//===---------------------------------------------------------------------===//
+
+* Add support for compiling functions in both ARM and Thumb mode, then taking
+ the smallest.
+* Add support for compiling individual basic blocks in thumb mode, when in a
+ larger ARM function. This can be used for presumed cold code, like paths
+ to abort (failure path of asserts), EH handling code, etc.
+
+* Thumb doesn't have normal pre/post increment addressing modes, but you can
+  load/store 32-bit integers with pre/postinc by using load/store multiple
+  instrs with a single register (see the example at the end of this list).
+
+* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
+ and cmp instructions can use high registers. Also, we can use them as
+ temporaries to spill values into.
+
+* In thumb mode, short, byte, and bool preferred alignments are currently set
+  to 4 to accommodate an ISA restriction (i.e. for add sp, #imm, imm must be a
+  multiple of 4).
+
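+To illustrate the pre/post-increment point above: every access in a simple
+word-copy loop wants a post-increment load or store, and on Thumb each one
+could be emitted as a one-register ldmia / stmia with writeback. Illustrative
+C only, not a testcase from the tree:
+
+  void copywords(int *dst, const int *src, int n) {
+    while (n-- > 0)
+      *dst++ = *src++;   // each access is a post-increment 32-bit load/store
+  }
+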
+//===---------------------------------------------------------------------===//
+
+Potential jumptable improvements:
+
+* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit
+ jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the
+ function is even smaller. This also applies to ARM.
+
+* Thumb jumptable codegen can be improved given some help from the assembler.
+  This is what we generate right now:
+
+ .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4))
+LPCRELL0:
+ mov r1, #PCRELV0
+ add r1, pc
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+ ...
+
+Note there is another pc relative add that we can take advantage of.
+ add r1, pc, #imm_8 * 4
+
+We should be able to generate:
+
+LPCRELL0:
+ add r1, LJTI1_0_0
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+
+if the assembler can translate the add to:
+ add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
+
+Note the assembler also does something similar for constpool loads:
+LPCRELL0:
+ ldr r0, LCPI1_0
+=>
+ ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
+
+
+//===---------------------------------------------------------------------===//
+
+We compile the following:
+
+define i16 @func_entry_2E_ce(i32 %i) {
+ switch i32 %i, label %bb12.exitStub [
+ i32 0, label %bb4.exitStub
+ i32 1, label %bb9.exitStub
+ i32 2, label %bb4.exitStub
+ i32 3, label %bb4.exitStub
+ i32 7, label %bb9.exitStub
+ i32 8, label %bb.exitStub
+ i32 9, label %bb9.exitStub
+ ]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
+}
+
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
+gcc compiles to:
+
+ cmp r0, #9
+ @ lr needed for prologue
+ bhi L2
+ ldr r3, L11
+ mov r2, #1
+ mov r1, r2, asl r0
+ ands r0, r3, r2, asl r0
+ movne r0, #2
+ bxne lr
+ tst r1, #13
+ beq L9
+L3:
+ mov r0, r2
+ bx lr
+L9:
+ tst r1, #256
+ movne r0, #3
+ bxne lr
+L2:
+ mov r0, #0
+ bx lr
+L12:
+ .align 2
+L11:
+ .long 642
+
+
+GCC is doing a couple of clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+
+
+//===---------------------------------------------------------------------===//
+
+When spilling in thumb mode and the sp offset is too large to fit in the ldr /
+str offset field, we load the offset from a constpool entry and add it to sp:
+
+ldr r2, LCPI
+add r2, sp
+ldr r2, [r2]
+
+These instructions preserve the condition code, which is important if the spill
+is between a cmp and a bcc instruction. However, we can use the (potentially)
+cheaper sequence if we know it's ok to clobber the condition register.
+
+add r2, sp, #255 * 4
+add r2, #132
+ldr r2, [r2, #7 * 4]
+
+This is especially bad when dynamic alloca is used: all fixed-size stack
+objects are then referenced off the frame pointer with negative offsets. See
+oggenc for an example.
+
+//===---------------------------------------------------------------------===//
+
+We are reserving R3 as a scratch register under thumb mode. So if it is live
+into the function, we save / restore R3 to / from R12. Until register
+scavenging is done, we should save R3 to a high callee-saved reg at
+emitPrologue time (when hasFP is true or stack size is large) and restore R3
+from that register instead. This allows us to at least get rid of the save to
+r12 every time it is used.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen test/CodeGen/ARM/select.ll f7:
+
+ ldr r5, LCPI1_0
+LPC0:
+ add r5, pc
+ ldr r6, LCPI1_1
+ ldr r2, LCPI1_2
+ cpy r3, r6
+ cpy lr, pc
+ bx r5
+
+//===---------------------------------------------------------------------===//
+
+Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
+etc. Almost all Thumb instructions clobber condition code.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
new file mode 100644
index 0000000..3db8f54
--- /dev/null
+++ b/lib/Target/ARM/README.txt
@@ -0,0 +1,530 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend.
+//===---------------------------------------------------------------------===//
+
+Reimplement 'select' in terms of 'SEL'.
+
+* We would really like to support UXTAB16, but we need to prove that the
+  add doesn't need to overflow between the two 16-bit chunks (see the sketch
+  after this list).
+
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+ immediate instructions then copy to FPU. Slower than load into FPU?
+
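+Regarding UXTAB16 above: a rough C model of what the instruction computes is
+shown below. Each halfword lane wraps independently, which is why we must
+prove the source-level add cannot carry across the halfword boundary before
+matching it. (Hypothetical helper for illustration, not generated code.)
+
+  unsigned uxtab16_model(unsigned a, unsigned b) {
+    unsigned lo = ((a & 0xFFFFu) + (b & 0xFFu)) & 0xFFFFu;        // low lane
+    unsigned hi = ((a >> 16) + ((b >> 16) & 0xFFu)) & 0xFFFFu;    // high lane
+    return (hi << 16) | lo;
+  }
+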
+//===---------------------------------------------------------------------===//
+
+Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
+time regalloc happens, these values are now in a 32-bit register, usually with
+the top-bits known to be sign or zero extended. If spilled, we should be able
+to spill these to an 8-bit or 16-bit stack slot, zero- or sign-extending as part
+of the reload.
+
+Doing this reduces the size of the stack frame (important for thumb etc), and
+also increases the likelihood that we will be able to reload multiple values
+from the stack with a single load.
+
+//===---------------------------------------------------------------------===//
+
+The constant island pass is in good shape. Some cleanups might be desirable,
+but there is unlikely to be much improvement in the generated code.
+
+1. There may be some advantage to trying to be smarter about the initial
+placement, rather than putting everything at the end.
+
+2. There might be some compile-time efficiency to be had by representing
+consecutive islands as a single block rather than multiple blocks.
+
+3. Use a priority queue to sort constant pool users in inverse order of
+   position so we always process the one closest to the end of the function
+   first. This may simplify CreateNewWater (see the sketch below).
+
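+A minimal sketch of the ordering item 3 has in mind, using hypothetical
+CPUser / Offset names (the real pass keeps considerably more state per user):
+
+  #include <queue>
+  #include <vector>
+
+  struct CPUser {
+    unsigned Offset;   // offset of the user instruction within the function
+  };
+
+  // Max-heap on Offset: the user closest to the end of the function pops first.
+  struct ByDescendingOffset {
+    bool operator()(const CPUser &A, const CPUser &B) const {
+      return A.Offset < B.Offset;
+    }
+  };
+
+  typedef std::priority_queue<CPUser, std::vector<CPUser>,
+                              ByDescendingOffset> CPUserQueue;
+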
+//===---------------------------------------------------------------------===//
+
+Eliminate copysign custom expansion. We are still generating crappy code with
+default expansion + if-conversion.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate one instruction from:
+
+define i32 @_Z6slow4bii(i32 %x, i32 %y) {
+ %tmp = icmp sgt i32 %x, %y
+ %retval = select i1 %tmp, i32 %x, i32 %y
+ ret i32 %retval
+}
+
+__Z6slow4bii:
+ cmp r0, r1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+=>
+
+__Z6slow4bii:
+ cmp r0, r1
+ movle r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
+
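+For concreteness, the kind of source in question is just (illustrative only):
+
+  long long sub3(long long x) { return x - 3; }
+
+i.e. presumably folding the constant straight into the subs / sbc pair rather
+than materializing it in a register first.
+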
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3-operation sequence:
+ t = (p sra 31);
+ res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
+
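+In C terms the branchless pattern is roughly the following (this assumes an
+arithmetic right shift for signed int, which holds for the compilers and
+targets we care about):
+
+  int iabs(int p) {
+    int t = p >> 31;      // 0 if p >= 0, -1 if p < 0
+    return (p ^ t) - t;   // conditionally negate
+  }
+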
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+ when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into an ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
+
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
+
+ Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note that for ARM v4, gcc uses ldmia to load a pair of 32-bit values to
+represent a 64-bit double FP constant:
+
+ adr r0, L6
+ ldmia r0, {r0-r1}
+
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd / strd if there are only two destination registers that form an odd/even
+pair. However, we probably would pay a penalty if the address is not aligned
+on an 8-byte boundary. This requires more information on load / store nodes
+(and MI's?) than we currently carry.
+
+6) struct copies appear to be done field by field
+instead of by words, at least sometimes:
+
+struct foo { int x; short s; char c1; char c2; };
+void cpy(struct foo*a, struct foo*b) { *a = *b; }
+
+llvm code (-O2)
+ ldrb r3, [r1, #+6]
+ ldr r2, [r1]
+ ldrb r12, [r1, #+7]
+ ldrh r1, [r1, #+4]
+ str r2, [r0]
+ strh r1, [r0, #+4]
+ strb r3, [r0, #+6]
+ strb r12, [r0, #+7]
+gcc code (-O2)
+ ldmia r1, {r1-r2}
+ stmia r0, {r1-r2}
+
+In this benchmark poor handling of aggregate copies has shown up as
+having a large effect on size, and possibly speed as well (we don't have
+a good way to measure on ARM).
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+ double r = foo(3.1);
+ return x+r;
+}
+
+_bar:
+ sub sp, sp, #16
+ str r4, [sp, #+12]
+ str r5, [sp, #+8]
+ str lr, [sp, #+4]
+ mov r4, r0
+ mov r5, r1
+ ldr r0, LCPI2_0
+ bl _foo
+ fmsr f0, r0
+ fcvtsd d0, f0
+ fmdrr d1, r4, r5
+ faddd d0, d0, d1
+ fmrrd r0, r1, d0
+ ldr lr, [sp, #+4]
+ ldr r5, [sp, #+8]
+ ldr r4, [sp, #+12]
+ add sp, sp, #16
+ bx lr
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-save registers and the fact that they are only being used
+by the fmdrr instruction. It would have been better had the fmdrr been
+scheduled before the call, placing the result in a callee-save DPR register.
+The two mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
+
+Instead of an stmia, an ldmia, and an ldr, wouldn't it be better to do three
+moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+ struct s S = {1.1, 2};
+ return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller to store the value in. The
+first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1,
+r2 into the address passed in. However, there is one additional stmia that
+stores r0, r1, and r2 to some stack location. The store is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
+entry:
+ %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
+ ret void
+}
+
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at the
+lowering time for small (<= 4 words?) transfer size.
+
+* ARM CSRet calling convention requires the hidden argument to be returned by
+the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+Register scavenging is now implemented. The example in the previous version
+of this document produces optimal code at -O2.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base
+ptr is guaranteed to be live beyond the load/store. This can happen if the
+base ptr is live out of the block in which we are performing the optimization.
+e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact the performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub +
+   load/store to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we have added support for multiple-result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
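+For reference, the sort of code this is aimed at is simply (illustrative only):
+
+  void copy_i64(long long *d, const long long *s) { *d = *s; }
+
+which is currently expanded into pairs of i32 loads and stores.
+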
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+ int i = 0;
+
+ if (StackPtr != 0) {
+ while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+ Line[i++] = Stack[--StackPtr];
+ if (LineLen > 32768)
+ {
+ while (StackPtr != 0 && i < LineLen)
+ {
+ i++;
+ --StackPtr;
+ }
+ }
+ }
+ return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//
+
+gcc generates smaller code for this function at -O2 or -Os:
+
+void foo(signed char* p) {
+ if (*p == 3)
+ bar();
+ else if (*p == 4)
+ baz();
+ else if (*p == 5)
+ quux();
+}
+
+llvm decides it's a good idea to turn the repeated if...else into a
+binary tree, as if it were a switch; the resulting code requires one fewer
+compare-and-branch when *p<=2 or *p==5, the same number when *p==4
+or *p>6, and one more when *p==3. So it should be a speed win
+(on balance). However, the revised code is larger, with 4 conditional
+branches instead of 3.
+
+More seriously, there is a byte->word extend before
+each comparison, where there should be only one, and the condition codes
+are not remembered when the same two values are compared twice.
+
+//===---------------------------------------------------------------------===//
+
+More register scavenging work:
+
+1. Use the register scavenger to track frame index materialized into registers
+ (those that do not fit in addressing modes) to allow reuse in the same BB.
+2. Finish scavenging for Thumb.
+3. We know some spills and restores are unnecessary. The issue is that once
+   live intervals are merged, they are never split. So every def is spilled
+   and every use requires a restore if the register allocator decides the
+   resulting live interval is not assigned a physical register. It may be
+   possible (with the help of the scavenger) to turn some spill / restore
+   pairs into register copies.
+
+//===---------------------------------------------------------------------===//
+
+More LSR enhancements possible:
+
+1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
+   merged into a load / store.
+2. Allow iv reuse even when a type conversion is required. For example, i8
+ and i32 load / store addressing modes are identical.
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+int foo(int a, int b, int c, int d) {
+ long long acc = (long long)a * (long long)b;
+ acc += (long long)c * (long long)d;
+ return (int)(acc >> 32);
+}
+
+Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
+a 64-bit value.
+
+We currently get this with v6:
+
+_foo:
+ mul r12, r1, r0
+ smmul r1, r1, r0
+ smmul r0, r3, r2
+ mul r3, r3, r2
+ adds r3, r3, r12
+ adc r0, r0, r1
+ bx lr
+
+and this with v4:
+
+_foo:
+ stmfd sp!, {r7, lr}
+ mov r7, sp
+ mul r12, r1, r0
+ smull r0, r1, r1, r0
+ smull lr, r0, r3, r2
+ mul r3, r3, r2
+ adds r3, r3, r12
+ adc r0, r0, r1
+ ldmfd sp!, {r7, pc}
+
+This apparently occurs in real code.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
new file mode 100644
index 0000000..a1acde4
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,48 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include <iosfwd>
+
+namespace llvm {
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class TargetMachine;
+ class MachineCodeEmitter;
+
+ FunctionPass *createAlphaSimpleInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaISelDag(TargetMachine &TM);
+ FunctionPass *createAlphaCodePrinterPass(std::ostream &OS,
+ TargetMachine &TM);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#include "AlphaGenRegisterNames.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#include "AlphaGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
new file mode 100644
index 0000000..fbf7ed9
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,66 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "../Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+                                  "Enable CIX extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ // let TSFlagsFields = [];
+ // let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
new file mode 100644
index 0000000..0494777
--- /dev/null
+++ b/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -0,0 +1,297 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaAsmPrinter : public AsmPrinter {
+
+ /// Unique incrementer for label values for referencing Global values.
+ ///
+
+ AlphaAsmPrinter(std::ostream &o, TargetMachine &tm, const TargetAsmInfo *T)
+ : AsmPrinter(o, tm, T) {
+ }
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ bool printInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool IsCallOp = false);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+ };
+} // end of anonymous namespace
+
+/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createAlphaCodePrinterPass(std::ostream &o,
+ TargetMachine &tm) {
+ return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(MRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+ } else if (MO.isImmediate()) {
+ O << MO.getImmedValue();
+ assert(MO.getImmedValue() < (1 << 30));
+ } else {
+ printOp(MO);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).Name;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getConstantPoolIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ if (GV->isDeclaration() && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getJumpTableIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ EmitAlignment(4, F);
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl " << CurrentFnName << "\n";
+ break;
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ break;
+ }
+
+ O << "\t.ent " << CurrentFnName << "\n";
+
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ ++EmittedInsts;
+ O << "\t";
+ if (!printInstruction(II)) {
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ }
+ }
+ }
+
+ O << "\t.end " << CurrentFnName << "\n";
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool AlphaAsmPrinter::doInitialization(Module &M)
+{
+ if(TM.getSubtarget<AlphaSubtarget>().hasCT())
+ O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here
+ else
+ O << "\t.arch ev6\n";
+ O << "\t.set noat\n";
+ AsmPrinter::doInitialization(M);
+ return false;
+}
+
+bool AlphaAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) {
+
+    if (!I->hasInitializer()) continue;   // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Size = TD->getTypeSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+
+ //1: hidden?
+ if (I->hasHiddenVisibility())
+ O << TAI->getHiddenDirective() << name << "\n";
+
+ //2: kind
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ O << TAI->getWeakRefDirective() << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::ExternalLinkage:
+ O << "\t.globl " << name << "\n";
+ break;
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ cerr << "Unknown linkage type!\n";
+ abort();
+ }
+
+ //3: Section (if changed)
+ if (I->hasSection() &&
+ (I->getSection() == ".ctors" ||
+ I->getSection() == ".dtors")) {
+ std::string SectionName = ".section\t" + I->getSection()
+ + ",\"aw\",@progbits";
+ SwitchToDataSection(SectionName.c_str());
+ } else {
+ if (C->isNullValue())
+ SwitchToDataSection("\t.section\t.bss", I);
+ else
+ SwitchToDataSection("\t.section\t.data", I);
+ }
+
+ //4: Type, Size, Align
+ O << "\t.type\t" << name << ", @object\n";
+ O << "\t.size\t" << name << ", " << Size << "\n";
+ EmitAlignment(Align, I);
+
+ O << name << ":\n";
+
+    // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+ }
+
+ AsmPrinter::doFinalization(M);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo);
+ O << ")";
+ return false;
+}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 0000000..ac789b3
--- /dev/null
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,67 @@
+//===-- AlphaBranchSelector.cpp - Convert pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace pseudo COND_BRANCH_* instructions with their appropriate real
+// branches. This is a simplified version of the PPC branch selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setInstrDescriptor(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
new file mode 100644
index 0000000..3549551
--- /dev/null
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -0,0 +1,222 @@
+//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Alpha machine instructions
+// into relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-emitter"
+#include "AlphaTargetMachine.h"
+#include "AlphaRelocations.h"
+#include "Alpha.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class AlphaCodeEmitter : public MachineFunctionPass {
+ const AlphaInstrInfo *II;
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ ///
+ int getMachineOpValue(MachineInstr &MI, MachineOperand &MO);
+
+ public:
+ static char ID;
+ explicit AlphaCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce)
+ : MachineFunctionPass((intptr_t)&ID), II(0), TM(tm), MCE(mce) {}
+ AlphaCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce,
+ const AlphaInstrInfo& ii)
+ : MachineFunctionPass((intptr_t)&ID), II(&ii), TM(tm), MCE(mce) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Alpha Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ ///
+ unsigned getBinaryCodeForInstr(MachineInstr &MI);
+
+ private:
+ void emitBasicBlock(MachineBasicBlock &MBB);
+
+ };
+ char AlphaCodeEmitter::ID = 0;
+}
+
+/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha code
+/// to the specified MCE object.
+FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new AlphaCodeEmitter(TM, MCE);
+}
+
+bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
+
+ do {
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ emitBasicBlock(*I);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ switch(MI.getOpcode()) {
+ default:
+ MCE.emitWordLE(getBinaryCodeForInstr(*I));
+ break;
+ case Alpha::ALTENT:
+ case Alpha::PCLABEL:
+ case Alpha::MEMLABEL:
+ case Alpha::IDEF_I:
+ case Alpha::IDEF_F32:
+ case Alpha::IDEF_F64:
+ break; //skip these
+ }
+ }
+}
+
+static unsigned getAlphaRegNumber(unsigned Reg) {
+ switch (Reg) {
+ case Alpha::R0 : case Alpha::F0 : return 0;
+ case Alpha::R1 : case Alpha::F1 : return 1;
+ case Alpha::R2 : case Alpha::F2 : return 2;
+ case Alpha::R3 : case Alpha::F3 : return 3;
+ case Alpha::R4 : case Alpha::F4 : return 4;
+ case Alpha::R5 : case Alpha::F5 : return 5;
+ case Alpha::R6 : case Alpha::F6 : return 6;
+ case Alpha::R7 : case Alpha::F7 : return 7;
+ case Alpha::R8 : case Alpha::F8 : return 8;
+ case Alpha::R9 : case Alpha::F9 : return 9;
+ case Alpha::R10 : case Alpha::F10 : return 10;
+ case Alpha::R11 : case Alpha::F11 : return 11;
+ case Alpha::R12 : case Alpha::F12 : return 12;
+ case Alpha::R13 : case Alpha::F13 : return 13;
+ case Alpha::R14 : case Alpha::F14 : return 14;
+ case Alpha::R15 : case Alpha::F15 : return 15;
+ case Alpha::R16 : case Alpha::F16 : return 16;
+ case Alpha::R17 : case Alpha::F17 : return 17;
+ case Alpha::R18 : case Alpha::F18 : return 18;
+ case Alpha::R19 : case Alpha::F19 : return 19;
+ case Alpha::R20 : case Alpha::F20 : return 20;
+ case Alpha::R21 : case Alpha::F21 : return 21;
+ case Alpha::R22 : case Alpha::F22 : return 22;
+ case Alpha::R23 : case Alpha::F23 : return 23;
+ case Alpha::R24 : case Alpha::F24 : return 24;
+ case Alpha::R25 : case Alpha::F25 : return 25;
+ case Alpha::R26 : case Alpha::F26 : return 26;
+ case Alpha::R27 : case Alpha::F27 : return 27;
+ case Alpha::R28 : case Alpha::F28 : return 28;
+ case Alpha::R29 : case Alpha::F29 : return 29;
+ case Alpha::R30 : case Alpha::F30 : return 30;
+ case Alpha::R31 : case Alpha::F31 : return 31;
+ default:
+ assert(0 && "Unhandled reg");
+ abort();
+ }
+}
+
+int AlphaCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) {
+
+ int rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+
+ if (MO.isRegister()) {
+ rv = getAlphaRegNumber(MO.getReg());
+ } else if (MO.isImmediate()) {
+ rv = MO.getImmedValue();
+ } else if (MO.isGlobalAddress() || MO.isExternalSymbol()
+ || MO.isConstantPoolIndex()) {
+ DOUT << MO << " is a relocated op for " << MI << "\n";
+ unsigned Reloc = 0;
+ int Offset = 0;
+ bool useGOT = false;
+ switch (MI.getOpcode()) {
+ case Alpha::BSR:
+ Reloc = Alpha::reloc_bsr;
+ break;
+ case Alpha::LDLr:
+ case Alpha::LDQr:
+ case Alpha::LDBUr:
+ case Alpha::LDWUr:
+ case Alpha::LDSr:
+ case Alpha::LDTr:
+ case Alpha::LDAr:
+ case Alpha::STQr:
+ case Alpha::STLr:
+ case Alpha::STWr:
+ case Alpha::STBr:
+ case Alpha::STSr:
+ case Alpha::STTr:
+ Reloc = Alpha::reloc_gprellow;
+ break;
+ case Alpha::LDAHr:
+ Reloc = Alpha::reloc_gprelhigh;
+ break;
+ case Alpha::LDQl:
+ Reloc = Alpha::reloc_literal;
+ useGOT = true;
+ break;
+ case Alpha::LDAg:
+ case Alpha::LDAHg:
+ Reloc = Alpha::reloc_gpdist;
+ Offset = MI.getOperand(3).getImmedValue();
+ break;
+ default:
+ assert(0 && "unknown relocatable instruction");
+ abort();
+ }
+ if (MO.isGlobalAddress())
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
+ Reloc, MO.getGlobal(), Offset,
+ false, useGOT));
+ else if (MO.isExternalSymbol())
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(), Offset,
+ true));
+ else
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getConstantPoolIndex(),
+ Offset));
+ } else if (MO.isMachineBasicBlock()) {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Alpha::reloc_bsr,
+ MO.getMachineBasicBlock()));
+  } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+
+#include "AlphaGenCodeEmitter.inc"
+
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 0000000..4f7533c
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,563 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <queue>
+#include <set>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ AlphaTargetLowering AlphaLowering;
+
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+    /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
+ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
+ /// in checking mode. If LHS is null, we assume that the mask has already
+ /// been validated before.
+ uint64_t get_zapImm(SDOperand LHS, uint64_t Constant) {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.Val == 0) {
+          // Otherwise, if the mask was previously validated, we know it's okay
+ // to zapnot this entire byte even though all the bits aren't set.
+ } else {
+          // Otherwise we don't know that it's okay to zapnot this entire
+          // byte. Only do this iff we can prove that the missing bits are
+          // already zero, so the bytezap doesn't need to actually clear them.
+ BitsToCheck |= ~Constant & (0xFF << 8*i);
+ }
+ }
+ }
+
+    // If there are missing bits in a byte (for example, X & 0xEF00), check to
+    // see if the missing bits (0x1000) are already known to be zero. If not,
+    // the zap isn't okay to do, as it won't clear all the required bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS, BitsToCheck))
+ return 0;
+
+ return Result;
+ }
+
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
+
+
+ static uint64_t getNearPower2(uint64_t x) {
+ if (!x) return 0;
+ unsigned at = CountLeadingZeros_64(x);
+    uint64_t complow = 1ULL << (63 - at);
+    uint64_t comphigh = 1ULL << (64 - at);
+ //cerr << x << ":" << complow << ":" << comphigh << "\n";
+ if (abs(complow - x) <= abs(comphigh - x))
+ return complow;
+ else
+ return comphigh;
+ }
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ static bool isFPZ(SDOperand N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && (CN->isExactlyValue(+0.0) || CN->isExactlyValue(-0.0)));
+ }
+ static bool isFPZn(SDOperand N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->isExactlyValue(-0.0));
+ }
+ static bool isFPZp(SDOperand N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->isExactlyValue(+0.0));
+ }
+
+ public:
+ AlphaDAGToDAGISel(TargetMachine &TM)
+ : SelectionDAGISel(AlphaLowering),
+ AlphaLowering(*(AlphaTargetLowering*)(TM.getTargetLowering()))
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDOperand getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDOperand Op);
+
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
+ char ConstraintCode,
+ std::vector<SDOperand> &OutOps,
+ SelectionDAG &DAG) {
+ SDOperand Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ AddToISelQueue(Op0);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ SDOperand getGlobalBaseReg();
+ SDOperand getGlobalRetAddr();
+ void SelectCALL(SDOperand Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDOperand AlphaDAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction* MF = BB->getParent();
+ unsigned GP = 0;
+ for(MachineFunction::livein_iterator ii = MF->livein_begin(),
+ ee = MF->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R29) {
+ GP = ii->second;
+ break;
+ }
+ assert(GP && "GOT PTR not in liveins");
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ GP, MVT::i64);
+}
+
+/// getGlobalRetAddr - Grab the return address.
+///
+SDOperand AlphaDAGToDAGISel::getGlobalRetAddr() {
+ MachineFunction* MF = BB->getParent();
+ unsigned RA = 0;
+ for(MachineFunction::livein_iterator ii = MF->livein_begin(),
+ ee = MF->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R26) {
+ RA = ii->second;
+ break;
+ }
+ assert(RA && "RA PTR not in liveins");
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ RA, MVT::i64);
+}
+
+/// InstructionSelectBasicBlock - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void AlphaDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(DAG);
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDOperand Op) {
+ SDNode *N = Op.Val;
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
+ N->getOpcode() < AlphaISD::FIRST_NUMBER) {
+ return NULL; // Already selected.
+ }
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(Op);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDOperand Result = getGlobalBaseReg();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+ case AlphaISD::GlobalRetAddr: {
+ SDOperand Result = getGlobalRetAddr();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ case AlphaISD::DivCall: {
+ SDOperand Chain = CurDAG->getEntryNode();
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+ AddToISelQueue(N0);
+ AddToISelQueue(N1);
+ AddToISelQueue(N2);
+ Chain = CurDAG->getCopyToReg(Chain, Alpha::R24, N1,
+ SDOperand(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getTargetNode(Alpha::JSRs, MVT::Other, MVT::Flag,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, Alpha::R27, MVT::i64,
+ SDOperand(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDOperand Chain = N->getOperand(0);
+ AddToISelQueue(Chain); //Select chain
+ return CurDAG->getTargetNode(Alpha::RPCC, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getValue();
+
+ if (uval == 0) {
+ SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Alpha::R31, MVT::i64);
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval);
+ SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, MVT::i64, CPI,
+ getGlobalBaseReg());
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDOperand(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ MVT::ValueType T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->isExactlyValue(+0.0)) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if ( CN->isExactlyValue(-0.0)) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ abort();
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (MVT::isFloatingPoint(N->getOperand(0).Val->getValueType(0))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDOperand tmp1 = N->getOperand(rev?1:0);
+ SDOperand tmp2 = N->getOperand(rev?0:1);
+ AddToISelQueue(tmp1);
+ AddToISelQueue(tmp2);
+ SDNode *cmp = CurDAG->getTargetNode(Opc, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, MVT::f64, SDOperand(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getTargetNode(Alpha::ADDT, MVT::f64,
+ SDOperand(cmp2, 0), SDOperand(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, MVT::i64, SDOperand(cmp, 0));
+ return CurDAG->getTargetNode(Alpha::CMPULT, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDOperand(LD,0));
+ }
+ break;
+
+ case ISD::SELECT:
+ if (MVT::isFloatingPoint(N->getValueType(0)) &&
+ (N->getOperand(0).getOpcode() != ISD::SETCC ||
+ !MVT::isFloatingPoint(N->getOperand(0).getOperand(1).getValueType()))) {
+      // This should be the case not covered by the patterns.
+      // FIXME: Don't have SelectCode die, but rather return something testable
+      //        so that cases like this can be caught in fall-through code.
+      // Move the integer condition into an FP register.
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ SDOperand cond = N->getOperand(0);
+ SDOperand TV = N->getOperand(1);
+ SDOperand FV = N->getOperand(2);
+ AddToISelQueue(cond);
+ AddToISelQueue(TV);
+ AddToISelQueue(FV);
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, MVT::f64, cond);
+ return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES,
+ MVT::f64, FV, TV, SDOperand(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getValue();
+ uint64_t mval = MC->getValue();
+      // If the result is a zap, let the autogenerated patterns handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+      // Given mask X and shift S, see whether we can still form a zap from
+      // the mask if we play around with the bottom S bits.
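+      // A worked example: for (srl X, 16) & 0xFF, mask = 0xFF0000 and
+      // dontcare = 0xFFFF, so mask|dontcare = 0xFFFFFF is a byte mask and we
+      // can emit (SRL (ZAPNOT X, 0x07), 16).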
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ AddToISelQueue(N->getOperand(0).getOperand(0));
+ SDOperand Z =
+ SDOperand(CurDAG->getTargetNode(Alpha::ZAPNOTi, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getTargetNode(Alpha::SRLr, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(Op);
+}
+
+void AlphaDAGToDAGISel::SelectCALL(SDOperand Op) {
+  //TODO: add flag stuff to prevent nondeterministic breakage!
+
+ SDNode *N = Op.Val;
+ SDOperand Chain = N->getOperand(0);
+ SDOperand Addr = N->getOperand(1);
+ SDOperand InFlag(0,0); // Null incoming flag value.
+ AddToISelQueue(Chain);
+
+ std::vector<SDOperand> CallOperands;
+ std::vector<MVT::ValueType> TypeOperands;
+
+ //grab the arguments
+ for(int i = 2, e = N->getNumOperands(); i < e; ++i) {
+ TypeOperands.push_back(N->getOperand(i).getValueType());
+ AddToISelQueue(N->getOperand(i));
+ CallOperands.push_back(N->getOperand(i));
+ }
+ int count = N->getNumOperands() - 2;
+
+ static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18,
+ Alpha::R19, Alpha::R20, Alpha::R21};
+ static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18,
+ Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (int i = 6; i < count; ++i) {
+ unsigned Opc = Alpha::WTF;
+ if (MVT::isInteger(TypeOperands[i])) {
+ Opc = Alpha::STQ;
+ } else if (TypeOperands[i] == MVT::f32) {
+ Opc = Alpha::STS;
+ } else if (TypeOperands[i] == MVT::f64) {
+ Opc = Alpha::STT;
+ } else
+ assert(0 && "Unknown operand");
+
+ SDOperand Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8),
+ CurDAG->getCopyFromReg(Chain, Alpha::R30, MVT::i64),
+ Chain };
+ Chain = SDOperand(CurDAG->getTargetNode(Opc, MVT::Other, Ops, 4), 0);
+ }
+ for (int i = 0; i < std::min(6, count); ++i) {
+ if (MVT::isInteger(TypeOperands[i])) {
+ Chain = CurDAG->getCopyToReg(Chain, args_int[i], CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) {
+ Chain = CurDAG->getCopyToReg(Chain, args_float[i], CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else
+ assert(0 && "Unknown operand");
+ }
+
+ // Finally, once everything is in registers to pass to the call, emit the
+ // call itself.
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDOperand GOT = getGlobalBaseReg();
+ Chain = CurDAG->getCopyToReg(Chain, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDOperand(CurDAG->getTargetNode(Alpha::BSR, MVT::Other, MVT::Flag,
+ Addr.getOperand(0), Chain, InFlag), 0);
+ } else {
+ AddToISelQueue(Addr);
+ Chain = CurDAG->getCopyToReg(Chain, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDOperand(CurDAG->getTargetNode(Alpha::JSR, MVT::Other, MVT::Flag,
+ Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> CallResults;
+
+ switch (N->getValueType(0)) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i64:
+ Chain = CurDAG->getCopyFromReg(Chain, Alpha::R0, MVT::i64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f32:
+ Chain = CurDAG->getCopyFromReg(Chain, Alpha::F0, MVT::f32, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f64:
+ Chain = CurDAG->getCopyFromReg(Chain, Alpha::F0, MVT::f64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ }
+
+ CallResults.push_back(Chain);
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into a
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(TargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 0000000..d4777b2
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,623 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
+ MF.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n ubyte 1
+ setShiftAmountType(MVT::i64);
+ setSetCCResultType(MVT::i64);
+ setSetCCResultContents(ZeroOrOneSetCCResult);
+
+ setUsesGlobalOffsetTable(true);
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadXAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ setStoreXAction(MVT::i1, Promote);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+ setOperationAction(ISD::MEMSET , MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
+
+ // We don't support sin/cos/sqrt
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+  // We want to legalize GlobalAddress, ConstantPool, and ExternalSymbol
+  // nodes into the appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ addLegalFPImmediate(+0.0); //F31
+ addLegalFPImmediate(-0.0); //-F31
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ computeRegisterProperties();
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
+static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+
+ SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi, MVT::i64, JTI,
+ DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
+ SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use the variable-size stack frame format.
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
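+
+//An illustrative example of the mapping above: for a call f(a,b,c,d,e,f,g,h)
+//with integer arguments, a..f are passed in $16..$21 (floating-point
+//arguments in those positions would use $f16..$f21), g is stored at 0(SP),
+//and h at 8(SP).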
+
+static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
+ int &VarArgsBase,
+ int &VarArgsOffset) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ std::vector<SDOperand> ArgValues;
+ SDOperand Root = Op.getOperand(0);
+
+ AddLiveIn(MF, Alpha::R29, &Alpha::GPRCRegClass); //GP
+ AddLiveIn(MF, Alpha::R26, &Alpha::GPRCRegClass); //RA
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
+ SDOperand argt;
+ MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
+ SDOperand ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT) {
+ default:
+ cerr << "Unknown Type " << ObjectVT << "\n";
+ abort();
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, args_int[ArgNo], MVT::i64);
+ break;
+ }
+ } else { //more args
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+ }
+ ArgValues.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, copy all regs to the stack.
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (isVarArg) {
+ VarArgsOffset = (Op.Val->getNumValues()-1) * 8;
+ std::vector<SDOperand> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (MRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDOperand argt = DAG.getCopyFromReg(Root, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+ if (i == 0) VarArgsBase = FI;
+ SDOperand SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, argt, SDFI, NULL, 0));
+
+ if (MRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Root, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, argt, SDFI, NULL, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &LS[0], LS.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+ Op.Val->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Copy = DAG.getCopyToReg(Op.getOperand(0), Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ MVT::i64),
+ SDOperand());
+ switch (Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ break;
+ //return SDOperand(); // ret void is legal
+ case 3: {
+ MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg;
+ if (MVT::isInteger(ArgVT))
+ ArgReg = Alpha::R0;
+ else {
+ assert(MVT::isFloatingPoint(ArgVT));
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, ArgReg, Op.getOperand(1), Copy.getValue(1));
+ if (DAG.getMachineFunction().liveout_empty())
+ DAG.getMachineFunction().addLiveOut(ArgReg);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+}
+
+std::pair<SDOperand, SDOperand>
+AlphaTargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
+ bool RetTyIsSigned, bool isVarArg,
+ unsigned CallingConv, bool isTailCall,
+ SDOperand Callee, ArgListTy &Args,
+ SelectionDAG &DAG) {
+ int NumBytes = 0;
+ if (Args.size() > 6)
+ NumBytes = (Args.size() - 6) * 8;
+
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, getPointerTy()));
+ std::vector<SDOperand> args_to_use;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ switch (getValueType(Args[i].Ty)) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Promote the integer to 64 bits. If the input type is signed use a
+ // sign extend, otherwise use a zero extend.
+ if (Args[i].isSExt)
+ Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, MVT::i64, Args[i].Node);
+ else if (Args[i].isZExt)
+ Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i64, Args[i].Node);
+ else
+ Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, MVT::i64, Args[i].Node);
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::f32:
+ break;
+ }
+ args_to_use.push_back(Args[i].Node);
+ }
+
+ std::vector<MVT::ValueType> RetVals;
+ MVT::ValueType RetTyVT = getValueType(RetTy);
+ MVT::ValueType ActualRetTyVT = RetTyVT;
+ if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i32)
+ ActualRetTyVT = MVT::i64;
+
+ if (RetTyVT != MVT::isVoid)
+ RetVals.push_back(ActualRetTyVT);
+ RetVals.push_back(MVT::Other);
+
+ std::vector<SDOperand> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
+ SDOperand TheCall = DAG.getNode(AlphaISD::CALL, RetVals, &Ops[0], Ops.size());
+ Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+ DAG.getConstant(NumBytes, getPointerTy()));
+ SDOperand RetVal = TheCall;
+
+ if (RetTyVT != ActualRetTyVT) {
+ RetVal = DAG.getNode(RetTyIsSigned ? ISD::AssertSext : ISD::AssertZext,
+ MVT::i64, RetVal, DAG.getValueType(RetTyVT));
+ RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
+ }
+
+ return std::make_pair(RetVal, Chain);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDOperand AlphaTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
+ VarArgsBase,
+ VarArgsOffset);
+
+ case ISD::RET: return LowerRET(Op,DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::SINT_TO_FP: {
+ assert(MVT::i64 == Op.getOperand(0).getValueType() &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDOperand LD;
+ bool isDouble = MVT::f64 == Op.getValueType();
+ LD = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
+ SDOperand FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = MVT::f64 == Op.getOperand(0).getValueType();
+ SDOperand src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, MVT::f64, src);
+
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+
+ SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi, MVT::i64, CPI,
+ DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
+ SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasInternalLinkage()) {
+ SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi, MVT::i64, GA,
+ DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
+ SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, MVT::i64, GA,
+ DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+    //Expand only in the constant case.
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ MVT::ValueType VT = Op.Val->getValueType(0);
+ SDOperand Tmp1 = Op.Val->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.Val, DAG, NULL) :
+ BuildSDIV(Op.Val, DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (MVT::isInteger(Op.getValueType())) {
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.Val, DAG, NULL)
+ : BuildUDIV(Op.Val, DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDOperand Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
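+    // On Alpha a va_list is effectively a (base pointer, 32-bit offset)
+    // pair: the base is loaded from the va_list slot itself and the offset
+    // from 8 bytes past it, as the loads below show.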
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand VAListP = Op.getOperand(1);
+ SrcValueSDNode *VAListS = cast<SrcValueSDNode>(Op.getOperand(2));
+
+ SDOperand Base = DAG.getLoad(MVT::i64, Chain, VAListP, VAListS->getValue(),
+ VAListS->getOffset());
+ SDOperand Tmp = DAG.getNode(ISD::ADD, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDOperand Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Base.getValue(1),
+ Tmp, NULL, 0, MVT::i32);
+ SDOperand DataPtr = DAG.getNode(ISD::ADD, MVT::i64, Base, Offset);
+ if (MVT::isFloatingPoint(Op.getValueType()))
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDOperand FPDataPtr = DAG.getNode(ISD::SUB, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDOperand CC = DAG.getSetCC(MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDOperand NewOffset = DAG.getNode(ISD::ADD, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ SDOperand Update = DAG.getTruncStore(Offset.getValue(1), NewOffset,
+ Tmp, NULL, 0, MVT::i32);
+
+ SDOperand Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Update, DataPtr,
+ NULL, 0, MVT::i32);
+ else
+ Result = DAG.getLoad(Op.getValueType(), Update, DataPtr, NULL, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand DestP = Op.getOperand(1);
+ SDOperand SrcP = Op.getOperand(2);
+ SrcValueSDNode *DestS = cast<SrcValueSDNode>(Op.getOperand(3));
+ SrcValueSDNode *SrcS = cast<SrcValueSDNode>(Op.getOperand(4));
+
+ SDOperand Val = DAG.getLoad(getPointerTy(), Chain, SrcP,
+ SrcS->getValue(), SrcS->getOffset());
+ SDOperand Result = DAG.getStore(Val.getValue(1), Val, DestP, DestS->getValue(),
+ DestS->getOffset());
+ SDOperand NP = DAG.getNode(ISD::ADD, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Result, NP, NULL,0, MVT::i32);
+ SDOperand NPD = DAG.getNode(ISD::ADD, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), Val, NPD, NULL, 0, MVT::i32);
+ }
+ case ISD::VASTART: {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand VAListP = Op.getOperand(1);
+ SrcValueSDNode *VAListS = cast<SrcValueSDNode>(Op.getOperand(2));
+
+    // vastart stores the frame address of VarArgsBase and the VarArgsOffset value.
+ SDOperand FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
+ SDOperand S1 = DAG.getStore(Chain, FR, VAListP, VAListS->getValue(),
+ VAListS->getOffset());
+ SDOperand SA2 = DAG.getNode(ISD::ADD, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, DAG.getConstant(VarArgsOffset, MVT::i64),
+ SA2, NULL, 0, MVT::i32);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDOperand();
+}
+
+SDOperand AlphaTargetLowering::CustomPromoteOperation(SDOperand Op,
+ SelectionDAG &DAG) {
+ assert(Op.getValueType() == MVT::i32 &&
+ Op.getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ // The code in LowerOperation already handles i32 vaarg
+ return LowerOperation(Op, DAG);
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::vector<unsigned> AlphaTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+    default: break; // Unknown constraint letter.
+ case 'f':
+ return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
+ Alpha::F3 , Alpha::F4 , Alpha::F5 ,
+ Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+ Alpha::F9 , Alpha::F10, Alpha::F11,
+ Alpha::F12, Alpha::F13, Alpha::F14,
+ Alpha::F15, Alpha::F16, Alpha::F17,
+ Alpha::F18, Alpha::F19, Alpha::F20,
+ Alpha::F21, Alpha::F22, Alpha::F23,
+ Alpha::F24, Alpha::F25, Alpha::F26,
+ Alpha::F27, Alpha::F28, Alpha::F29,
+ Alpha::F30, Alpha::F31, 0);
+ case 'r':
+ return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+ Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+ Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+ Alpha::R9 , Alpha::R10, Alpha::R11,
+ Alpha::R12, Alpha::R13, Alpha::R14,
+ Alpha::R15, Alpha::R16, Alpha::R17,
+ Alpha::R18, Alpha::R19, Alpha::R20,
+ Alpha::R21, Alpha::R22, Alpha::R23,
+ Alpha::R24, Alpha::R25, Alpha::R26,
+ Alpha::R27, Alpha::R28, Alpha::R29,
+ Alpha::R30, Alpha::R31, 0);
+ }
+ }
+
+ return std::vector<unsigned>();
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 0000000..24e40a5
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,94 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+      // Start the numbering where the built-in ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+Alpha::INSTRUCTION_LIST_END,
+      //These correspond to the identically named instructions.
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
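+      /// A GP-relative address is typically materialized as an ldah/lda pair
+      /// against the GP register, roughly: ldah $r,hi($gp) ; lda $r,lo($r).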
+ GPRelHi, GPRelLo,
+
+      /// RelLit - Literal Relocation of a Global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DIVCALL - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
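+      /// For example, (COND_BRANCH_I BEQ, $r, bb) branches to bb when $r is
+      /// zero.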
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ int VarArgsOffset; // What is the offset to the first vaarg
+ int VarArgsBase; // What is the base FrameIndex
+ bool useITOF;
+ public:
+ AlphaTargetLowering(TargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+ virtual SDOperand CustomPromoteOperation(SDOperand Op, SelectionDAG &DAG);
+
+ //Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDOperand, SDOperand>
+ LowerCallTo(SDOperand Chain, const Type *RetTy, bool RetTyIsSigned,
+ bool isVarArg, unsigned CC, bool isTailCall, SDOperand Callee,
+ ArgListTy &Args, SelectionDAG &DAG);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+
+ bool hasITOF() { return useITOF; }
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 0000000..259e9af
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,249 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//3.3: Instruction formats:
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
+class MForm<bits<6> opcode, bit store, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let isStore = store;
+ let isLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OperandList = (ops GPRC:$RA);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, noResults = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops GPRC:$RC, GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops GPRC:$RC, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops GPRC:$RDEST, GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops GPRC:$RC, GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OperandList = (ops GPRC:$RDEST, GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OperandList = OL;
+ let Pattern = pattern;
+
+}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 0000000..718587d
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,266 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : TargetInstrInfo(AlphaInsts, sizeof(AlphaInsts)/sizeof(AlphaInsts[0])),
+ RI(*this) { }
+
+
+bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const {
+ MachineOpCode oc = MI.getOpcode();
+ if (oc == Alpha::BISr ||
+ oc == Alpha::CPYSS ||
+ oc == Alpha::CPYST ||
+ oc == Alpha::CPYSSt ||
+ oc == Alpha::CPYSTs) {
+ // or r1, r2, r2
+ // cpys(s|t) r1 r2 r2
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ MI.getOperand(2).isRegister() &&
+ "invalid Alpha BIS instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFrameIndex()) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFrameIndex()) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond)const{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ assert(0 && "Unknown opcode");
+ }
+}
+
+// Branch analysis.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMachineBasicBlock();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMachineBasicBlock();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ I = LastInst;
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ BuildMI(MBB, MI, get(Alpha::BISr), Alpha::R31).addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
+
+bool AlphaInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case Alpha::RETDAG: // Return.
+ case Alpha::RETDAGp:
+ case Alpha::BR: // Uncond branch.
+ case Alpha::JMP: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+bool AlphaInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 0000000..84009be
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,57 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+namespace llvm {
+
+class AlphaInstrInfo : public TargetInstrInfo {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and
+ /// leave the source and dest operands in the passed parameters.
+ ///
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const;
+
+ virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 0000000..4a834da
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1088 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, []>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i64> ]>;
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDOperand(), RHS->getValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x = abs(N->getValue() - getNearPower2((uint64_t)N->getValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getValue() == (uint8_t)N->getValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getValue() == (uint8_t)~N->getValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getValue() + 1) == (uint8_t)((uint64_t)~N->getValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getValue() << 48) >> 48 == (int64_t)N->getValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getValue() << 48) >> 48 == ((int64_t)N->getValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getValue());
+ return build != 0;
+ }
+ return false;
+}]>;
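+//For zappat above, e.g. (and X, 0x00000000FFFFFFFF) keeps only the low four
+//bytes, so it can be selected as a ZAPNOT with byte mask 0x0F.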
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),1, 0);}]>;
+def immRem2 : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),2, 0);}]>;
+def immRem3 : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),3, 0);}]>;
+def immRem4 : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),4, 0);}]>;
+def immRem5 : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),5, 0);}]>;
+def immRem1n : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),1, 1);}]>;
+def immRem2n : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),2, 1);}]>;
+def immRem3n : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),3, 1);}]>;
+def immRem4n : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),4, 1);}]>;
+def immRem5n : PatLeaf<(imm), [{return chkRemNearPower2(N->getValue(),5, 1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getValue()) - N->getValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getValue() - getNearPower2((uint64_t)N->getValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs((int64_t)N->getValue() - (int64_t)getNearPower2((uint64_t)N->getValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getValue() == (uint8_t)N->getValue();
+ };
+}]>;
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def IDEF_I : PseudoInstAlpha<(ops GPRC:$RA), ";#idef $RA",
+ [(set GPRC:$RA, (undef))], s_pseudo>;
+def IDEF_F32 : PseudoInstAlpha<(ops F4RC:$RA), ";#idef $RA",
+ [(set F4RC:$RA, (undef))], s_pseudo>;
+def IDEF_F64 : PseudoInstAlpha<(ops F8RC:$RA), ";#idef $RA",
+ [(set F8RC:$RA, (undef))], s_pseudo>;
+
+def WTF : PseudoInstAlpha<(ops variable_ops), "#wtf", [], s_pseudo>;
+
+let isLoad = 1, hasCtrlDep = 1 in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(ops s64imm:$amt), "; ADJUP $amt",
+ [(callseq_start imm:$amt)], s_pseudo>, Imp<[R30],[R30]>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(ops s64imm:$amt), "; ADJDOWN $amt",
+ [(callseq_end imm:$amt)], s_pseudo>, Imp<[R30],[R30]>;
+}
+def ALTENT : PseudoInstAlpha<(ops s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(ops s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(ops s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Constant cases: legalize turns (sub x, int) into (add x, inv(int) + 1)
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
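+//Example of the rewrite above: (sub x, 5) is legalized to (add x, -5); -5 does
+//not fit immUExt8, but its negation does, so immUExt8neg matches and the add is
+//selected back to SUBQi x, 5 (likewise for the scaled S4/S8 forms).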
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(i64 (zappat GPRC:$RA):$imm),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
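+//zapnot copies byte i of $RA when bit i of the 8-bit mask is set and zeroes it
+//otherwise. The zappat/iZAPX pair above appears to accept an AND whose constant
+//is built only from 0x00/0xFF bytes and converts it to that byte mask, e.g.
+//(and x, 0x00000000FFFFFFFF) becomes (zapnot x, 0x0F).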
+
+
+//Comparison, int
+//So this is a waste of what this instruction can do, but it still saves something
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
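+//Note on the defs above: cmpbge compares all eight byte lanes of $RA and $RB
+//for unsigned >= and produces one result bit per lane; these patterns only use
+//lane 0 (the low bytes), which is what the comment above refers to.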
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+
+let isReturn = 1, isTerminator = 1, noResults = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1,
+Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, noResults = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, noResults = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, noResults = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 0, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 0, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 0, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 0, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 0, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 0, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 0, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 0, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+
+
+def STB : MForm<0x0E, 1, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 1, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 1, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 1, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 1, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 1, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 1, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 1, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+
+//Load address
+def LDA : MForm<0x08, 0, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 1, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 1, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+def LDS : MForm<0x22, 0, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 0, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 1, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 1, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+def LDT : MForm<0x23, 0, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 0, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//constpool rels
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//jumptable rels
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//misc ext patterns
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//0 disp patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//load address, relocated gpdist form
+let OperandList = (ops GPRC:$RA, s16imm:$DISP, GPRC:$RB, s16imm:$NUM) in {
+def LDAg : MForm<0x08, 0, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 0, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 0, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+
+//Basic Floating point ops
+
+//Floats
+
+let OperandList = (ops F4RC:$RC, F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OperandList = (ops F4RC:$RC, F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OperandList = (ops F8RC:$RC, F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OperandList = (ops F8RC:$RC, F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OperandList = (ops F8RC:$RC, F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OperandList = (ops F4RC:$RC, F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OperandList = (ops F4RC:$RDEST, F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OperandList = (ops F8RC:$RDEST, F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OperandList = (ops GPRC:$RC, F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",[], s_ftoi>; //Floating to integer move, S_floating
+let OperandList = (ops GPRC:$RC, F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OperandList = (ops F4RC:$RC, GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",[], s_itof>; //Integer to floating move, S_floating
+let OperandList = (ops F8RC:$RC, GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OperandList = (ops F4RC:$RC, F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OperandList = (ops F8RC:$RC, F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OperandList = (ops F8RC:$RC, F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OperandList = (ops F8RC:$RC, F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OperandList = (ops F4RC:$RC, F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
+
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, noResults = 1 in {
+let Ra = 31 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x35, "fbne">;
+}
+
+//An ugly trick to get the opcode as an imm I can use
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getValue()) {
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ default: assert(0 && "Unknown branch type");
+ }
+}]>;
+
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDL_L Mem 2A Load sign-extended longword locked
+//LDQ_L Mem 2B Load quadword locked
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STL_C Mem 2E Store longword conditional
+//STQ_C Mem 2F Store quadword conditional
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64 Mfc 18.F800 Write hint - 64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair
+ int64_t val = (int64_t)N->getValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair with zeroext
+ uint64_t uval = N->getValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (SExt32 immConst2PartInt:$imm)),
+ (LDAH (LH16 (SExt32 immConst2PartInt:$imm)), R31)), 15)>;
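+//Worked example for the LDAH/LDA pair (both displacements are sign-extended
+//16-bit values, so value = high*65536 + low): to build 0x12345678, LDAH forms
+//0x1234 << 16 relative to R31 (zero) and LDA then adds 0x5678. When the low
+//half is >= 0x8000 the high half must be bumped by one so the negative low
+//displacement cancels out; presumably that is what LL16/LH16 compute.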
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
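+//What the pattern above computes, for reference: with the cmov operand order
+//(false, true, cond), (CMOVGEr $RB, R31, $RA) is "$RA < 0 ? $RB : 0", so this is
+//  mulhs(a, b) = mulhu(a, b) - ((a < 0 ? b : 0) + (b < 0 ? a : 0))
+//the usual signed-from-unsigned high-multiply identity.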
+
+//Stupid crazy arithmetic stuff:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near to a power of 2
+//n is above a power of 2
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
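+//e.g. x * 33 (one above 32) matches immRem1 and becomes (x << 5) + x, and
+//x * 40 (eight above 32) matches immRemP2 and becomes (x << 5) + (x << 3).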
+
+//n is below a power of 2
+def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+ (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
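+//e.g. x * 31 (one below 32) matches immRem1n and becomes (x << 5) - x.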
+} //Added complexity
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
new file mode 100644
index 0000000..669a2d5
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -0,0 +1,305 @@
+//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Alpha target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "AlphaJITInfo.h"
+#include "AlphaRelocations.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
+#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
+
+#define BUILD_LDA(RD, RS, IMM16) \
+ ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_LDAH(RD, RS, IMM16) \
+ ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+
+#define BUILD_LDQ(RD, RS, IMM16) \
+ ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
+
+#define BUILD_JMP(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
+#define BUILD_JSR(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
+
+#define BUILD_SLLi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
+
+#define BUILD_ORi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
+
+#define BUILD_OR(RD, RS, RT) \
+ (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
+
+
+
+static void EmitBranchToAt(void *At, void *To) {
+ unsigned long Fn = (unsigned long)To;
+
+ unsigned *AtI = (unsigned*)At;
+
+ AtI[0] = BUILD_OR(0, 27, 27);
+
+ DOUT << "Stub targeting " << To << "\n";
+
+ for (int x = 1; x <= 8; ++x) {
+ AtI[2*x - 1] = BUILD_SLLi(27,27,8);
+ unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
+ //DOUT << "outputting " << hex << d << dec << "\n";
+ AtI[2*x] = BUILD_ORi(27, 27, d);
+ }
+ AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
+ AtI[18] = 0x00FFFFFF; //mark this as a stub
+}
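+//Layout of the 19-word stub built above, for reference: word 0 copies $27
+//(which holds the stub's own address on entry, per the calling convention) into
+//$0 so the callback can find the stub; words 1-16 are eight sll/or pairs that
+//shift the 64-bit target address into $27 one byte at a time, most significant
+//byte first; word 17 jumps through $27; word 18 is the 0x00FFFFFF marker that
+//AlphaCompilationCallbackC checks for.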
+
+void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ //FIXME
+ assert(0);
+}
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+//static AlphaJITInfo* AlphaJTI;
+
+extern "C" {
+#ifdef __alpha
+
+ void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
+ {
+ void* Target = JITCompilerFunction(CameFromStub);
+
+ //rewrite the stub to an unconditional branch
+ if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
+ DOUT << "Came from a stub, rewriting\n";
+ EmitBranchToAt(CameFromStub, Target);
+ } else {
+ DOUT << "confused, didn't come from stub at " << CameFromStub
+ << " old jump vector " << oldpv
+ << " new jump vector " << Target << "\n";
+ }
+
+ //Change pv to new Target
+ *oldpv = (long)Target;
+ }
+
+ void AlphaCompilationCallback(void);
+
+ asm(
+ ".text\n"
+ ".globl AlphaCompilationCallbackC\n"
+ ".align 4\n"
+ ".globl AlphaCompilationCallback\n"
+ ".ent AlphaCompilationCallback\n"
+"AlphaCompilationCallback:\n"
+ //get JIT's GOT
+ "ldgp $29, 0($27)\n"
+ //Save args, callee saved, and perhaps others?
+ //args: $16-$21 $f16-$f21 (12)
+ //callee: $9-$14 $f2-$f9 (14)
+ //others: fp:$15 ra:$26 pv:$27 (3)
+ "lda $30, -232($30)\n"
+ "stq $16, 0($30)\n"
+ "stq $17, 8($30)\n"
+ "stq $18, 16($30)\n"
+ "stq $19, 24($30)\n"
+ "stq $20, 32($30)\n"
+ "stq $21, 40($30)\n"
+ "stt $f16, 48($30)\n"
+ "stt $f17, 56($30)\n"
+ "stt $f18, 64($30)\n"
+ "stt $f19, 72($30)\n"
+ "stt $f20, 80($30)\n"
+ "stt $f21, 88($30)\n"
+ "stq $9, 96($30)\n"
+ "stq $10, 104($30)\n"
+ "stq $11, 112($30)\n"
+ "stq $12, 120($30)\n"
+ "stq $13, 128($30)\n"
+ "stq $14, 136($30)\n"
+ "stt $f2, 144($30)\n"
+ "stt $f3, 152($30)\n"
+ "stt $f4, 160($30)\n"
+ "stt $f5, 168($30)\n"
+ "stt $f6, 176($30)\n"
+ "stt $f7, 184($30)\n"
+ "stt $f8, 192($30)\n"
+ "stt $f9, 200($30)\n"
+ "stq $15, 208($30)\n"
+ "stq $26, 216($30)\n"
+ "stq $27, 224($30)\n"
+
+ "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
+ "bis $0, $0, $17\n" //pass the rough stub address as the second arg
+ "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
+
+ "ldq $16, 0($30)\n"
+ "ldq $17, 8($30)\n"
+ "ldq $18, 16($30)\n"
+ "ldq $19, 24($30)\n"
+ "ldq $20, 32($30)\n"
+ "ldq $21, 40($30)\n"
+ "ldt $f16, 48($30)\n"
+ "ldt $f17, 56($30)\n"
+ "ldt $f18, 64($30)\n"
+ "ldt $f19, 72($30)\n"
+ "ldt $f20, 80($30)\n"
+ "ldt $f21, 88($30)\n"
+ "ldq $9, 96($30)\n"
+ "ldq $10, 104($30)\n"
+ "ldq $11, 112($30)\n"
+ "ldq $12, 120($30)\n"
+ "ldq $13, 128($30)\n"
+ "ldq $14, 136($30)\n"
+ "ldt $f2, 144($30)\n"
+ "ldt $f3, 152($30)\n"
+ "ldt $f4, 160($30)\n"
+ "ldt $f5, 168($30)\n"
+ "ldt $f6, 176($30)\n"
+ "ldt $f7, 184($30)\n"
+ "ldt $f8, 192($30)\n"
+ "ldt $f9, 200($30)\n"
+ "ldq $15, 208($30)\n"
+ "ldq $26, 216($30)\n"
+ "ldq $27, 224($30)\n" //this was updated in the callback with the target
+
+ "lda $30, 232($30)\n" //restore sp
+ "jmp $31, ($27)\n" //jump to the new function
+ ".end AlphaCompilationCallback\n"
+ );
+#else
+ void AlphaCompilationCallback() {
+ cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
+ abort();
+ }
+#endif
+}
+
+void *AlphaJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
+ //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
+ //Do things in a stupid slow way!
+ MCE.startFunctionStub(19*4);
+ void* Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
+ for (int x = 0; x < 19; ++ x)
+ MCE.emitWordLE(0);
+ EmitBranchToAt(Addr, Fn);
+ DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+ return MCE.finishFunctionStub(0);
+}
+
+TargetJITInfo::LazyResolverFn
+AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ // setZerothGOTEntry((void*)AlphaCompilationCallback);
+ return AlphaCompilationCallback;
+}
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ if (l % IMM_MULT < IMM_LOW)
+ --y;
+ assert((short)y == y && "displacement out of range");
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ long y = l - h * IMM_MULT;
+ assert(y == (short)y && "Displacement out of range");
+ return y;
+}
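+//These split a displacement l so that getUpper16(l)*IMM_MULT + getLower16(l) == l
+//with the low half treated as a signed 16-bit value; e.g. l = 0x18000 gives
+//upper = 2 and lower = -0x8000, since 2*65536 - 32768 == 0x18000.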
+
+void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ //Because gpdist relocations are paired and relative to the pc of the first
+ //instruction, we need to keep some state.
+
+ static std::map<std::pair<void*, int>, void*> gpdistmap;
+
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ long idx = 0;
+ bool doCommon = true;
+ switch ((Alpha::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case Alpha::reloc_literal:
+ //This is a LDQl
+ idx = MR->getGOTIndex();
+ DOUT << "Literal relocation to slot " << idx;
+ idx = (idx - GOToffset) * 8;
+ DOUT << " offset " << idx << "\n";
+ break;
+ case Alpha::reloc_gprellow:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getLower16(idx);
+ DOUT << "gprellow relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gprelhigh:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getUpper16(idx);
+ DOUT << "gprelhigh relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gpdist:
+ switch (*RelocPos >> 26) {
+ case 0x09: //LDAH
+ idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
+ idx = getUpper16(idx);
+ DOUT << "LDAH: " << idx << "\n";
+ //add the relocation to the map
+ gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
+ break;
+ case 0x08: //LDA
+ assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
+ "LDAg without seeing LDAHg");
+ idx = &GOTBase[GOToffset * 8] -
+ (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
+ idx = getLower16(idx);
+ DOUT << "LDA: " << idx << "\n";
+ break;
+ default:
+ assert(0 && "Cannot handle gpdist yet");
+ }
+ break;
+ case Alpha::reloc_bsr: {
+ idx = (((unsigned char*)MR->getResultPointer() -
+ (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
+ *RelocPos |= (idx & ((1 << 21)-1));
+ doCommon = false;
+ break;
+ }
+ }
+ if (doCommon) {
+ short x = (short)idx;
+ assert(x == idx);
+ *(short*)RelocPos = x;
+ }
+ }
+}
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
new file mode 100644
index 0000000..26c45b1
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -0,0 +1,49 @@
+//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_JITINFO_H
+#define ALPHA_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/GlobalValue.h"
+#include <string>
+#include <map>
+
+namespace llvm {
+ class TargetMachine;
+
+ class AlphaJITInfo : public TargetJITInfo {
+ protected:
+ TargetMachine &TM;
+ public:
+ AlphaJITInfo(TargetMachine &tm) : TM(tm)
+ { useGOT = true; }
+
+ virtual void *emitFunctionStub(void *Fn, MachineCodeEmitter &MCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ private:
+ static const unsigned GOToffset = 4096;
+
+ };
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 0000000..27c2738
--- /dev/null
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,162 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller.
+// We also align some branch targets if we can do so for free.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
+ unsigned count = 0;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0]
+ && prev[0]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[0]->getOperand(1).getImmedValue() ==
+ MI->getOperand(1).getImmedValue()) {
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImmedValue() ==
+ MI->getOperand(1).getImmedValue()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImmedValue() ==
+ MI->getOperand(1).getImmedValue()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31).addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31).addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, TII->get(Alpha::BISr), Alpha::R31).addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ case Alpha::IDEF_I:
+ case Alpha::IDEF_F32:
+ case Alpha::IDEF_F64:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //we can align stuff for free at this point
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
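
For readers unfamiliar with the 21264, the pass above works around load-load replay
traps: two memory operations that hit the same stack slot inside one 4-instruction
fetch block force a pipeline replay, so the pass pads with BIS $31,$31,$31 NOPs until
the second access falls into the next block. The following is a simplified,
hypothetical sketch of the window logic; MemOp and nopsBefore are stand-ins, not
backend API.

// prev[] holds up to three earlier memory ops seen in the current fetch block,
// oldest first; the return value is how many NOPs to emit before 'cur' so that
// it no longer shares a fetch block with the conflicting access.
struct MemOp { unsigned baseReg; long offset; };

static bool sameSlot(const MemOp *a, const MemOp &b) {
  return a && a->baseReg == b.baseReg && a->offset == b.offset;
}

static unsigned nopsBefore(const MemOp *prev[3], const MemOp &cur) {
  if (sameSlot(prev[0], cur)) return 1;  // conflict three slots back
  if (sameSlot(prev[1], cur)) return 2;  // conflict two slots back
  if (sameSlot(prev[2], cur)) return 3;  // conflict in the previous slot
  return 0;
}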
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 0000000..59d3e81
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,433 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ return l - h * IMM_MULT;
+}
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ TII(tii)
+{
+}
+
+void
+AlphaRegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::STS))
+ .addReg(SrcReg, false, false, true)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::STT))
+ .addReg(SrcReg, false, false, true)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::STQ))
+ .addReg(SrcReg, false, false, true)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+void
+AlphaRegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, TII.get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+MachineInstr *AlphaRegisterInfo::foldMemoryOperand(MachineInstr *MI,
+ unsigned OpNum,
+ int FrameIndex) const {
+ // Make sure this is a reg-reg copy.
+ unsigned Opc = MI->getOpcode();
+
+ MachineInstr *NewMI = NULL;
+ switch(Opc) {
+ default:
+ break;
+ case Alpha::BISr:
+ case Alpha::CPYSS:
+ case Alpha::CPYST:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
+ NewMI = BuildMI(TII.get(Opc)).addReg(InReg).addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ } else { // load -> move
+ unsigned OutReg = MI->getOperand(0).getReg();
+ Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
+ NewMI = BuildMI(TII.get(Opc), OutReg).addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ }
+ }
+ break;
+ }
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
+
+
+void AlphaRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
+ if (RC == Alpha::GPRCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(Alpha::BISr), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (RC == Alpha::F4RCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(Alpha::CPYSS), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (RC == Alpha::F8RCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(Alpha::CPYST), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else {
+ cerr << "Attempt to copy register that is not GPR or FPR";
+ abort();
+ }
+}
+
+void AlphaRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Alpha::R15);
+ Reserved.set(Alpha::R30);
+ Reserved.set(Alpha::R31);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas.
+//
+bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+    // If we have a frame pointer, turn the ADJUSTSTACKDOWN instruction into
+    // 'lda $30, -<amt>($30)' and the ADJUSTSTACKUP instruction into
+    // 'lda $30, <amt>($30)'.
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImmedValue();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+//Alpha has a slightly funny stack:
+//Args
+//<- incoming SP
+//fixed locals (and spills, callee saved, etc)
+//<- FP
+//variable locals
+//<- SP
+
+void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DOUT << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+
+ if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+ DOUT << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n";
+ //so in this case, we need to use a temporary register, and move the
+ //original inst off the SP/FP
+ //fix up the old:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+ //insert the new
+ MachineInstr* nMI=BuildMI(TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+
+void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool FP = hasFP(MF);
+
+ static int curgpdist = 0;
+
+  //handle GP offset
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R29).addImm(curgpdist);
+
+ //evil const_cast until MO stuff setup to handle const
+ BuildMI(MBB, MBBI, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()));
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
+ if (NumBytes >= IMM_LOW) {
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= IMM_LOW) {
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDAH), Alpha::R30).addImm(getUpper16(NumBytes))
+ .addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDA), Alpha::R30).addImm(getLower16(NumBytes))
+ .addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+
+ //now if we need to, save the old FP and set the new
+ if (FP)
+ {
+ BuildMI(MBB, MBBI, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ //this must be the last instr in the prolog
+ BuildMI(MBB, MBBI, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert((MBBI->getOpcode() == Alpha::RETDAG ||
+          MBBI->getOpcode() == Alpha::RETDAGp) &&
+         "Can only insert epilog into returning blocks");
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP)
+ {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDQ), Alpha::R15).addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0)
+ {
+ if (NumBytes <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+ }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#include "AlphaGenRegisterInfo.inc"
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
+{
+ std::string s(RegisterDescriptors[reg].Name);
+ return s;
+}
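
Both eliminateCallFramePseudoInstr and emitPrologue above round stack adjustments up
to the target stack alignment with the (x + Align - 1) / Align * Align idiom. A small
self-contained check of that rounding follows; alignTo is a local helper for
illustration only, not backend API.

#include <cassert>

static unsigned long alignTo(unsigned long bytes, unsigned long align) {
  return (bytes + align - 1) / align * align;
}

int main() {
  assert(alignTo(0, 16)  == 0);
  assert(alignTo(1, 16)  == 16);
  assert(alignTo(24, 16) == 32);  // e.g. a 24-byte outgoing-arg area padded to 32
  assert(alignTo(32, 16) == 32);  // already aligned sizes are unchanged
  return 0;
}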
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 0000000..2872e59
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,85 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "AlphaGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ MachineInstr* foldMemoryOperand(MachineInstr *MI, unsigned OpNum,
+ int FrameIndex) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 0000000..9855ce2
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,171 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<0>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<1>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<2>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<3>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<4>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<5>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<6>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<7>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<8>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<9>;
+def R10 : GPR<10, "$10">, DwarfRegNum<10>;
+def R11 : GPR<11, "$11">, DwarfRegNum<11>;
+def R12 : GPR<12, "$12">, DwarfRegNum<12>;
+def R13 : GPR<13, "$13">, DwarfRegNum<13>;
+def R14 : GPR<14, "$14">, DwarfRegNum<14>;
+def R15 : GPR<15, "$15">, DwarfRegNum<15>;
+def R16 : GPR<16, "$16">, DwarfRegNum<16>;
+def R17 : GPR<17, "$17">, DwarfRegNum<17>;
+def R18 : GPR<18, "$18">, DwarfRegNum<18>;
+def R19 : GPR<19, "$19">, DwarfRegNum<19>;
+def R20 : GPR<20, "$20">, DwarfRegNum<20>;
+def R21 : GPR<21, "$21">, DwarfRegNum<21>;
+def R22 : GPR<22, "$22">, DwarfRegNum<22>;
+def R23 : GPR<23, "$23">, DwarfRegNum<23>;
+def R24 : GPR<24, "$24">, DwarfRegNum<24>;
+def R25 : GPR<25, "$25">, DwarfRegNum<25>;
+def R26 : GPR<26, "$26">, DwarfRegNum<26>;
+def R27 : GPR<27, "$27">, DwarfRegNum<27>;
+def R28 : GPR<28, "$28">, DwarfRegNum<28>;
+def R29 : GPR<29, "$29">, DwarfRegNum<29>;
+def R30 : GPR<30, "$30">, DwarfRegNum<30>;
+def R31 : GPR<31, "$31">, DwarfRegNum<31>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<33>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<34>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<35>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<36>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<37>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<38>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<39>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<40>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<41>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<42>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<43>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<44>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<45>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<46>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<47>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<48>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<49>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<50>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<51>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<52>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<53>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<54>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<55>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<56>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<57>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<58>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<59>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<60>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<61>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<62>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<63>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<64>;
+
+ // //#define FP $15
+ // //#define RA $26
+ // //#define PV $27
+ // //#define GP $29
+ // //#define SP $30
+ // $28 is undefined after any and all calls
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64,
+ // Volatile
+ [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+// Don't allocate 15, 30, 31
+ R15, R30, R31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3;
+ }
+ }];
+}
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F4RCClass::iterator
+ F4RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F8RCClass::iterator
+ F8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
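
The MethodBodies blocks above all return end() minus a small constant so the register
allocator never hands out the registers parked at the tail of each list: $15 (FP),
$30 (SP) and $31 (zero) for GPRC, and $f31 (zero) for the floating-point classes;
getReservedRegs in AlphaRegisterInfo.cpp marks the same set. A small illustration of
the idea follows; the vector of register numbers is only a stand-in for the
TableGen-generated allocation order.

#include <cassert>
#include <vector>

int main() {
  // GPRC allocation order as listed above, by register number.
  std::vector<unsigned> order = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 28, 27, 26, 29, 9, 10, 11, 12, 13, 14,
    15, 30, 31 };                    // FP, SP, zero sit at the very end
  auto allocEnd = order.end() - 3;   // mirrors GPRCClass::allocation_order_end
  for (auto it = order.begin(); it != allocEnd; ++it)
    assert(*it != 15 && *it != 30 && *it != 31);
  return 0;
}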
diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 0000000..c532f21
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 0000000..b3aab97
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,84 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//This is table 2-2 from the 21264 compiler writer's guide, with some
+//modifications.
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+//Table 2-4: Instruction Class Latency in Cycles
+//modified some
+
+def Alpha21264Itineraries : ProcessorItineraries<[
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 0000000..4b7d612
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,25 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "AlphaGenSubtarget.inc"
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+ : HasCT(false) {
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 0000000..3fb95ad
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,46 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Andrew Lenharth and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class AlphaSubtarget : public TargetSubtarget {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ AlphaSubtarget(const Module &M, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.cpp b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
new file mode 100644
index 0000000..233d2c7
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AlphaTargetAsmInfo.cpp - Alpha asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AlphaTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaTargetAsmInfo.h"
+
+using namespace llvm;
+
+AlphaTargetAsmInfo::AlphaTargetAsmInfo(const AlphaTargetMachine &TM) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ JumpTableDirective = ".gprel32";
+ JumpTableDataSection = "\t.section .rodata\n";
+ WeakRefDirective = "\t.weak\t";
+}
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.h b/lib/Target/Alpha/AlphaTargetAsmInfo.h
new file mode 100644
index 0000000..c8b4fd5
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- AlphaTargetAsmInfo.h - Alpha asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class AlphaTargetMachine;
+
+ struct AlphaTargetAsmInfo : public TargetAsmInfo {
+ AlphaTargetAsmInfo(const AlphaTargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 0000000..d4137a5
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,97 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaJITInfo.h"
+#include "AlphaTargetAsmInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Register the targets
+ RegisterTarget<AlphaTargetMachine> X("alpha", " Alpha (incomplete)");
+}
+
+const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
+ return new AlphaTargetAsmInfo(*this);
+}
+
+unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "alpha*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' &&
+ TT[3] == 'h' && TT[4] == 'a')
+ return 20;
+ // If the target triple is something non-alpha, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned AlphaTargetMachine::getJITMatchQuality() {
+#ifdef __alpha
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e"),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ JITInfo(*this),
+ Subtarget(M, FS),
+ TLInfo(*this) {
+ setRelocationModel(Reloc::PIC_);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ return false;
+}
+bool AlphaTargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ PM.add(createAlphaLLRPPass(*this));
+ PM.add(createAlphaCodePrinterPass(Out, *this));
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ PM.add(createAlphaCodeEmitterPass(*this, MCE));
+ return false;
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(FunctionPassManager &PM,
+ bool Fast,
+ MachineCodeEmitter &MCE) {
+ return addCodeEmitter(PM, Fast, MCE);
+}
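
getModuleMatchQuality above is a coarse heuristic: any triple beginning with "alpha"
is a strong match, any other non-empty triple rules the target out, and an empty
triple falls back to the module's endianness/pointer size and the JIT check. A hedged
sketch of just the triple part; tripleScore and its fallback value are illustrative,
not the real interface.

#include <string>

static unsigned tripleScore(const std::string &TT) {
  if (TT.size() >= 5 && TT.compare(0, 5, "alpha") == 0)
    return 20;                 // strong match, e.g. an "alpha*" triple
  if (!TT.empty())
    return 0;                  // explicitly some other target
  return 5;                    // no triple: defer to the weaker module checks
}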
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 0000000..5a57f63
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,73 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaJITInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaSubtarget.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ AlphaJITInfo JITInfo;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ AlphaTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual TargetLowering* getTargetLowering() const {
+ return const_cast<AlphaTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual TargetJITInfo* getJITInfo() {
+ return &JITInfo;
+ }
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+ virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
new file mode 100644
index 0000000..bb9895a
--- /dev/null
+++ b/lib/Target/Alpha/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAlpha
+TARGET = Alpha
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
+ AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
+ AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \
+ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
+ AlphaGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
new file mode 100644
index 0000000..9ae1517
--- /dev/null
+++ b/lib/Target/Alpha/README.txt
@@ -0,0 +1,42 @@
+***
+
+add gcc builtins for alpha instructions
+
+
+***
+
+custom expand byteswap into nifty
+extract/insert/mask byte/word/longword/quadword low/high
+sequences
+
+***
+
+see if any of the extract/insert/mask operations can be added
+
+***
+
+match more interesting things for cmovlbc cmovlbs (move if low bit clear/set)
+
+***
+
+lower srem and urem
+
+remq(i,j): i - (j * divq(i,j)) if j != 0
+remqu(i,j): i - (j * divqu(i,j)) if j != 0
+reml(i,j): i - (j * divl(i,j)) if j != 0
+remlu(i,j): i - (j * divlu(i,j)) if j != 0
+
+***
+
+add crazy vector instructions (MVI):
+
+(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
+PKWB, UNPKBW pack/unpack word to byte
+PKLB UNPKBL pack/unpack long to byte
+PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
+
+cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of the MVI extensions)
+
+This page has some good examples of other operations that can be synthesised
+well from these rather meager vector ops (such as saturating add):
+http://www.alphalinux.org/docs/MVI-full.html
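
The srem/urem note above relies on the identity rem(i, j) == i - j * div(i, j) for
j != 0, which holds whenever the division truncates toward zero (as C/C++ integer
division and the divq/divqu routines do). A quick self-contained check of the
identity, for illustration only:

#include <cassert>

static long sremExpand(long i, long j) { return i - j * (i / j); }
static unsigned long uremExpand(unsigned long i, unsigned long j) {
  return i - j * (i / j);
}

int main() {
  assert(sremExpand(17, 5)   == 17 % 5);    //  2
  assert(sremExpand(-17, 5)  == -17 % 5);   // -2 with truncating division
  assert(sremExpand(17, -5)  == 17 % -5);   //  2
  assert(uremExpand(17u, 5u) == 17u % 5u);  //  2
  return 0;
}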
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 0000000..b0c76c8
--- /dev/null
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,2930 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <sstream>
+using namespace llvm;
+
+namespace {
+ // Register the target.
+ RegisterTarget<CTargetMachine> X("c", " C backend");
+
+ /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
+ /// any unnamed structure types that are used by the program, and merges
+ /// external functions with the same name.
+ ///
+ class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
+ public:
+ static char ID;
+ CBackendNameAllUsedStructsAndMergeFunctions()
+ : ModulePass((intptr_t)&ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ }
+
+ virtual const char *getPassName() const {
+ return "C backend type canonicalizer";
+ }
+
+ virtual bool runOnModule(Module &M);
+ };
+
+ char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0;
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ std::ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const TargetAsmInfo* TAsm;
+ const TargetData* TD;
+ std::map<const Type *, std::string> TypeNames;
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+
+ public:
+ static char ID;
+ CWriter(std::ostream &o)
+ : FunctionPass((intptr_t)&ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), TD(0) {}
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ FPConstantMap.clear();
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete Mang;
+ TypeNames.clear();
+ return false;
+ }
+
+ std::ostream &printType(std::ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false);
+ std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(std::ostream &Out,
+ const PointerType *Ty);
+
+ void writeOperand(Value *Operand);
+ void writeOperandRaw(Value *Operand);
+ void writeOperandInternal(Value *Operand);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, ICmpInst::Predicate predicate);
+ bool writeInstructionCast(const Instruction &I);
+
+ private :
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+
+ void printModule(Module *M);
+ void printModuleTypes(const TypeSymbolTable &ST);
+ void printContainedStructs(const Type *Ty, std::set<const StructType *> &);
+ void printFloatingPointConstants(Function &F);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printConstant(Constant *CPV);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE);
+ void printConstantArray(ConstantArray *CPA);
+ void printConstantVector(ConstantVector *CP);
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::VoidTy || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm
+ if (I.hasOneUse() && isInlineAsm(*I.use_back())) return false;
+
+    // Only inline an instruction if its use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+ static bool isInlineAsm(const Instruction& I) {
+ if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0)))
+ return true;
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+
+ void visitUnwindInst(UnwindInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+
+ void visitMallocInst(MallocInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitFreeInst (FreeInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInstruction(Instruction &I) {
+ cerr << "C Writer does not know about " << I;
+ abort();
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printIndexingExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+/// This method inserts names for any unnamed structure types that are used by
+/// the program, and removes names from structure types that are not used by the
+/// program.
+///
+bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
+ // Get a set of types that are used by the program...
+ std::set<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes();
+
+ // Loop over the module symbol table, removing types from UT that are
+ // already named, and removing names for types that are not used.
+ //
+ TypeSymbolTable &TST = M.getTypeSymbolTable();
+ for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ) {
+ TypeSymbolTable::iterator I = TI++;
+
+ // If this isn't a struct type, remove it from our set of types to name.
+ // This simplifies emission later.
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second)) {
+ TST.remove(I);
+ } else {
+ // If this is not used, remove it from the symbol table.
+ std::set<const Type *>::iterator UTI = UT.find(I->second);
+ if (UTI == UT.end())
+ TST.remove(I);
+ else
+ UT.erase(UTI); // Only keep one name for this type.
+ }
+ }
+
+ // UT now contains types that are not named. Loop over it, naming
+ // structure types.
+ //
+ bool Changed = false;
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end();
+ I != E; ++I)
+ if (const StructType *ST = dyn_cast<StructType>(*I)) {
+ while (M.addTypeName("unnamed"+utostr(RenameCounter), ST))
+ ++RenameCounter;
+ Changed = true;
+ }
+
+
+ // Loop over all external functions and globals. If we have two with
+ // identical names, merge them.
+ // FIXME: This code should disappear when we don't allow values with the same
+ // names when they have different types!
+ std::map<std::string, GlobalValue*> ExtSymbols;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+ Function *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+ // Do the same for globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type, except that instead of printing the type as void (*)(Struct*, ...)
+/// it is printed as "Struct (*)(...)" for struct-return functions.
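+/// For example, a pointer to void(%struct.S*, i32) would be printed roughly as
+/// "struct l_S (*) (unsigned int)", assuming the struct ends up named l_S.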
+void CWriter::printStructReturnPointerFunctionType(std::ostream &Out,
+ const PointerType *TheTy) {
+ const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ const Type *RetTy = cast<PointerType>(I->get())->getElementType();
+ unsigned Idx = 1;
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ for (++I; I != E; ++I) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ printType(FunctionInnards, *I,
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt), "");
+ PrintedType = true;
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (PrintedType)
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, RetTy,
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt), tstr);
+}
+
+std::ostream &
+CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isInteger()) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else {
+ assert(NumBits <= 64 && "Bit widths > 64 not implemented yet");
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ default :
+ cerr << "Unknown primitive type: " << *Ty << "\n";
+ abort();
+ }
+}
+
+// printType - Print a variable declaration for the given Type* and variable
+// name.
+//
+std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName) {
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (" << NameSoFar << ") (";
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, *I,
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams())
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt), tstr);
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (isa<ArrayType>(PTy->getElementType()) ||
+ isa<VectorType>(PTy->getElementType()))
+ ptrName = "(" + ptrName + ")";
+
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ return printType(Out, ATy->getElementType(), false,
+ NameSoFar + "[" + utostr(NumElements) + "]");
+ }
+
+ case Type::VectorTyID: {
+ const VectorType *PTy = cast<VectorType>(Ty);
+ unsigned NumElements = PTy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ return printType(Out, PTy->getElementType(), false,
+ NameSoFar + "[" + utostr(NumElements) + "]");
+ }
+
+ case Type::OpaqueTyID: {
+ static int Count = 0;
+ std::string TyName = "struct opaque_" + itostr(Count++);
+ assert(TypeNames.find(Ty) == TypeNames.end());
+ TypeNames[Ty] = TyName;
+ return Out << TyName << ' ' << NameSoFar;
+ }
+ default:
+ assert(0 && "Unhandled case in getTypeProps!");
+ abort();
+ }
+
+ return Out;
+}
+
+void CWriter::printConstantArray(ConstantArray *CPA) {
+
+ // As a special case, print the array as a string if it is an array of
+ // i8 values.
+ //
+ const Type *ETy = CPA->getType()->getElementType();
+ bool isString = (ETy == Type::Int8Ty);
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << C;
+ else
+ Out << C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)));
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)));
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)));
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)));
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
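+// For example, 1.5 survives the round trip exactly and can be emitted inline,
+// while a value whose decimal form loses bits is instead emitted through the
+// FPConstant table (see printFloatingPointConstants below).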
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", CFP->getValue());
+
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return atof(Buffer) == CFP->getValue();
+ return false;
+#else
+ std::string StrVal = ftostr(CFP->getValue());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like "Inf"
+ // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return atof(StrVal.c_str()) == CFP->getValue();
+ return false;
+#endif
+}
+
+/// Print out the casting for a cast operation. This does the double casting
+/// necessary for conversion to the destination type, if necessary.
+/// @brief Print a cast
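+/// For example, a zext from i8 to i32 emits casts roughly like
+/// "(unsigned int)(unsigned char)" so the value is widened without sign
+/// extension.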
+void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ assert(0 && "Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ assert(0 && "Invalid cast opcode");
+ break;
+ }
+}
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::Int1Ty) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0));
+ if (CE->getType() == Type::Int1Ty &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(&(";
+ printIndexingExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV));
+ Out << "))";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0));
+ Out << '?';
+ printConstant(CE->getOperand(1));
+ Out << ':';
+ printConstant(CE->getOperand(2));
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: assert(0 && "Illegal ICmp predicate");
+ }
+ break;
+ default: assert(0 && "Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+ cerr << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+ abort();
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isFirstClassType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/0)";
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ const Type* Ty = CI->getType();
+ if (Ty == Type::Int1Ty)
+ Out << (CI->getZExtValue() ? '1' : '0') ;
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << "ll";
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" : "double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ if (IsNAN(FPC->getValue())) {
+ // The value is NaN
+
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(FPC->getValue());
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::FloatTy)
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(FPC->getValue())) {
+ // The value is Inf
+ if (FPC->getValue() < 0) Out << '-';
+ Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", FPC->getValue());
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValue());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ);
+ }
+ }
+ Out << " }";
+ } else {
+ printConstantArray(cast<ConstantArray>(CPV));
+ }
+ break;
+
+ case Type::VectorTyID:
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const VectorType *AT = cast<VectorType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ);
+ }
+ }
+ Out << " }";
+ } else {
+ printConstantVector(cast<ConstantVector>(CPV));
+ }
+ break;
+
+ case Type::StructTyID:
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)));
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)));
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)));
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)));
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV);
+ break;
+ }
+ // FALL THROUGH
+ default:
+ cerr << "Unknown constant type: " << *CPV << "\n";
+ abort();
+ }
+}
+
+// Some constant expressions need to be casted back to the original types
+// because their operands were casted to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
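+// For example, a lshr constant expression on i8 operands is wrapped roughly as
+// ((unsigned char)(...)) so the shift is performed on an unsigned value.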
+bool CWriter::printConstExprCast(const ConstantExpr* CE) {
+ bool NeedsExplicitCast = false;
+ const Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isInteger() && Ty != Type::Int1Ty)
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
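+// For example, an operand of a udiv on i32 constants is emitted roughly as
+// ((unsigned int)C) so that C performs an unsigned division.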
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true so it gets
+ // casted below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+ // Write out the casted constant if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV);
+ Out << ")";
+ } else
+ printConstant(CPV);
+}
+
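+// GetValueName - Turn an LLVM value name into a legal C identifier. Local
+// names get an "llvm_cbe_" prefix with every character outside [A-Za-z0-9_]
+// replaced by '_'; global values are named through the Mangler instead.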
+std::string CWriter::GetValueName(const Value *Operand) {
+ std::string Name;
+
+ if (!isa<GlobalValue>(Operand) && Operand->getName() != "") {
+ std::string VarName;
+
+ Name = Operand->getName();
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_'))
+ VarName += '_';
+ else
+ VarName += ch;
+ }
+
+ Name = "llvm_cbe_" + VarName;
+ } else {
+ Name = Mang->getValueName(Operand);
+ }
+
+ return Name;
+}
+
+void CWriter::writeOperandInternal(Value *Operand) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ // Should we inline this instruction to build a tree?
+ Out << '(';
+ visit(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperandRaw(Value *Operand) {
+ Constant* CPV = dyn_cast<Constant>(Operand);
+ if (CPV && !isa<GlobalValue>(CPV)) {
+ printConstant(CPV);
+ } else {
+ Out << GetValueName(Operand);
+ }
+}
+
+void CWriter::writeOperand(Value *Operand) {
+ if (isa<GlobalVariable>(Operand) || isDirectAlloca(Operand))
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand);
+
+ if (isa<GlobalVariable>(Operand) || isDirectAlloca(Operand))
+ Out << ')';
+}
+
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
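+// For example, a udiv of i16 values is emitted roughly as
+// ((unsigned short)(X / Y)), truncating the promoted int result back to
+// 16 bits.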
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ const Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
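+// For example, an operand of an ashr on i16 values is emitted roughly as
+// ((signed short)X) so C performs an arithmetic (sign-extending) shift.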
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
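+// For example, both operands of an unsigned compare such as icmp ult on i32
+// values are emitted roughly as ((unsigned int)X) so the C comparison is
+// unsigned.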
+void CWriter::writeOperandWithCast(Value* Operand, ICmpInst::Predicate predicate) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (predicate) {
+ default:
+ // for eq and ne, it doesn't matter
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ shouldCast = true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ if (OpTy->isInteger() && OpTy != Type::Int1Ty)
+ printSimpleType(Out, OpTy, castIsSigned);
+ else
+ printType(Out, OpTy); // not integer, sign doesn't matter
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(std::ostream& Out) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__OpenBSD__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+ Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+ // On X86, set the FP control word to 64-bits of precision instead of 80 bits.
+ Out << "#if defined(__GNUC__) && !defined(__llvm__)\n"
+ << "#if defined(i386) || defined(__i386__) || defined(__i386) || "
+ << "defined(__x86_64__)\n"
+ << "#undef CODE_FOR_MAIN\n"
+ << "#define CODE_FOR_MAIN() \\\n"
+ << " {short F;__asm__ (\"fnstcw %0\" : \"=m\" (*&F)); \\\n"
+ << " F=(F&~0x300)|0x200;__asm__(\"fldcw %0\"::\"m\"(*&F));}\n"
+ << "#endif\n#endif\n";
+
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
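+/// Each list element is expected to be a { i32 priority, void ()* fn } struct;
+/// a null function pointer terminates the list.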
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+ // Otherwise, if it is other metadata, don't print it. This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+
+bool CWriter::doInitialization(Module &M) {
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+ // Ensure that all structure types have names...
+ Mang = new Mangler(M);
+ Mang->markCharUnacceptable('.');
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+ // get declaration for alloca
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ generateCompilerSpecificCode(Out);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+
+ << "\n\n/* Global Declarations */\n";
+
+ // First output all the declarations for the program, because C requires
+ // Functions & globals to be declared before they are used.
+ //
+
+ // Loop over the symbol table, emitting all named constants...
+ printModuleTypes(M.getTypeSymbolTable());
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ if (!I->getIntrinsicID() && I->getName() != "setjmp" &&
+ I->getName() != "longjmp" && I->getName() != "_setjmp") {
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")";
+
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasInternalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasInternalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer());
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (isa<StructType>(I->getInitializer()->getType()) ||
+ isa<ArrayType>(I->getInitializer()->getType()) ||
+ isa<VectorType>(I->getInitializer()->getType())) {
+ Out << "{ 0 }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer());
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+ // Emit some helper functions for dealing with FCmp instruction
+ // predicates
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+ // Scan the module for floating point constants. If any FP constant is used
+ // in the function, we want to redirect it here so that we do not depend on
+ // the precision of the printed form, unless the printed form preserves
+ // precision.
+ //
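+ // Each such constant is emitted as a bit-exact global, e.g.
+ //   static const ConstantDoubleTy FPConstant0 = 0x3FB999999999999AULL; /* 0.1 */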
+ static unsigned FPCounter = 0;
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ if (const ConstantFP *FPC = dyn_cast<ConstantFP>(*I))
+ if (!isFPCSafeToPrint(FPC) && // Do not put in FPConstantMap if safe.
+ !FPConstantMap.count(FPC)) {
+ double Val = FPC->getValue();
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::DoubleTy) {
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << std::hex << DoubleToBits(Val) << std::dec
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::FloatTy) {
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << std::hex << FloatToBits(Val) << std::dec
+ << "U; /* " << Val << " */\n";
+ } else
+ assert(0 && "Unknown float type!");
+ }
+
+ Out << '\n';
+}
+
+
+/// printModuleTypes - Run through the type symbol table looking for named
+/// types. For each one found, emit a forward declaration and a typedef.
+///
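+/// The generated C looks roughly like:
+///   struct l_foo;                  /* forward declaration */
+///   typedef struct l_foo l_foo;    /* typedef             */
+///   struct l_foo { ... };          /* contents            */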
+void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // We are only interested in the type plane of the symbol table.
+ TypeSymbolTable::const_iterator I = TST.begin();
+ TypeSymbolTable::const_iterator End = TST.end();
+
+ // If there are no type names, exit early.
+ if (I == End) return;
+
+ // Print out forward declarations for structure types before anything else!
+ Out << "/* Structure forward decls */\n";
+ for (; I != End; ++I) {
+ std::string Name = "struct l_" + Mang->makeNameProper(I->first);
+ Out << Name << ";\n";
+ TypeNames.insert(std::make_pair(I->second, Name));
+ }
+
+ Out << '\n';
+
+ // Now we can print out typedefs. Above, we guaranteed that this can only be
+ // for struct or opaque types.
+ Out << "/* Typedefs */\n";
+ for (I = TST.begin(); I != End; ++I) {
+ std::string Name = "l_" + Mang->makeNameProper(I->first);
+ Out << "typedef ";
+ printType(Out, I->second, false, Name);
+ Out << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far...
+ std::set<const StructType *> StructPrinted;
+
+ // Loop over all structures and emit their full definitions, in dependency
+ // order.
+ //
+ Out << "/* Structure contents */\n";
+ for (I = TST.begin(); I != End; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(I->second))
+ // Only print out used types!
+ printContainedStructs(STy, StructPrinted);
+}
+
+// Recursively print every struct type this one depends on, then print this
+// struct itself, so definitions appear in dependency order.
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(const Type *Ty,
+ std::set<const StructType*> &StructPrinted){
+ // Don't walk through pointers.
+ if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (StructPrinted.insert(STy).second) {
+ // Print structure type out.
+ std::string Name = TypeNames[STy];
+ printType(Out, STy, false, Name, true);
+ Out << ";\n\n";
+ }
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->getFunctionType()->isStructReturn();
+
+ if (F->hasInternalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__stdcall ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__fastcall ";
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const ParamAttrsList *Attrs = FT->getParamAttrs();
+
+ std::stringstream FunctionInnards;
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ }
+
+ std::string ArgName;
+ unsigned Idx = 1;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ printType(FunctionInnards, I->getType(),
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ }
+
+ unsigned Idx = 1;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ printType(FunctionInnards, *I,
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+ // Finish printing arguments... if this is a vararg function, print the ...,
+ // unless there are no known types, in which case, we just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+ FunctionInnards << ", ";
+ FunctionInnards << "..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+ // Get the return type for the function.
+ const Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/ Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt),
+ FunctionInnards.str());
+}
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPoint() && DstTy->isInteger()) ||
+ (DstTy->isFloatingPoint() && SrcTy->isInteger());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.getFunctionType()->isStructReturn();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+ // If this is a struct return function, build the result in a local
+ // temporary and point the hidden struct-return argument at it.
+ if (isStructReturn) {
+ const Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+ // Don't print the label for the basic block if there are no uses, or if
+ // the only terminator use is the predecessor basic block's terminator.
+ // We have to scan the use list because PHI nodes use basic blocks too but
+ // do not require a label to be generated.
+ //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::VoidTy && !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ visit(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator...
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->
+ getFunctionType()->isStructReturn();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+ // Don't output a void return if this is the last basic block in the function
+ if (I.getNumOperands() == 0 &&
+ &*--I.getParent()->getParent()->end() == I.getParent() &&
+ I.getParent()->size() != 1) {
+ return;
+ }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Out << " switch (";
+ writeOperand(SI.getOperand(0));
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+ Out << " case ";
+ writeOperand(SI.getOperand(i));
+ Out << ":\n";
+ BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+ Out << " }\n";
+}
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+ /// FIXME: The code below should be re-enabled once it is made safe with
+ /// respect to loop reordering; for now, conservatively always emit gotos.
+ return true;
+
+ if (next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
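+// Illustrative sketch (hypothetical names): each predecessor has already
+// emitted "llvm_cbe_res__PHI_TEMPORARY = <incoming value>;" before its goto,
+// so the PHI instruction itself prints as
+//   llvm_cbe_res = llvm_cbe_res__PHI_TEMPORARY;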
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+ // binary instructions, shift instructions, setCond instructions.
+ assert(!isa<PointerType>(I.getType()));
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+ if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty)
+ || (I.getType() == Type::FloatTy)) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::FloatTy)
+ Out << "fmodf(";
+ else
+ Out << "fmod(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+ // Certain instructions require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default: cerr << "Invalid operator type!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // We must cast the results of icmp which might be promoted.
+ bool needsCast = false;
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+  // Certain icmp predicates require the operands to be forced to a specific
+  // type, so we use writeOperandWithCast here instead of writeOperand.
+  // Similarly below for operand 1.
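+  // Illustrative: 'icmp ult i32 %a, %b' is emitted roughly as
+  //   ((unsigned int)llvm_cbe_a) < ((unsigned int)llvm_cbe_b)
+  // while the signed predicates cast to the signed C type instead.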
+ writeOperandWithCast(I.getOperand(0), I.getPredicate());
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default: cerr << "Invalid icmp predicate!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getPredicate());
+ if (NeedsClosingParens)
+ Out << "))";
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+static const char * getFloatBitCastField(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ const Type *DstTy = I.getType();
+ const Type *SrcTy = I.getOperand(0)->getType();
+ Out << '(';
+ if (isFPIntBitCast(I)) {
+ // These int<->float and long<->double casts need to be handled specially
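+    // Illustrative (hypothetical names): 'bitcast float %x to i32' comes out
+    // roughly as the comma expression
+    //   (llvm_cbe_y__BITCAST_TEMPORARY.Float = llvm_cbe_x,
+    //    llvm_cbe_y__BITCAST_TEMPORARY.Int32)
+    // reading the value back through the union temporary declared for this cast.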
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ } else {
+ printCast(I.getOpcode(), SrcTy, DstTy);
+ if (I.getOpcode() == Instruction::SExt && SrcTy == Type::Int1Ty) {
+      // Make sure we really get a sext from bool by subtracting the bool from 0
+ Out << "0-";
+ }
+ writeOperand(I.getOperand(0));
+ if (DstTy == Type::Int1Ty &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ }
+ Out << ')';
+}
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+
+void CWriter::lowerIntrinsics(Function &F) {
+ // This is used to keep track of intrinsics that get generated to a lowered
+ // function. We must generate the prototypes before the function body which
+ // will only be expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::powi_f32:
+ case Intrinsic::powi_f64:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+
+void CWriter::visitCallInst(CallInst &I) {
+ //check if we have inline asm
+ if (isInlineAsm(I)) {
+ visitInlineAsm(I);
+ return;
+ }
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ break;
+ }
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ // Output the last argument to the enclosing function...
+ if (I.getParent()->getParent()->arg_empty()) {
+ cerr << "The C backend does not currently support zero "
+ << "argument varargs functions, such as '"
+ << I.getParent()->getParent()->getName() << "'!\n";
+ abort();
+ }
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", *(va_list*)";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return;
+ case Intrinsic::powi_f32:
+ case Intrinsic::powi_f64:
+ Out << "__builtin_powi(";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ", ";
+ writeOperand(I.getOperand(3));
+ Out << ")";
+ return;
+ case Intrinsic::dbg_stoppoint: {
+ // If we use writeOperand directly we get a "u" suffix which is rejected
+ // by gcc.
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+
+ Out << "\n#line "
+ << SPI.getLine()
+ << " \"" << SPI.getDirectory()
+ << SPI.getFileName() << "\"\n";
+ return;
+ }
+ }
+ }
+
+ Value *Callee = I.getCalledValue();
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ bool isStructRet = FTy->isStructReturn();
+ if (isStructRet) {
+ Out << "*(";
+ writeOperand(I.getOperand(1));
+ Out << ") = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer.
+ bool NeedsCast = isStructRet && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+ // Note finally, that this is completely unsafe. ANSI C does not guarantee
+ // that void* and function pointers have the same size. :( To deal with this
+ // in the common case, we handle casts where the number of arguments passed
+ // match exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (!isStructRet)
+ printType(Out, I.getCalledValue()->getType());
+ else
+ printStructReturnPointerFunctionType(Out,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
+
+ Out << '(';
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+
+ CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ bool PrintedArg = false;
+ unsigned Idx = 1;
+ for (; AI != AE; ++AI, ++ArgNo, ++Idx) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt));
+ Out << ')';
+ }
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
+
+
+// This converts the LLVM constraint string to something gcc is expecting.
+// TODO: work out platform-independent constraints and factor those out of
+//       the per-target tables;
+//       handle multiple constraint codes.
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ const char** table = 0;
+
+ //Grab the translation table from TargetAsmInfo if it exists
+ if (!TAsm) {
+ std::string E;
+ const TargetMachineRegistry::Entry* Match =
+ TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
+ if (Match) {
+      // Per-platform TargetMachines don't exist yet, so create one here;
+      // this must be done only once.
+ const TargetMachine* TM = Match->CtorFn(*TheModule, "");
+ TAsm = TM->getTargetAsmInfo();
+ }
+ }
+ if (TAsm)
+ table = TAsm->getAsmCBE();
+
+ //Search the translation table if it exists
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i])
+ return table[i+1];
+
+ //default is identity
+ return c.Codes[0];
+}
+
+//TODO: import logic from AsmPrinter.cpp
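+// Illustrative examples of the rewriting below: "$0 = $1" becomes "%0 = %1",
+// "${2:c}" becomes "%c2", a literal '%' is doubled to "%%", and embedded
+// newline/tab characters become the two-character escapes \n and \t.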
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+ else if (asmstr[i] == '%')//grr
+ { asmstr.replace(i, 1, "%%"); ++i;}
+
+ return asmstr;
+}
+
+// TODO: the assumptions about which constraints consume arguments from the
+//       call are likely wrong;
+//       handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getOperand(0));
+ std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
+ std::vector<std::pair<std::string, Value*> > Input;
+ std::vector<std::pair<std::string, Value*> > Output;
+ std::string Clobber;
+ int count = CI.getType() == Type::VoidTy ? 1 : 0;
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string c =
+ InterpretASMConstraint(*I);
+ switch(I->Type) {
+ default:
+ assert(0 && "Unknown asm constraint");
+ break;
+ case InlineAsm::isInput: {
+ if (c.size()) {
+ Input.push_back(std::make_pair(c, count ? CI.getOperand(count) : &CI));
+ ++count; //consume arg
+ }
+ break;
+ }
+ case InlineAsm::isOutput: {
+ if (c.size()) {
+ Output.push_back(std::make_pair("="+((I->isEarlyClobber ? "&" : "")+c),
+ count ? CI.getOperand(count) : &CI));
+ ++count; //consume arg
+ }
+ break;
+ }
+ case InlineAsm::isClobber: {
+ if (c.size())
+ Clobber += ",\"" + c + "\"";
+ break;
+ }
+ }
+ }
+
+ //fix up the asm string for gcc
+ std::string asmstr = gccifyAsm(as->getAsmString());
+
+ Out << "__asm__ volatile (\"" << asmstr << "\"\n";
+ Out << " :";
+ for (std::vector<std::pair<std::string, Value*> >::iterator I = Output.begin(),
+ E = Output.end(); I != E; ++I) {
+ Out << "\"" << I->first << "\"(";
+ writeOperandRaw(I->second);
+ Out << ")";
+ if (I + 1 != E)
+ Out << ",";
+ }
+ Out << "\n :";
+ for (std::vector<std::pair<std::string, Value*> >::iterator I = Input.begin(),
+ E = Input.end(); I != E; ++I) {
+ Out << "\"" << I->first << "\"(";
+ writeOperandRaw(I->second);
+ Out << ")";
+ if (I + 1 != E)
+ Out << ",";
+ }
+ if (Clobber.size())
+ Out << "\n :" << Clobber.substr(1);
+ Out << ")";
+}
+
+void CWriter::visitMallocInst(MallocInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
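+// For illustration (hypothetical names), the visitor below turns
+// 'alloca i32, i32 %n' into
+//   (unsigned int*) alloca(sizeof(unsigned int) * llvm_cbe_n)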
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
+
+void CWriter::visitFreeInst(FreeInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
+void CWriter::printIndexingExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E) {
+ bool HasImplicitAddress = false;
+ // If accessing a global value with no indexing, avoid *(&GV) syndrome
+ if (isa<GlobalValue>(Ptr)) {
+ HasImplicitAddress = true;
+ } else if (isDirectAlloca(Ptr)) {
+ HasImplicitAddress = true;
+ }
+
+ if (I == E) {
+ if (!HasImplicitAddress)
+ Out << '*'; // Implicit zero first argument: '*x' is equivalent to 'x[0]'
+
+ writeOperandInternal(Ptr);
+ return;
+ }
+
+ const Constant *CI = dyn_cast<Constant>(I.getOperand());
+ if (HasImplicitAddress && (!CI || !CI->isNullValue()))
+ Out << "(&";
+
+ writeOperandInternal(Ptr);
+
+ if (HasImplicitAddress && (!CI || !CI->isNullValue())) {
+ Out << ')';
+ HasImplicitAddress = false; // HIA is only true if we haven't addressed yet
+ }
+
+  assert((!HasImplicitAddress || (CI && CI->isNullValue())) &&
+         "Can only have implicit address with direct accessing");
+
+ if (HasImplicitAddress) {
+ ++I;
+ } else if (CI && CI->isNullValue()) {
+ gep_type_iterator TmpI = I; ++TmpI;
+
+ // Print out the -> operator if possible...
+ if (TmpI != E && isa<StructType>(*TmpI)) {
+ Out << (HasImplicitAddress ? "." : "->");
+ Out << "field" << cast<ConstantInt>(TmpI.getOperand())->getZExtValue();
+ I = ++TmpI;
+ }
+ }
+
+ for (; I != E; ++I)
+ if (isa<StructType>(*I)) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else {
+ Out << '[';
+ writeOperand(I.getOperand());
+ Out << ']';
+ }
+}
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ Out << '*';
+ if (I.isVolatile()) {
+ Out << "((";
+ printType(Out, I.getType(), false, "volatile*");
+ Out << ")";
+ }
+
+ writeOperand(I.getOperand(0));
+
+ if (I.isVolatile())
+ Out << ')';
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ Out << '*';
+ if (I.isVolatile()) {
+ Out << "((";
+ printType(Out, I.getOperand(0)->getType(), false, " volatile*");
+ Out << ")";
+ }
+ writeOperand(I.getPointerOperand());
+ if (I.isVolatile()) Out << ')';
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
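+  // Illustrative: an i17 store then comes out as '((llvm_cbe_val) & <mask>)'
+  // where <mask> is the printed constant 131071, clearing bits above bit 16.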
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask);
+ Out << ")";
+ }
+}
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ Out << '&';
+ printIndexingExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I));
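+  // Illustrative (hypothetical names): 'getelementptr %struct.S* %p, i32 0, i32 1'
+  // prints as '&llvm_cbe_p->field1'; the same indices on a global or direct
+  // alloca print as '&G.field1'.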
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+ std::ostream &o,
+ CodeGenFileType FileType,
+ bool Fast) {
+ if (FileType != TargetMachine::AssemblyFile) return true;
+
+ PM.add(createLowerGCPass());
+ PM.add(createLowerAllocationsPass(true));
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
+ PM.add(new CWriter(o));
+ return false;
+}
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 0000000..38c738e
--- /dev/null
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,41 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ CTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, std::ostream &Out,
+ CodeGenFileType FileType, bool Fast);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile
new file mode 100644
index 0000000..fea2494
--- /dev/null
+++ b/lib/Target/CBackend/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCBackend
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
new file mode 100644
index 0000000..cf2a974
--- /dev/null
+++ b/lib/Target/CellSPU/README.txt
@@ -0,0 +1,10 @@
+//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
+
+TODO:
+* Check in the actual code.
+
+===-------------------------------------------------------------------------===
+
+Note: The CellSPU work is work-in-progress and "alpha" quality code. No code
+has been officially checked into the llvm repo, but this will happen Real Soon,
+Real Soon Now.
diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h
new file mode 100644
index 0000000..e5b84e6
--- /dev/null
+++ b/lib/Target/IA64/IA64.h
@@ -0,0 +1,54 @@
+//===-- IA64.h - Top-level interface for IA64 representation ------*- C++ -*-===//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the IA64
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_IA64_H
+#define TARGET_IA64_H
+
+#include <iosfwd>
+
+namespace llvm {
+
+class IA64TargetMachine;
+class FunctionPass;
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM
+/// function into IA64 machine code in a sane, DAG->DAG transform.
+///
+FunctionPass *createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM);
+
+/// createIA64BundlingPass - This pass adds stop bits and bundles
+/// instructions.
+///
+FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM);
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *createIA64CodePrinterPass(std::ostream &o, IA64TargetMachine &tm);
+
+} // End llvm namespace
+
+// Defines symbolic names for IA64 registers. This defines a mapping from
+// register name to register number.
+//
+#include "IA64GenRegisterNames.inc"
+
+// Defines symbolic names for the IA64 instructions.
+//
+#include "IA64GenInstrNames.inc"
+
+#endif
+
+
diff --git a/lib/Target/IA64/IA64.td b/lib/Target/IA64/IA64.td
new file mode 100644
index 0000000..2e231d4
--- /dev/null
+++ b/lib/Target/IA64/IA64.td
@@ -0,0 +1,39 @@
+//===-- IA64.td - Target definition file for Intel IA64 -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel IA64 architecture,
+// also known variously as ia64, IA-64, IPF, "the Itanium architecture" etc.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "IA64RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrInfo.td"
+
+def IA64InstrInfo : InstrInfo { }
+
+def IA64 : Target {
+ // Our instruction set
+ let InstructionSet = IA64InstrInfo;
+
+}
+
+
diff --git a/lib/Target/IA64/IA64AsmPrinter.cpp b/lib/Target/IA64/IA64AsmPrinter.cpp
new file mode 100644
index 0000000..d576c4c
--- /dev/null
+++ b/lib/Target/IA64/IA64AsmPrinter.cpp
@@ -0,0 +1,360 @@
+//===-- IA64AsmPrinter.cpp - Print out IA64 LLVM as assembly --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas'
+// assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this
+// output, but if so that's a bug I'd like to hear about: please file a bug
+// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with
+// the Intel C/C++ compiler for Itanium Linux.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct IA64AsmPrinter : public AsmPrinter {
+ std::set<std::string> ExternalFunctionNames, ExternalObjectNames;
+
+ IA64AsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
+ : AsmPrinter(O, TM, T) {
+ }
+
+ virtual const char *getPassName() const {
+ return "IA64 Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo){
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(MRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physref??");
+ //XXX Bug Workaround: See note in Printer::doInitialization about %.
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+ } else {
+ printOp(MO);
+ }
+ }
+
+ void printS8ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImmedValue();
+      if(val>=128) val=val-256; // sign-extend the 8-bit immediate
+ O << val;
+ }
+ void printS14ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImmedValue();
+      if(val>=8192) val=val-16384; // sign-extend the 14-bit immediate
+ O << val;
+ }
+ void printS22ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImmedValue();
+      if(val>=2097152) val=val-4194304; // sign-extend the 22-bit immediate
+ O << val;
+ }
+ void printU64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (uint64_t)MI->getOperand(OpNo).getImmedValue();
+ }
+ void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker
+// errors - instead of add rX = @gprel(CPI<whatever>), r1;; we now
+// emit movl rX = @gprel(CPI<whatever);;
+// add rX = rX, r1;
+// this gives us 64 bits instead of 22 (for the add long imm) to play
+// with, which shuts up the linker. The problem is that the constant
+// pool entries aren't immediates at this stage, so we check here.
+// If it's an immediate, print it the old fashioned way. If it's
+// not, we print it as a constant pool index.
+ if(MI->getOperand(OpNo).isImmediate()) {
+ O << (int64_t)MI->getOperand(OpNo).getImmedValue();
+ } else { // this is a constant pool reference: FIXME: assert this
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+
+ void printGlobalOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), true); // this is a br.call instruction
+ }
+
+ std::string getSectionForFunction(const Function &F) const;
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool isBRCALLinsn= false);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "IA64GenAsmWriter.inc"
+
+
+std::string IA64AsmPrinter::getSectionForFunction(const Function &F) const {
+ // This means "Allocated instruXions in mem, initialized".
+ return "\n\t.section .text, \"ax\", \"progbits\"\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ // Print out labels for the function.
+ EmitAlignment(5);
+ O << "\t.global\t" << CurrentFnName << "\n";
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (I->pred_begin() != I->pred_end()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void IA64AsmPrinter::printOp(const MachineOperand &MO,
+ bool isBRCALLinsn /* = false */) {
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).Name;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImmedValue();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_ConstantPoolIndex: {
+ O << "@gprel(" << TAI->getPrivateGlobalPrefix()
+ << "CPI" << getFunctionNumber() << "_"
+ << MO.getConstantPoolIndex() << ")";
+ return;
+ }
+
+ case MachineOperand::MO_GlobalAddress: {
+
+ // functions need @ltoff(@fptr(fn_name)) form
+ GlobalValue *GV = MO.getGlobal();
+ Function *F = dyn_cast<Function>(GV);
+
+ bool Needfptr=false; // if we're computing an address @ltoff(X), do
+ // we need to decorate it so it becomes
+ // @ltoff(@fptr(X)) ?
+ if (F && !isBRCALLinsn /*&& F->isDeclaration()*/)
+ Needfptr=true;
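+    // Illustrative: a data reference to a function foo prints
+    // '@ltoff(@fptr(foo#))', a reference to an ordinary global bar prints
+    // '@ltoff(bar#)', and a direct br.call target prints just 'foo'.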
+
+ // if this is the target of a call instruction, we should define
+ // the function somewhere (GNU gas has no problem without this, but
+ // Intel ias rightly complains of an 'undefined symbol')
+
+ if (F /*&& isBRCALLinsn*/ && F->isDeclaration())
+ ExternalFunctionNames.insert(Mang->getValueName(MO.getGlobal()));
+ else
+ if (GV->isDeclaration()) // e.g. stuff like 'stdin'
+ ExternalObjectNames.insert(Mang->getValueName(MO.getGlobal()));
+
+ if (!isBRCALLinsn)
+ O << "@ltoff(";
+ if (Needfptr)
+ O << "@fptr(";
+ O << Mang->getValueName(MO.getGlobal());
+
+ if (Needfptr && !isBRCALLinsn)
+ O << "#))"; // close both fptr( and ltoff(
+ else {
+ if (Needfptr)
+ O << "#)"; // close only fptr(
+ if (!isBRCALLinsn)
+ O << "#)"; // close only ltoff(
+ }
+
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << " + " << Offset;
+ else if (Offset < 0)
+ O << " - " << -Offset;
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ ExternalFunctionNames.insert(MO.getSymbolName());
+ return;
+ default:
+ O << "<AsmPrinter: unknown operand type: " << MO.getType() << " >"; return;
+ }
+}
+
+/// printMachineInstruction -- Print out a single IA64 LLVM instruction
+/// MI to the current output stream.
+///
+void IA64AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool IA64AsmPrinter::doInitialization(Module &M) {
+ AsmPrinter::doInitialization(M);
+
+ O << "\n.ident \"LLVM-ia64\"\n\n"
+ << "\t.psr lsb\n" // should be "msb" on HP-UX, for starters
+ << "\t.radix C\n"
+ << "\t.psr abi64\n"; // we only support 64 bits for now
+ return false;
+}
+
+bool IA64AsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+    if (I->hasInitializer()) { // External globals require no code
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Size = TD->getTypeSize(C->getType());
+ unsigned Align = TD->getPreferredTypeAlignmentShift(C->getType());
+
+ if (C->isNullValue() &&
+ (I->hasLinkOnceLinkage() || I->hasInternalLinkage() ||
+ I->hasWeakLinkage() /* FIXME: Verify correct */)) {
+ SwitchToDataSection(".data", I);
+ if (I->hasInternalLinkage()) {
+ O << "\t.lcomm " << name << "#," << TD->getTypeSize(C->getType())
+ << "," << (1 << Align);
+ O << "\n";
+ } else {
+ O << "\t.common " << name << "#," << TD->getTypeSize(C->getType())
+ << "," << (1 << Align);
+ O << "\n";
+ }
+ } else {
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage: // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << "\n";
+ O << "\t.section\t.llvm.linkonce.d." << name
+ << ", \"aw\", \"progbits\"\n";
+ SwitchToDataSection("", I);
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ SwitchToDataSection(C->isNullValue() ? ".bss" : ".data", I);
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "GhostLinkage cannot appear in IA64AsmPrinter!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align);
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ O << name << ":\t\t\t\t// " << *C << "\n";
+ EmitGlobalConstant(C);
+ }
+ }
+
+ // we print out ".global X \n .type X, @function" for each external function
+ O << "\n\n// br.call targets referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalFunctionNames.begin(),
+ e = ExternalFunctionNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @function\n";
+ }
+ O << "\n\n";
+
+ // we print out ".global X \n .type X, @object" for each external object
+ O << "\n\n// (external) symbols referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalObjectNames.begin(),
+ e = ExternalObjectNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @object\n";
+ }
+ O << "\n\n";
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream, using
+/// the given target machine description.
+///
+FunctionPass *llvm::createIA64CodePrinterPass(std::ostream &o,
+ IA64TargetMachine &tm) {
+ return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
+
diff --git a/lib/Target/IA64/IA64Bundling.cpp b/lib/Target/IA64/IA64Bundling.cpp
new file mode 100644
index 0000000..6c9fa29
--- /dev/null
+++ b/lib/Target/IA64/IA64Bundling.cpp
@@ -0,0 +1,118 @@
+//===-- IA64Bundling.cpp - IA-64 instruction bundling pass. ------------ --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Add stops where required to prevent read-after-write and write-after-write
+// dependencies, for both registers and memory addresses. There are exceptions:
+//
+// - Compare instructions (cmp*, tbit, tnat, fcmp, frcpa) are OK with
+// WAW dependencies so long as they all target p0, or are of parallel
+// type (.and*/.or*)
+//
+// FIXME: bundling, for now, is left to the assembler.
+// FIXME: this might be an appropriate place to translate between different
+// instructions that do the same thing, if this helps bundling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64InstrInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(StopBitsAdded, "Number of stop bits added");
+
+namespace {
+ struct IA64BundlingPass : public MachineFunctionPass {
+ static char ID;
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ IA64TargetMachine &TM;
+
+ IA64BundlingPass(IA64TargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) Bundling Pass";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ // XXX: ugly global, but pending writes can cross basic blocks. Note that
+ // taken branches end instruction groups. So we only need to worry about
+ // 'fallthrough' code
+ std::set<unsigned> PendingRegWrites;
+ };
+ char IA64BundlingPass::ID = 0;
+} // end of anonymous namespace
+
+/// createIA64BundlingPass - Returns a pass that adds STOP (;;) instructions
+/// and arranges the result into bundles.
+///
+FunctionPass *llvm::createIA64BundlingPass(IA64TargetMachine &tm) {
+ return new IA64BundlingPass(tm);
+}
+
+/// runOnMachineBasicBlock - add stops and bundle this MBB.
+///
+bool IA64BundlingPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *CurrentInsn = I++;
+ std::set<unsigned> CurrentReads, CurrentWrites, OrigWrites;
+
+ for(unsigned i=0; i < CurrentInsn->getNumOperands(); i++) {
+ MachineOperand &MO=CurrentInsn->getOperand(i);
+ if(MO.isRegister()) {
+ if(MO.isUse()) { // TODO: exclude p0
+ CurrentReads.insert(MO.getReg());
+ }
+ if(MO.isDef()) { // TODO: exclude p0
+ CurrentWrites.insert(MO.getReg());
+ OrigWrites.insert(MO.getReg()); // FIXME: use a nondestructive
+ // set_intersect instead?
+ }
+ }
+ }
+
+ // CurrentReads/CurrentWrites contain info for the current instruction.
+ // Does it read or write any registers that are pending a write?
+ // (i.e. not separated by a stop)
+ set_intersect(CurrentReads, PendingRegWrites);
+ set_intersect(CurrentWrites, PendingRegWrites);
+
+ if(! (CurrentReads.empty() && CurrentWrites.empty()) ) {
+ // there is a conflict, insert a stop and reset PendingRegWrites
+ CurrentInsn = BuildMI(MBB, CurrentInsn,
+ TM.getInstrInfo()->get(IA64::STOP), 0);
+ PendingRegWrites=OrigWrites; // carry over current writes to next insn
+ Changed=true; StopBitsAdded++; // update stats
+ } else { // otherwise, track additional pending writes
+ set_union(PendingRegWrites, OrigWrites);
+ }
+ } // onto the next insn in the MBB
+
+ return Changed;
+}
+
diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
new file mode 100644
index 0000000..53b704e
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
@@ -0,0 +1,587 @@
+//===---- IA64ISelDAGToDAG.cpp - IA64 pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for IA64,
+// converting a legalized dag to an IA64 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "IA64ISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class IA64DAGToDAGISel : public SelectionDAGISel {
+ IA64TargetLowering IA64Lowering;
+ unsigned GlobalBaseReg;
+ public:
+ IA64DAGToDAGISel(IA64TargetMachine &TM)
+ : SelectionDAGISel(IA64Lowering), IA64Lowering(*TM.getTargetLowering()) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ return SelectionDAGISel::runOnFunction(Fn);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDOperand getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ // SDOperand getGlobalBaseReg(); TODO: hmm
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDOperand N);
+
+ SDNode *SelectIntImmediateExpr(SDOperand LHS, SDOperand RHS,
+ unsigned OCHi, unsigned OCLo,
+ bool IsArithmetic = false,
+ bool Negate = false);
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDOperand SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC);
+
+ /// SelectAddr - Given the specified address, return the two operands for a
+ /// load/store instruction, and return true if it should be an indexed [r+r]
+ /// operation.
+ bool SelectAddr(SDOperand Addr, SDOperand &Op1, SDOperand &Op2);
+
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) DAG->DAG Instruction Selector";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "IA64GenDAGISel.inc"
+
+private:
+ SDNode *SelectDIV(SDOperand Op);
+ };
+}
+
+/// InstructionSelectBasicBlock - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void IA64DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(DAG);
+}
+
+SDNode *IA64DAGToDAGISel::SelectDIV(SDOperand Op) {
+ SDNode *N = Op.Val;
+ SDOperand Chain = N->getOperand(0);
+ SDOperand Tmp1 = N->getOperand(0);
+ SDOperand Tmp2 = N->getOperand(1);
+ AddToISelQueue(Chain);
+
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+
+ bool isFP=false;
+
+ if(MVT::isFloatingPoint(Tmp1.getValueType()))
+ isFP=true;
+
+ bool isModulus=false; // is it a division or a modulus?
+ bool isSigned=false;
+
+ switch(N->getOpcode()) {
+ case ISD::FDIV:
+ case ISD::SDIV: isModulus=false; isSigned=true; break;
+ case ISD::UDIV: isModulus=false; isSigned=false; break;
+ case ISD::FREM:
+ case ISD::SREM: isModulus=true; isSigned=true; break;
+ case ISD::UREM: isModulus=true; isSigned=false; break;
+ }
+
+ // TODO: check for integer divides by powers of 2 (or other simple patterns?)
+
+ SDOperand TmpPR, TmpPR2;
+ SDOperand TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8;
+ SDOperand TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15;
+ SDNode *Result;
+
+ // we'll need copies of F0 and F1
+ SDOperand F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
+ SDOperand F1 = CurDAG->getRegister(IA64::F1, MVT::f64);
+
+ // OK, emit some code:
+
+ if(!isFP) {
+ // first, load the inputs into FP regs.
+ TmpF1 =
+ SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp1), 0);
+ Chain = TmpF1.getValue(1);
+ TmpF2 =
+ SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp2), 0);
+ Chain = TmpF2.getValue(1);
+
+ // next, convert the inputs to FP
+ if(isSigned) {
+ TmpF3 =
+ SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF1), 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2), 0);
+ Chain = TmpF4.getValue(1);
+ } else { // is unsigned
+ TmpF3 =
+ SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1), 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2), 0);
+ Chain = TmpF4.getValue(1);
+ }
+
+ } else { // this is an FP divide/remainder, so we 'leak' some temp
+ // regs and assign TmpF3=Tmp1, TmpF4=Tmp2
+ TmpF3=Tmp1;
+ TmpF4=Tmp2;
+ }
+
+ // we start by computing an approximate reciprocal (good to 9 bits?)
+ // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
+ if(isFP)
+ TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1,
+ TmpF3, TmpF4), 0);
+ else
+ TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1,
+ TmpF3, TmpF4), 0);
+
+ TmpPR = TmpF5.getValue(1);
+ Chain = TmpF5.getValue(2);
+
+ SDOperand minusB;
+ if(isModulus) { // for remainders, it'll be handy to have
+ // copies of -input_b
+ minusB = SDOperand(CurDAG->getTargetNode(IA64::SUB, MVT::i64,
+ CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0);
+ Chain = minusB.getValue(1);
+ }
+
+ SDOperand TmpE0, TmpY1, TmpE1, TmpY2;
+
+ SDOperand OpsE0[] = { TmpF4, TmpF5, F1, TmpPR };
+ TmpE0 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
+ OpsE0, 4), 0);
+ Chain = TmpE0.getValue(1);
+ SDOperand OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR };
+ TmpY1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsY1, 4), 0);
+ Chain = TmpY1.getValue(1);
+ SDOperand OpsE1[] = { TmpE0, TmpE0, F0, TmpPR };
+ TmpE1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsE1, 4), 0);
+ Chain = TmpE1.getValue(1);
+ SDOperand OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR };
+ TmpY2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsY2, 4), 0);
+ Chain = TmpY2.getValue(1);
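+  // In effect (illustrative math): starting from y0 ~ 1/b produced by frcpa,
+  //   e0 = 1 - b*y0,   y1 = y0 + e0*y0,   e1 = e0*e0,   y2 = y1 + e1*y1,
+  // a Newton-Raphson refinement that roughly doubles the accurate bits of the
+  // reciprocal at each step.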
+
+ if(isFP) { // if this is an FP divide, we finish up here and exit early
+ if(isModulus)
+ assert(0 && "Sorry, try another FORTRAN compiler.");
+
+ SDOperand TmpE2, TmpY3, TmpQ0, TmpR0;
+
+ SDOperand OpsE2[] = { TmpE1, TmpE1, F0, TmpPR };
+ TmpE2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsE2, 4), 0);
+ Chain = TmpE2.getValue(1);
+ SDOperand OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR };
+ TmpY3 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsY3, 4), 0);
+ Chain = TmpY3.getValue(1);
+ SDOperand OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR };
+ TmpQ0 =
+ SDOperand(CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec!
+ OpsQ0, 4), 0);
+ Chain = TmpQ0.getValue(1);
+ SDOperand OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR };
+ TmpR0 =
+ SDOperand(CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec!
+ OpsR0, 4), 0);
+ Chain = TmpR0.getValue(1);
+
+// we want Result to have the same target register as the frcpa, so
+// we two-address hack it. See the comment "for this to work..." on
+// page 48 of Intel application note #245415
+ SDOperand Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR };
+ Result = CurDAG->getTargetNode(IA64::TCFMADS0, MVT::f64, // d.p. s0 rndg!
+ Ops, 5);
+ Chain = SDOperand(Result, 1);
+ return Result; // XXX: early exit!
+ } else { // this is *not* an FP divide, so there's a bit left to do:
+
+ SDOperand TmpQ2, TmpR2, TmpQ3, TmpQ;
+
+ SDOperand OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR };
+ TmpQ2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ OpsQ2, 4), 0);
+ Chain = TmpQ2.getValue(1);
+ SDOperand OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR };
+ TmpR2 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
+ OpsR2, 4), 0);
+ Chain = TmpR2.getValue(1);
+
+// we want TmpQ3 to have the same target register as the frcpa? maybe we
+// should two-address hack it. See the comment "for this to work..." on page
+// 48 of Intel application note #245415
+ SDOperand OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR };
+ TmpQ3 = SDOperand(CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64,
+ OpsQ3, 5), 0);
+ Chain = TmpQ3.getValue(1);
+
+ // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0)
+ // the FPSWA won't be able to help out in the case of large/tiny
+ // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0.
+
+ if(isSigned)
+ TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1,
+ MVT::f64, TmpQ3), 0);
+ else
+ TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1,
+ MVT::f64, TmpQ3), 0);
+
+ Chain = TmpQ.getValue(1);
+
+ if(isModulus) {
+ SDOperand FPminusB =
+ SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, minusB), 0);
+ Chain = FPminusB.getValue(1);
+ SDOperand Remainder =
+ SDOperand(CurDAG->getTargetNode(IA64::XMAL, MVT::f64,
+ TmpQ, FPminusB, TmpF1), 0);
+ Chain = Remainder.getValue(1);
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, Remainder);
+ Chain = SDOperand(Result, 1);
+ } else { // just an integer divide
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpQ);
+ Chain = SDOperand(Result, 1);
+ }
+
+ return Result;
+ } // wasn't an FP divide
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *IA64DAGToDAGISel::Select(SDOperand Op) {
+ SDNode *N = Op.Val;
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
+ N->getOpcode() < IA64ISD::FIRST_NUMBER)
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case IA64ISD::BRCALL: { // XXX: this is also a hack!
+ SDOperand Chain = N->getOperand(0);
+ SDOperand InFlag; // Null incoming flag value.
+
+ AddToISelQueue(Chain);
+ if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
+ InFlag = N->getOperand(2);
+ AddToISelQueue(InFlag);
+ }
+
+ unsigned CallOpcode;
+ SDOperand CallOperand;
+
+ // if we can call directly, do so
+ if (GlobalAddressSDNode *GASD =
+ dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
+ CallOpcode = IA64::BRCALL_IPREL_GA;
+ CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
+ } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
+ // FIXME: we currently NEED this case for correctness, to avoid
+ // "non-pic code with imm reloc.n against dynamic symbol" errors
+ CallOpcode = IA64::BRCALL_IPREL_ES;
+ CallOperand = N->getOperand(1);
+ } else {
+ // otherwise we need to load the function descriptor,
+ // load the branch target (function)'s entry point and GP,
+ // branch (call) then restore the GP
+ SDOperand FnDescriptor = N->getOperand(1);
+ AddToISelQueue(FnDescriptor);
+
+ // load the branch target's entry point [mem] and
+ // GP value [mem+8]
+ SDOperand targetEntryPoint=
+ SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, FnDescriptor), 0);
+ Chain = targetEntryPoint.getValue(1);
+ SDOperand targetGPAddr=
+ SDOperand(CurDAG->getTargetNode(IA64::ADDS, MVT::i64,
+ FnDescriptor,
+ CurDAG->getConstant(8, MVT::i64)), 0);
+ Chain = targetGPAddr.getValue(1);
+ SDOperand targetGP =
+ SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, targetGPAddr), 0);
+ Chain = targetGP.getValue(1);
+
+ Chain = CurDAG->getCopyToReg(Chain, IA64::r1, targetGP, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = CurDAG->getCopyToReg(Chain, IA64::B6, targetEntryPoint, InFlag); // FLAG these?
+ InFlag = Chain.getValue(1);
+
+ CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
+ CallOpcode = IA64::BRCALL_INDIRECT;
+ }
+
+ // Finally, once everything is setup, emit the call itself
+ if(InFlag.Val)
+ Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag,
+ CallOperand, InFlag), 0);
+ else // there might be no arguments
+ Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag,
+ CallOperand, Chain), 0);
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> CallResults;
+
+ CallResults.push_back(Chain);
+ CallResults.push_back(InFlag);
+
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+ return NULL;
+ }
+
+ case IA64ISD::GETFD: {
+ SDOperand Input = N->getOperand(0);
+ AddToISelQueue(Input);
+ return CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input);
+ }
+
+ case ISD::FDIV:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return SelectDIV(Op);
+
+ case ISD::TargetConstantFP: {
+ SDOperand Chain = CurDAG->getEntryNode(); // this is a constant, so..
+
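+    // f0 and f1 architecturally read as +0.0 and +1.0 on IA-64, so these
+    // constants are simply copies out of those registers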
+ SDOperand V;
+ if (cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0)) {
+ V = CurDAG->getCopyFromReg(Chain, IA64::F0, MVT::f64);
+ } else if (cast<ConstantFPSDNode>(N)->isExactlyValue(+1.0)) {
+ V = CurDAG->getCopyFromReg(Chain, IA64::F1, MVT::f64);
+ } else
+ assert(0 && "Unexpected FP constant!");
+
+ ReplaceUses(SDOperand(N, 0), V);
+ return 0;
+ }
+
+ case ISD::FrameIndex: { // TODO: reduce creepyness
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ else
+ return CurDAG->getTargetNode(IA64::MOV, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ }
+
+ case ISD::ConstantPool: { // TODO: nuke the constant pool
+ // (ia64 doesn't need one)
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ Constant *C = CP->getConstVal();
+ SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment());
+ return CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ?
+ CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
+ }
+
+ case ISD::GlobalAddress: {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
+ SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
+ SDOperand Tmp =
+ SDOperand(CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64), GA), 0);
+ return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp);
+ }
+
+/* XXX
+ case ISD::ExternalSymbol: {
+ SDOperand EA = CurDAG->getTargetExternalSymbol(
+ cast<ExternalSymbolSDNode>(N)->getSymbol(),
+ MVT::i64);
+ SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64),
+ EA);
+ return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp);
+ }
+*/
+
+ case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDOperand Chain = LD->getChain();
+ SDOperand Address = LD->getBasePtr();
+ AddToISelQueue(Chain);
+ AddToISelQueue(Address);
+
+ MVT::ValueType TypeBeingLoaded = LD->getLoadedVT();
+ unsigned Opc;
+ switch (TypeBeingLoaded) {
+ default:
+#ifndef NDEBUG
+ N->dump(CurDAG);
+#endif
+ assert(0 && "Cannot load this type!");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::LD1; // first we load a byte, then compare for != 0
+ if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
+ return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
+ SDOperand(CurDAG->getTargetNode(Opc, MVT::i64, Address), 0),
+ CurDAG->getRegister(IA64::r0, MVT::i64),
+ Chain);
+ }
+ /* otherwise, we want to load a bool into something bigger: LD1
+ will do that for us, so we just fall through */
+ }
+ case MVT::i8: Opc = IA64::LD1; break;
+ case MVT::i16: Opc = IA64::LD2; break;
+ case MVT::i32: Opc = IA64::LD4; break;
+ case MVT::i64: Opc = IA64::LD8; break;
+
+ case MVT::f32: Opc = IA64::LDF4; break;
+ case MVT::f64: Opc = IA64::LDF8; break;
+ }
+
+ // TODO: comment this
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
+ Address, Chain);
+ }
+
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDOperand Address = ST->getBasePtr();
+ SDOperand Chain = ST->getChain();
+ AddToISelQueue(Address);
+ AddToISelQueue(Chain);
+
+ unsigned Opc;
+ if (ISD::isNON_TRUNCStore(N)) {
+ switch (N->getOperand(1).getValueType()) {
+ default: assert(0 && "unknown type in store");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::ST1; // we store either 0 or 1 as a byte
+ // first load zero!
+ SDOperand Initial = CurDAG->getCopyFromReg(Chain, IA64::r0, MVT::i64);
+ Chain = Initial.getValue(1);
+ // then load 1 into the same reg iff the predicate to store is 1
+ SDOperand Tmp = ST->getValue();
+ AddToISelQueue(Tmp);
+ Tmp =
+ SDOperand(CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial,
+ CurDAG->getTargetConstant(1, MVT::i64),
+ Tmp), 0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
+ }
+ case MVT::i64: Opc = IA64::ST8; break;
+ case MVT::f64: Opc = IA64::STF8; break;
+ }
+ } else { // Truncating store
+ switch(ST->getStoredVT()) {
+ default: assert(0 && "unknown type in truncstore");
+ case MVT::i8: Opc = IA64::ST1; break;
+ case MVT::i16: Opc = IA64::ST2; break;
+ case MVT::i32: Opc = IA64::ST4; break;
+ case MVT::f32: Opc = IA64::STF4; break;
+ }
+ }
+
+ SDOperand N1 = N->getOperand(1);
+ SDOperand N2 = N->getOperand(2);
+ AddToISelQueue(N1);
+ AddToISelQueue(N2);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
+ }
+
+ case ISD::BRCOND: {
+ SDOperand Chain = N->getOperand(0);
+ SDOperand CC = N->getOperand(1);
+ AddToISelQueue(Chain);
+ AddToISelQueue(CC);
+ MachineBasicBlock *Dest =
+ cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
+ //FIXME - we do NOT need long branches all the time
+ return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
+ CurDAG->getBasicBlock(Dest), Chain);
+ }
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END: {
+ int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+ unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
+ IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
+ SDOperand N0 = N->getOperand(0);
+ AddToISelQueue(N0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
+ }
+
+ case ISD::BR:
+ // FIXME: we don't need long branches all the time!
+ SDOperand N0 = N->getOperand(0);
+ AddToISelQueue(N0);
+ return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
+ N->getOperand(1), N0);
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG
+/// into an IA64-specific DAG, ready for instruction scheduling.
+///
+FunctionPass
+*llvm::createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM) {
+ return new IA64DAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
new file mode 100644
index 0000000..0237a9a
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -0,0 +1,602 @@
+//===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64ISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64ISelLowering.h"
+#include "IA64MachineFunctionInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // register class for general registers
+ addRegisterClass(MVT::i64, IA64::GRRegisterClass);
+
+ // register class for FP registers
+ addRegisterClass(MVT::f64, IA64::FPRegisterClass);
+
+ // register class for predicate registers
+ addRegisterClass(MVT::i1, IA64::PRRegisterClass);
+
+ setLoadXAction(ISD::EXTLOAD , MVT::i1 , Promote);
+
+ setLoadXAction(ISD::ZEXTLOAD , MVT::i1 , Expand);
+
+ setLoadXAction(ISD::SEXTLOAD , MVT::i1 , Expand);
+ setLoadXAction(ISD::SEXTLOAD , MVT::i8 , Expand);
+ setLoadXAction(ISD::SEXTLOAD , MVT::i16 , Expand);
+ setLoadXAction(ISD::SEXTLOAD , MVT::i32 , Expand);
+
+ setOperationAction(ISD::BRIND , MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+
+ // ia64 uses SELECT not SELECT_CC
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+
+ // We need to handle ISD::RET for void functions ourselves,
+ // so we get a chance to restore ar.pfs before adding a
+ // br.ret insn
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setSetCCResultType(MVT::i1);
+ setShiftAmountType(MVT::i64);
+
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::UREM , MVT::f32 , Expand);
+ setOperationAction(ISD::UREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+ setOperationAction(ISD::MEMSET , MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
+
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+
+ // We don't support sin/cos/sqrt
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ // FIXME: IA64 supports fcopysign natively!
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+
+ //IA64 has these, but they are not implemented
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTL , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTR , MVT::i64 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand); // mux @rev
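+  // (i.e. bswap could be a single mux1 @rev, but it is expanded for now)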
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setStackPointerRegisterToSaveRestore(IA64::r12);
+
+ setJumpBufSize(704); // on ia64-linux, jmp_bufs are 704 bytes..
+ setJumpBufAlignment(16); // ...and must be 16-byte aligned
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ addLegalFPImmediate(+0.0);
+ addLegalFPImmediate(+1.0);
+}
+
+const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case IA64ISD::GETFD: return "IA64ISD::GETFD";
+ case IA64ISD::BRCALL: return "IA64ISD::BRCALL";
+ case IA64ISD::RET_FLAG: return "IA64ISD::RET_FLAG";
+ }
+}
+
+
+std::vector<SDOperand>
+IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+ std::vector<SDOperand> ArgValues;
+ //
+ // add beautiful description of IA64 stack frame format
+ // here (from intel 24535803.pdf most likely)
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ GP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+ SP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+ RP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+
+ MachineBasicBlock& BB = MF.front();
+
+ unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39};
+
+ unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12,IA64::F13,IA64::F14, IA64::F15};
+
+ unsigned argVreg[8];
+ unsigned argPreg[8];
+ unsigned argOpc[8];
+
+ unsigned used_FPArgs = 0; // how many FP args have been used so far?
+
+ unsigned ArgOffset = 0;
+ int count = 0;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ {
+ SDOperand newroot, argt;
+ if(count < 8) { // need to fix this logic? maybe.
+
+ switch (getValueType(I->getType())) {
+ default:
+ assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n");
+ case MVT::f32:
+ // fixme? (well, will need to for weird FP structy stuff,
+ // see intel ABI docs)
+ case MVT::f64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]);
+ MF.addLiveIn(args_FP[used_FPArgs]); // mark this reg as liveIn
+        // floating point args go into f8..f15 as needed
+        // (hence the used_FPArgs++ below)
+        argVreg[count] =
+          MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::f64));
+ argPreg[count] = args_FP[used_FPArgs++];
+ argOpc[count] = IA64::FMOV;
+ argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count],
+ MVT::f64);
+ if (I->getType() == Type::FloatTy)
+ argt = DAG.getNode(ISD::FP_ROUND, MVT::f32, argt);
+ break;
+ case MVT::i1: // NOTE: as far as C abi stuff goes,
+ // bools are just boring old ints
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]);
+ MF.addLiveIn(args_int[count]); // mark this register as liveIn
+ argVreg[count] =
+ MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+ argPreg[count] = args_int[count];
+ argOpc[count] = IA64::MOV;
+ argt = newroot =
+ DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::i64);
+ if ( getValueType(I->getType()) != MVT::i64)
+ argt = DAG.getNode(ISD::TRUNCATE, getValueType(I->getType()),
+ newroot);
+ break;
+ }
+ } else { // more than 8 args go into the frame
+ // Create the frame index object for this incoming parameter...
+ ArgOffset = 16 + 8 * (count - 8);
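+      // incoming stack args live at sp+16, sp+24, ... (the first 16 bytes
+      // of the frame are the ABI scratch area), one 8-byte slot each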
+ int FI = MFI->CreateFixedObject(8, ArgOffset);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64);
+ argt = newroot = DAG.getLoad(getValueType(I->getType()),
+ DAG.getEntryNode(), FIN, NULL, 0);
+ }
+ ++count;
+ DAG.setRoot(newroot.getValue(1));
+ ArgValues.push_back(argt);
+ }
+
+
+ // Create a vreg to hold the output of (what will become)
+ // the "alloc" instruction
+ VirtGPR = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64));
+ BuildMI(&BB, TII->get(IA64::PSEUDO_ALLOC), VirtGPR);
+ // we create a PSEUDO_ALLOC (pseudo)instruction for now
+/*
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r12);
+ BuildMI(&BB, IA64::IDEF, 0, IA64::rp);
+ // ..hmm.
+
+ BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12);
+ BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp);
+ // ..hmm.
+*/
+
+ unsigned tempOffset=0;
+
+ // if this is a varargs function, we simply lower llvm.va_start by
+ // pointing to the first entry
+ if(F.isVarArg()) {
+ tempOffset=0;
+ VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset);
+ }
+
+ // here we actually do the moving of args, and store them to the stack
+ // too if this is a varargs function:
+ for (int i = 0; i < count && i < 8; ++i) {
+ BuildMI(&BB, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]);
+ if(F.isVarArg()) {
+ // if this is a varargs function, we copy the input registers to the stack
+ int FI = MFI->CreateFixedObject(8, tempOffset);
+ tempOffset+=8; //XXX: is it safe to use r22 like this?
+ BuildMI(&BB, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI);
+ // FIXME: we should use st8.spill here, one day
+ BuildMI(&BB, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]);
+ }
+ }
+
+ // Finally, inform the code generator which regs we return values in.
+ // (see the ISD::RET: case in the instruction selector)
+ switch (getValueType(F.getReturnType())) {
+ default: assert(0 && "i have no idea where to return this type!");
+ case MVT::isVoid: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ MF.addLiveOut(IA64::r8);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ MF.addLiveOut(IA64::F8);
+ break;
+ }
+
+ return ArgValues;
+}
+
+std::pair<SDOperand, SDOperand>
+IA64TargetLowering::LowerCallTo(SDOperand Chain,
+ const Type *RetTy, bool RetTyIsSigned,
+ bool isVarArg, unsigned CallingConv,
+ bool isTailCall, SDOperand Callee,
+ ArgListTy &Args, SelectionDAG &DAG) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ unsigned NumBytes = 16;
+ unsigned outRegsUsed = 0;
+
+ if (Args.size() > 8) {
+ NumBytes += (Args.size() - 8) * 8;
+ outRegsUsed = 8;
+ } else {
+ outRegsUsed = Args.size();
+ }
+
+ // FIXME? this WILL fail if we ever try to pass around an arg that
+  // consumes more than a single output slot (a 'real' double, an int128,
+  // some sort of aggregate, etc.), as we'll underestimate how many 'outX'
+ // registers we use. Hopefully, the assembler will notice.
+ MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
+ std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);
+
+ // keep stack frame 16-byte aligned
+ // assert(NumBytes==((NumBytes+15) & ~15) &&
+ // "stack frame not 16-byte aligned!");
+ NumBytes = (NumBytes+15) & ~15;
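+  // e.g. a call with 9 arguments gives NumBytes = 16 + 1*8 = 24, which is
+  // rounded up to 32 here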
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+
+ SDOperand StackPtr;
+ std::vector<SDOperand> Stores;
+ std::vector<SDOperand> Converts;
+ std::vector<SDOperand> RegValuesToPass;
+ unsigned ArgOffset = 16;
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ SDOperand Val = Args[i].Node;
+ MVT::ValueType ObjectVT = Val.getValueType();
+ SDOperand ValToStore(0, 0), ValToConvert(0, 0);
+ unsigned ObjSize=8;
+ switch (ObjectVT) {
+ default: assert(0 && "unexpected argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ //promote to 64-bits, sign/zero extending based on type
+ //of the argument
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+ Val = DAG.getNode(ExtendKind, MVT::i64, Val);
+ // XXX: fall through
+ }
+ case MVT::i64:
+ //ObjSize = 8;
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+ }
+ break;
+ case MVT::f32:
+ //promote to 64-bits
+ Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+ // XXX: fall through
+ case MVT::f64:
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+        if(1 /* TODO: if(calling external or variadic function) */ ) {
+ ValToConvert = Val; // additionally pass this FP value as an int
+ }
+ }
+ break;
+ }
+
+ if(ValToStore.Val) {
+ if(!StackPtr.Val) {
+ StackPtr = DAG.getRegister(IA64::r12, MVT::i64);
+ }
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i64, StackPtr, PtrOff);
+ Stores.push_back(DAG.getStore(Chain, ValToStore, PtrOff, NULL, 0));
+ ArgOffset += ObjSize;
+ }
+
+ if(ValToConvert.Val) {
+ Converts.push_back(DAG.getNode(IA64ISD::GETFD, MVT::i64, ValToConvert));
+ }
+ }
+
+ // Emit all stores, make sure they occur before any copies into physregs.
+ if (!Stores.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Stores[0],Stores.size());
+
+ static const unsigned IntArgRegs[] = {
+ IA64::out0, IA64::out1, IA64::out2, IA64::out3,
+ IA64::out4, IA64::out5, IA64::out6, IA64::out7
+ };
+
+ static const unsigned FPArgRegs[] = {
+ IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12, IA64::F13, IA64::F14, IA64::F15
+ };
+
+ SDOperand InFlag;
+
+ // save the current GP, SP and RP : FIXME: do we need to do all 3 always?
+ SDOperand GPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r1, MVT::i64, InFlag);
+ Chain = GPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDOperand SPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r12, MVT::i64, InFlag);
+ Chain = SPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDOperand RPBeforeCall = DAG.getCopyFromReg(Chain, IA64::rp, MVT::i64, InFlag);
+ Chain = RPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing integer args into regs out[0-7]
+ // mapped 1:1 and the FP args into regs F8-F15 "lazily"
+ // TODO: for performance, we should only copy FP args into int regs when we
+  // know this is required (i.e. for variadic or external (unknown) functions)
+
+ // first to the FP->(integer representation) conversions, these are
+ // flagged for now, but shouldn't have to be (TODO)
+ unsigned seenConverts = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ if(MVT::isFloatingPoint(RegValuesToPass[i].getValueType())) {
+ Chain = DAG.getCopyToReg(Chain, IntArgRegs[i], Converts[seenConverts++],
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // next copy args into the usual places, these are flagged
+ unsigned usedFPArgs = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain,
+ MVT::isInteger(RegValuesToPass[i].getValueType()) ?
+ IntArgRegs[i] : FPArgRegs[usedFPArgs++], RegValuesToPass[i], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+/*
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64);
+ }
+*/
+
+ std::vector<MVT::ValueType> NodeTys;
+ std::vector<SDOperand> CallOperands;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ CallOperands.push_back(Chain);
+ CallOperands.push_back(Callee);
+
+ // emit the call itself
+ if (InFlag.Val)
+ CallOperands.push_back(InFlag);
+ else
+ assert(0 && "this should never happen!\n");
+
+ // to make way for a hack:
+ Chain = DAG.getNode(IA64ISD::BRCALL, NodeTys,
+ &CallOperands[0], CallOperands.size());
+ InFlag = Chain.getValue(1);
+
+ // restore the GP, SP and RP after the call
+ Chain = DAG.getCopyToReg(Chain, IA64::r1, GPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, IA64::r12, SPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, IA64::rp, RPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+
+ std::vector<MVT::ValueType> RetVals;
+ RetVals.push_back(MVT::Other);
+ RetVals.push_back(MVT::Flag);
+
+ MVT::ValueType RetTyVT = getValueType(RetTy);
+ SDOperand RetVal;
+ if (RetTyVT != MVT::isVoid) {
+ switch (RetTyVT) {
+ default: assert(0 && "Unknown value type to return!");
+ case MVT::i1: { // bools are just like other integers (returned in r8)
+ // we *could* fall through to the truncate below, but this saves a
+ // few redundant predicate ops
+ SDOperand boolInR8 = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64,InFlag);
+ InFlag = boolInR8.getValue(2);
+ Chain = boolInR8.getValue(1);
+ SDOperand zeroReg = DAG.getCopyFromReg(Chain, IA64::r0, MVT::i64, InFlag);
+ InFlag = zeroReg.getValue(2);
+ Chain = zeroReg.getValue(1);
+
+ RetVal = DAG.getSetCC(MVT::i1, boolInR8, zeroReg, ISD::SETNE);
+ break;
+ }
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+
+ // keep track of whether it is sign or zero extended (todo: bools?)
+/* XXX
+ RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext :ISD::AssertZext,
+ MVT::i64, RetVal, DAG.getValueType(RetTyVT));
+*/
+ RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
+ break;
+ case MVT::i64:
+ RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ case MVT::f32:
+ RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ RetVal = DAG.getNode(ISD::TRUNCATE, MVT::f32, RetVal);
+ break;
+ case MVT::f64:
+ RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ }
+ }
+
+ Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+ DAG.getConstant(NumBytes, getPointerTy()));
+
+ return std::make_pair(RetVal, Chain);
+}
+
+SDOperand IA64TargetLowering::
+LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for IA64.");
+ case ISD::RET: {
+ SDOperand AR_PFSVal, Copy;
+
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), VirtGPR, MVT::i64);
+ AR_PFSVal = DAG.getCopyToReg(AR_PFSVal.getValue(1), IA64::AR_PFS,
+ AR_PFSVal);
+ return DAG.getNode(IA64ISD::RET_FLAG, MVT::Other, AR_PFSVal);
+ case 3: {
+ // Copy the result into the output register & restore ar.pfs
+ MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg = MVT::isInteger(ArgVT) ? IA64::r8 : IA64::F8;
+
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), VirtGPR, MVT::i64);
+ Copy = DAG.getCopyToReg(AR_PFSVal.getValue(1), ArgReg, Op.getOperand(1),
+ SDOperand());
+ AR_PFSVal = DAG.getCopyToReg(Copy.getValue(0), IA64::AR_PFS, AR_PFSVal,
+ Copy.getValue(1));
+ return DAG.getNode(IA64ISD::RET_FLAG, MVT::Other,
+ AR_PFSVal, AR_PFSVal.getValue(1));
+ }
+ }
+ return SDOperand();
+ }
+ case ISD::VAARG: {
+ MVT::ValueType VT = getPointerTy();
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ SDOperand VAList = DAG.getLoad(VT, Op.getOperand(0), Op.getOperand(1),
+ SV->getValue(), SV->getOffset());
+ // Increment the pointer, VAList, to the next vaarg
+ SDOperand VAIncr = DAG.getNode(ISD::ADD, VT, VAList,
+ DAG.getConstant(MVT::getSizeInBits(VT)/8,
+ VT));
+ // Store the incremented VAList to the legalized pointer
+ VAIncr = DAG.getStore(VAList.getValue(1), VAIncr,
+ Op.getOperand(1), SV->getValue(), SV->getOffset());
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(Op.getValueType(), VAIncr, VAList, NULL, 0);
+ }
+ case ISD::VASTART: {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ return DAG.getStore(Op.getOperand(0), FR,
+ Op.getOperand(1), SV->getValue(), SV->getOffset());
+ }
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: break;
+ }
+ return SDOperand();
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
new file mode 100644
index 0000000..6bc5534
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -0,0 +1,74 @@
+//===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that IA64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+#define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "IA64.h"
+
+namespace llvm {
+ namespace IA64ISD {
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+IA64::INSTRUCTION_LIST_END,
+
+ /// GETFD - the getf.d instruction takes a floating point operand and
+ /// returns its 64-bit memory representation as an i64
+ GETFD,
+
+ // TODO: explain this hack
+ BRCALL,
+
+ // RET_FLAG - Return with a flag operand
+ RET_FLAG
+ };
+ }
+
+ class IA64TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ //int ReturnAddrIndex; // FrameIndex for return slot.
+ unsigned GP, SP, RP; // FIXME - clean this mess up
+
+ public:
+ IA64TargetLowering(TargetMachine &TM);
+
+ unsigned VirtGPR; // this is public so it can be accessed in the selector
+ // for ISD::RET. add an accessor instead? FIXME
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// LowerArguments - This hook must be implemented to indicate how we should
+ /// lower the arguments for the specified function, into the specified DAG.
+ virtual std::vector<SDOperand>
+ LowerArguments(Function &F, SelectionDAG &DAG);
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDOperand, SDOperand>
+ LowerCallTo(SDOperand Chain, const Type *RetTy, bool RetTyIsSigned,
+ bool isVarArg, unsigned CC, bool isTailCall,
+ SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
+
+ /// LowerOperation - for custom lowering specific ops
+ /// (currently, only "ret void")
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+// XXX virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+// XXX MachineBasicBlock *MBB);
+ };
+}
+
+#endif // LLVM_TARGET_IA64_IA64ISELLOWERING_H
diff --git a/lib/Target/IA64/IA64InstrBuilder.h b/lib/Target/IA64/IA64InstrBuilder.h
new file mode 100644
index 0000000..f9b5004
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrBuilder.h
@@ -0,0 +1,52 @@
+//===-- IA64InstrBuilder.h - Aids for building IA64 insts -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64_INSTRBUILDER_H
+#define IA64_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference keeps the FrameIndex in place of the base register until it is
+/// resolved. This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference keeps the ConstantPoolIndex in place of the base register until
+/// either machine code emission or assembly output. This allows an optional
+/// offset to be added as well.
+///
+inline const MachineInstrBuilder&
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ int Offset = 0) {
+ return MIB.addImm(Offset).addConstantPoolIndex(CPI);
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
new file mode 100644
index 0000000..ba6c574
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -0,0 +1,79 @@
+//===- IA64InstrFormats.td - IA64 Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// - Warning: the stuff in here isn't really being used, so is mostly
+// junk. It'll get fixed as the JIT gets built.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstIA64<bits<4> op, dag OL, string asmstr> : Instruction {
+ // IA64 instruction baseline
+ field bits<41> Inst;
+ let Namespace = "IA64";
+ let OperandList = OL;
+ let AsmString = asmstr;
+
+ let Inst{40-37} = op;
+}
+
+//"Each Itanium instruction is categorized into one of six types."
+//We should have:
+// A, I, M, F, B, L+X
+
+class AForm<bits<4> opcode, bits<6> qpReg, dag OL, string asmstr> :
+ InstIA64<opcode, OL, asmstr> {
+
+ let Inst{5-0} = qpReg;
+}
+
+class AForm_DAG<bits<4> opcode, bits<6> qpReg, dag OL, string asmstr,
+ list<dag> pattern> :
+ InstIA64<opcode, OL, asmstr> {
+
+ let Pattern = pattern;
+ let Inst{5-0} = qpReg;
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OL, string asmstr> :
+ InstIA64<opcode, OL, asmstr> {
+
+ let Inst{32-27} = x6;
+ let Inst{8-6} = btype;
+}
+
+class MForm<bits<4> opcode, bits<6> x6, dag OL, string asmstr> :
+ InstIA64<opcode, OL, asmstr> {
+ bits<7> Ra;
+ bits<7> Rb;
+ bits<16> disp;
+
+ let Inst{35-30} = x6;
+// let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+
+class RawForm<bits<4> opcode, bits<26> rest, dag OL, string asmstr> :
+ InstIA64<opcode, OL, asmstr> {
+ let Inst{25-0} = rest;
+}
+
+// Pseudo instructions.
+class PseudoInstIA64<dag OL, string nm> : InstIA64<0, OL, nm> {
+}
+
+class PseudoInstIA64_DAG<dag OL, string nm, list<dag> pattern>
+ : InstIA64<0, OL, nm> {
+ let Pattern = pattern;
+}
+
diff --git a/lib/Target/IA64/IA64InstrInfo.cpp b/lib/Target/IA64/IA64InstrInfo.cpp
new file mode 100644
index 0000000..a66c9bc
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.cpp
@@ -0,0 +1,58 @@
+//===- IA64InstrInfo.cpp - IA64 Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64InstrInfo.h"
+#include "IA64.h"
+#include "IA64InstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "IA64GenInstrInfo.inc"
+using namespace llvm;
+
+IA64InstrInfo::IA64InstrInfo()
+ : TargetInstrInfo(IA64Insts, sizeof(IA64Insts)/sizeof(IA64Insts[0])),
+ RI(*this) {
+}
+
+
+bool IA64InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const {
+ MachineOpCode oc = MI.getOpcode();
+ if (oc == IA64::MOV || oc == IA64::FMOV) {
+ // TODO: this doesn't detect predicate moves
+ assert(MI.getNumOperands() >= 2 &&
+ /* MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() && */
+ "invalid register-register move instruction");
+ if( MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() ) {
+ // if both operands of the MOV/FMOV are registers, then
+ // yes, this is a move instruction
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ }
+ return false; // we don't consider e.g. %regN = MOV <FrameIndex #x> a
+ // move instruction
+}
+
+unsigned
+IA64InstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond)const {
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, get(IA64::BRL_NOTCALL)).addMBB(TBB);
+ return 1;
+}
diff --git a/lib/Target/IA64/IA64InstrInfo.h b/lib/Target/IA64/IA64InstrInfo.h
new file mode 100644
index 0000000..3bb14e0
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.h
@@ -0,0 +1,49 @@
+//===- IA64InstrInfo.h - IA64 Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64INSTRUCTIONINFO_H
+#define IA64INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "IA64RegisterInfo.h"
+
+namespace llvm {
+
+class IA64InstrInfo : public TargetInstrInfo {
+ const IA64RegisterInfo RI;
+public:
+ IA64InstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ //
+ // Return true if the instruction is a register to register move and
+ // leave the source and dest operands in the passed parameters.
+ //
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
new file mode 100644
index 0000000..57f5f66
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -0,0 +1,744 @@
+//===- IA64InstrInfo.td - Describe the IA64 Instruction Set -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// IA-64 specific DAG Nodes.
+//
+
+def IA64getfd : SDNode<"IA64ISD::GETFD", SDTFPToIntOp, []>;
+
+def SDT_IA64RetFlag : SDTypeProfile<0, 0, []>;
+def retflag : SDNode<"IA64ISD::RET_FLAG", SDT_IA64RetFlag,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===---------
+// Instruction types
+
+class isA { bit A=1; } // I or M unit
+class isM { bit M=1; } // M unit
+class isI { bit I=1; } // I unit
+class isB { bit B=1; } // B unit
+class isF { bit F=1; } // F unit
+class isLX { bit LX=1; } // I/B
+
+//===---------
+
+def u2imm : Operand<i8>;
+def u6imm : Operand<i8>;
+def s8imm : Operand<i8> {
+ let PrintMethod = "printS8ImmOperand";
+}
+def s14imm : Operand<i64> {
+ let PrintMethod = "printS14ImmOperand";
+}
+def s22imm : Operand<i64> {
+ let PrintMethod = "printS22ImmOperand";
+}
+def u64imm : Operand<i64> {
+ let PrintMethod = "printU64ImmOperand";
+}
+def s64imm : Operand<i64> {
+ let PrintMethod = "printS64ImmOperand";
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i64>;
+
+// the asmprinter needs to know about calls
+let PrintMethod = "printCallOperand" in
+ def calltarget : Operand<i64>;
+
+/* new daggy action!!! */
+
+def is32ones : PatLeaf<(i64 imm), [{
+ // is32ones predicate - True if the immediate is 0x00000000FFFFFFFF
+ // Used to create ZXT4s appropriately
+ uint64_t v = (uint64_t)N->getValue();
+ return (v == 0x00000000FFFFFFFFLL);
+}]>;
+
+// isMIXable predicates - True if the immediate is
+// 0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF
+// etc, through 0x00000000FFFFFFFF
+// Used to test for the suitability of mix*
+def isMIX1Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0xFF00FF00FF00FF00LL);
+}]>;
+def isMIX1Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0x00FF00FF00FF00FFLL);
+}]>;
+def isMIX2Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0xFFFF0000FFFF0000LL);
+}]>;
+def isMIX2Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0x0000FFFF0000FFFFLL);
+}]>;
+def isMIX4Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0xFFFFFFFF00000000LL);
+}]>;
+def isMIX4Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getValue()==0x00000000FFFFFFFFLL);
+}]>;
+
+def isSHLADDimm: PatLeaf<(i64 imm), [{
+ // isSHLADDimm predicate - True if the immediate is exactly 1, 2, 3 or 4
+ // - 0 is *not* okay.
+ // Used to create shladd instructions appropriately
+ int64_t v = (int64_t)N->getValue();
+ return (v >= 1 && v <= 4);
+}]>;
+
+def immSExt14 : PatLeaf<(i64 imm), [{
+ // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+ // field. Used by instructions like 'adds'.
+ int64_t v = (int64_t)N->getValue();
+ return (v <= 8191 && v >= -8192);
+}]>;
+
+// imm64 predicate - True if the immediate fits in a 64-bit
+// field - i.e., always true. Used to keep movl happy
+def imm64 : PatLeaf<(i64 imm)>;
+
+def ADD : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2",
+ [(set GR:$dst, (add GR:$src1, GR:$src2))]>, isA;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>, isA;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1",
+ [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>, isA;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
+ "movl $dst = $imm",
+ [(set GR:$dst, imm64:$imm)]>, isLX;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, globaladdress:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+// hmm
+def ADDL_EA : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, calltarget:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+def SUB : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2",
+ [(set GR:$dst, (sub GR:$src1, GR:$src2))]>, isA;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (sub GR: $src1, GR:$src2), -1))]>, isA;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+ (ops GR:$dst, GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $dst">, isA;
+def TPCADDS : AForm_DAG<0x03, 0x0b,
+ (ops GR:$dst, GR:$src1, s14imm:$imm, PR:$qp),
+ "($qp) adds $dst = $imm, $dst",
+ []>, isA;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+ (ops PR:$dst, PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst , p0 = $imm, $src2">, isA;
+}
+
+// zero extend a bool (predicate reg) into an integer reg
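+// (the pattern materializes 0 with (ADDS r0, 0), then conditionally adds 1
+// under the predicate, so the result is 1 iff the bool is set)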
+def ZXTb : Pat<(zext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+def AXTb : Pat<(anyext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+
+// normal sign/zero-extends
+def SXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i8))]>, isI;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 255))]>, isI;
+def SXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i16))]>, isI;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 65535))]>, isI;
+def SXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i32))]>, isI;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src",
+ [(set GR:$dst, (and GR:$src, is32ones))]>, isI;
+
+// fixme: shrs vs shru?
+def MIX1L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix1.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX1Lable),
+ (and (srl GR:$src2, (i64 8)), isMIX1Lable)))]>, isI;
+
+def MIX2L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix2.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX2Lable),
+ (and (srl GR:$src2, (i64 16)), isMIX2Lable)))]>, isI;
+
+def MIX4L : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix4.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX4Lable),
+ (and (srl GR:$src2, (i64 32)), isMIX4Lable)))]>, isI;
+
+def MIX1R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix1.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 8)), isMIX1Rable),
+ (and GR:$src2, isMIX1Rable)))]>, isI;
+
+def MIX2R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix2.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 16)), isMIX2Rable),
+ (and GR:$src2, isMIX2Rable)))]>, isI;
+
+def MIX4R : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "mix4.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 32)), isMIX4Rable),
+ (and GR:$src2, isMIX4Rable)))]>, isI;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (ops GR:$dst, FP:$src),
+ "getf.sig $dst = $src",
+ []>, isM;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, GR:$src),
+ "setf.sig $dst = $src",
+ []>, isM;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "xma.h $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHUD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "xma.hu $dst = $src1, $src2, $src3",
+ []>, isF;
+
+// pseudocode for integer multiplication
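+// (IA-64 has no plain integer multiply instruction, so i64 multiplies
+// round-trip through the FP unit: setf.sig, xma.l, getf.sig)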
+def : Pat<(mul GR:$src1, GR:$src2),
+ (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhs GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhu GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHUD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+// "adds $dst = $imm, $src1">;
+
+def AND : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "and $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, GR:$src2))]>, isA;
+def ANDCM : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "andcm $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>, isA;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "or $dst = $src1, $src2",
+ [(set GR:$dst, (or GR:$src1, GR:$src2))]>, isA;
+
+def pOR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2, PR:$qp),
+ "($qp) or $dst = $src1, $src2">, isA;
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare!
+def CMPEQ : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>, isA;
+def CMPGT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>, isA;
+def CMPGE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge GR:$src1, GR:$src2))]>, isA;
+def CMPLT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>, isA;
+def CMPLE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle GR:$src1, GR:$src2))]>, isA;
+def CMPNE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.ne $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne GR:$src1, GR:$src2))]>, isA;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.ltu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult GR:$src1, GR:$src2))]>, isA;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.gtu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>, isA;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.leu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule GR:$src1, GR:$src2))]>, isA;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+ "cmp.geu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>, isA;
+
+// and we do the whole thing again for FP compares!
+def FCMPEQ : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq FP:$src1, FP:$src2))]>, isF;
+def FCMPGT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt FP:$src1, FP:$src2))]>, isF;
+def FCMPGE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge FP:$src1, FP:$src2))]>, isF;
+def FCMPLT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt FP:$src1, FP:$src2))]>, isF;
+def FCMPLE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle FP:$src1, FP:$src2))]>, isF;
+def FCMPNE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.neq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne FP:$src1, FP:$src2))]>, isF;
+def FCMPLTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult FP:$src1, FP:$src2))]>, isF;
+def FCMPGTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt FP:$src1, FP:$src2))]>, isF;
+def FCMPLEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule FP:$src1, FP:$src2))]>, isF;
+def FCMPGEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge FP:$src1, FP:$src2))]>, isF;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = r0, r0">, isA;
+
+def : Pat<(trunc GR:$src), // truncate i64 to i1
+ (CMPNE GR:$src, r0)>; // $src!=0? If so, PR:$dst=true
+
+let isTwoAddress=1 in {
+ def TPCMPEQR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$bogus, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = r0, r0">, isA;
+ def TPCMPNER0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$bogus, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = r0, r0">, isA;
+}
+
+/* our pseudocode for OR on predicates is:
+pC = pA OR pB
+-------------
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ ;;
+(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1 */
+
+def bOR : Pat<(or PR:$src1, PR:$src2),
+ (TPCMPEQR0R0 (PCMPEQUNCR0R0 PR:$src1), PR:$src2)>;
+
+/* our pseudocode for AND on predicates is:
+ *
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ cmp.eq pTemp,p0 = r0,r0 // pTemp = NOT pB
+ ;;
+(pB) cmp.ne pTemp,p0 = r0,r0
+ ;;
+(pTemp)cmp.ne pC,p0 = r0,r0 // if (NOT pB) pC = 0 */
+
+def bAND : Pat<(and PR:$src1, PR:$src2),
+ ( TPCMPNER0R0 (PCMPEQUNCR0R0 PR:$src1),
+ (TPCMPNER0R0 (CMPEQ r0, r0), PR:$src2) )>;
+
+/* one possible routine for XOR on predicates is:
+
+ // Compute px = py ^ pz
+ // using sum of products: px = (py & !pz) | (pz & !py)
+ // Uses 5 instructions in 3 cycles.
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(py) cmp.eq.unc pt = r0, r0 // pt = py
+ ;;
+ // cycle 2
+(pt) cmp.ne.and px = r0, r0 // px = px & !pt (px = pz & !pt)
+(pz) cmp.ne.and pt = r0, r0 // pt = pt & !pz
+ ;;
+ } { .mmi
+ // cycle 3
+(pt) cmp.eq.or px = r0, r0 // px = px | pt
+
+*** Another, which we use here, requires one scratch GR. It is:
+
+ mov rt = 0 // initialize rt off critical path
+ ;;
+
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(pz) mov rt = 1 // rt = pz
+ ;;
+ // cycle 2
+(py) cmp.ne px = 1, rt // if (py) px = !pz
+
+.. these routines kindly provided by Jim Hull
+*/
+
+def bXOR : Pat<(xor PR:$src1, PR:$src2),
+ (TPCMPIMM8NE (PCMPEQUNCR0R0 PR:$src2), 1,
+ (TPCADDS (ADDS r0, 0), 1, PR:$src2),
+ PR:$src1)>;
+
+def XOR : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "xor $dst = $src1, $src2",
+ [(set GR:$dst, (xor GR:$src1, GR:$src2))]>, isA;
+
+def SHLADD: AForm_DAG<0x03, 0x0b, (ops GR:$dst,GR:$src1,s64imm:$imm,GR:$src2),
+ "shladd $dst = $src1, $imm, $src2",
+ [(set GR:$dst, (add GR:$src2, (shl GR:$src1, isSHLADDimm:$imm)))]>, isA;
+
+def SHL : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "shl $dst = $src1, $src2",
+ [(set GR:$dst, (shl GR:$src1, GR:$src2))]>, isI;
+
+def SHRU : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "shr.u $dst = $src1, $src2",
+ [(set GR:$dst, (srl GR:$src1, GR:$src2))]>, isI;
+
+def SHRS : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+ "shr $dst = $src1, $src2",
+ [(set GR:$dst, (sra GR:$src1, GR:$src2))]>, isI;
+
+def MOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "mov $dst = $src">, isA;
+def FMOV : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "mov $dst = $src">, isF; // XXX: there _is_ no fmov
+def PMOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+
+def SPILL_ALL_PREDICATES_TO_GR : AForm<0x03, 0x0b, (ops GR:$dst),
+ "mov $dst = pr">, isI;
+def FILL_ALL_PREDICATES_FROM_GR : AForm<0x03, 0x0b, (ops GR:$src),
+ "mov pr = $src">, isI;
+
+let isTwoAddress = 1 in {
+ def CMOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src2, GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+}
+
+def PFMOV : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+
+let isTwoAddress = 1 in {
+ def CFMOV : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src2, FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+}
+
+def SELECTINT : Pat<(select PR:$which, GR:$src1, GR:$src2),
+ (CMOV (MOV GR:$src2), GR:$src1, PR:$which)>; // note order!
+def SELECTFP : Pat<(select PR:$which, FP:$src1, FP:$src2),
+ (CFMOV (FMOV FP:$src2), FP:$src1, PR:$which)>; // note order!
+// TODO: can do this faster, w/o using any integer regs (see pattern isel)
+def SELECTBOOL : Pat<(select PR:$which, PR:$src1, PR:$src2), // note order!
+ (CMPNE (CMOV
+ (MOV (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src2)),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src1), PR:$which), r0)>;
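+// (roughly: both input predicates are presumably first materialized as 0/1
+// integers via ADDS r0,0 plus a predicated add of 1, CMOV then picks one of
+// the two integers under $which, and the trailing CMPNE against r0 turns the
+// chosen integer back into a predicate)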
+
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i1 -1), (CMPEQ r0, r0)>; // TODO: this should just be a ref to p0
+def : Pat<(i1 0), (CMPNE r0, r0)>; // TODO: any instruction actually *using*
+ // this predicate should be killed!
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
+def IDEF : PseudoInstIA64<(ops variable_ops), "// IDEF">;
+
+def IDEF_GR_D : PseudoInstIA64_DAG<(ops GR:$reg), "// $reg = IDEF",
+ [(set GR:$reg, (undef))]>;
+def IDEF_FP_D : PseudoInstIA64_DAG<(ops FP:$reg), "// $reg = IDEF",
+ [(set FP:$reg, (undef))]>;
+def IDEF_PR_D : PseudoInstIA64_DAG<(ops PR:$reg), "// $reg = IDEF",
+ [(set PR:$reg, (undef))]>;
+
+def IUSE : PseudoInstIA64<(ops variable_ops), "// IUSE">;
+def ADJUSTCALLSTACKUP : PseudoInstIA64<(ops variable_ops),
+ "// ADJUSTCALLSTACKUP">;
+def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(ops variable_ops),
+ "// ADJUSTCALLSTACKDOWN">;
+def PSEUDO_ALLOC : PseudoInstIA64<(ops GR:$foo), "// PSEUDO_ALLOC">;
+
+def ALLOC : AForm<0x03, 0x0b,
+ (ops GR:$dst, i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
+ "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating">, isM;
+
+let isTwoAddress = 1 in {
+ def TCMPNE : AForm<0x03, 0x0b,
+ (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4),
+ "cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQOR : AForm<0x03, 0x0b,
+ (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPNE : AForm<0x03, 0x0b,
+ (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQ : AForm<0x03, 0x0b,
+ (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = $src3, $src4">, isA;
+}
+
+def MOVSIMM14 : AForm<0x03, 0x0b, (ops GR:$dst, s14imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVSIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, s22imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVLIMM64 : AForm<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
+ "movl $dst = $imm">, isLX;
+
+def SHLI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm),
+ "shl $dst = $src1, $imm">, isI;
+def SHRUI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm),
+ "shr.u $dst = $src1, $imm">, isI;
+def SHRSI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm),
+ "shr $dst = $src1, $imm">, isI;
+
+def EXTRU : AForm<0x03, 0x0b,
+ (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "extr.u $dst = $src1, $imm1, $imm2">, isI;
+
+def DEPZ : AForm<0x03, 0x0b,
+ (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "dep.z $dst = $src1, $imm1, $imm2">, isI;
+
+def PCMPEQOR : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src1, $src2">, isA;
+def PCMPEQUNC : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = $src1, $src2">, isA;
+def PCMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src1, $src2">, isA;
+
+// two destinations!
+def BCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst1, PR:$dst2, GR:$src1, GR:$src2),
+ "cmp.eq $dst1, dst2 = $src1, $src2">, isA;
+
+def ADDIMM14 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1">, isA;
+
+def ADDIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s22imm:$imm),
+ "add $dst = $imm, $src1">, isA;
+def CADDIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $src1">, isA;
+
+def SUBIMM8 : AForm<0x03, 0x0b, (ops GR:$dst, s8imm:$imm, GR:$src2),
+ "sub $dst = $imm, $src2">, isA;
+
+let isStore = 1, noResults = 1 in {
+ def ST1 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+ "st1 [$dstPtr] = $value">, isM;
+ def ST2 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+ "st2 [$dstPtr] = $value">, isM;
+ def ST4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+ "st4 [$dstPtr] = $value">, isM;
+ def ST8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+ "st8 [$dstPtr] = $value">, isM;
+ def STF4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
+ "stfs [$dstPtr] = $value">, isM;
+ def STF8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
+ "stfd [$dstPtr] = $value">, isM;
+ def STF_SPILL : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value),
+ "stf.spill [$dstPtr] = $value">, isM;
+}
+
+let isLoad = 1 in {
+ def LD1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+ "ld1 $dst = [$srcPtr]">, isM;
+ def LD2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+ "ld2 $dst = [$srcPtr]">, isM;
+ def LD4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+ "ld4 $dst = [$srcPtr]">, isM;
+ def LD8 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+ "ld8 $dst = [$srcPtr]">, isM;
+ def LDF4 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
+ "ldfs $dst = [$srcPtr]">, isM;
+ def LDF8 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
+ "ldfd $dst = [$srcPtr]">, isM;
+ def LDF_FILL : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr),
+ "ldf.fill $dst = [$srcPtr]">, isM;
+}
+
+def POPCNT : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src),
+ "popcnt $dst = $src",
+ [(set GR:$dst, (ctpop GR:$src))]>, isI;
+
+// some FP stuff: // TODO: single-precision stuff?
+def FADD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+ "fadd $dst = $src1, $src2",
+ [(set FP:$dst, (fadd FP:$src1, FP:$src2))]>, isF;
+def FADDS: AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+ "fadd.s $dst = $src1, $src2">, isF;
+def FSUB : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+ "fsub $dst = $src1, $src2",
+ [(set FP:$dst, (fsub FP:$src1, FP:$src2))]>, isF;
+def FMPY : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+ "fmpy $dst = $src1, $src2",
+ [(set FP:$dst, (fmul FP:$src1, FP:$src2))]>, isF;
+def FMA : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "fma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fadd (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FMS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "fms $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fsub (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FNMA : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "fnma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fsub FP:$src3, (fmul FP:$src1, FP:$src2)))]>, isF;
+def FABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fabs $dst = $src",
+ [(set FP:$dst, (fabs FP:$src))]>, isF;
+def FNEG : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fneg $dst = $src",
+ [(set FP:$dst, (fneg FP:$src))]>, isF;
+def FNEGABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fnegabs $dst = $src",
+ [(set FP:$dst, (fneg (fabs FP:$src)))]>, isF;
+
+let isTwoAddress=1 in {
+def TCFMAS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def TCFMADS0 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+}
+
+def CFMAS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def CFNMAS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.s1 $dst = $src1, $src2, $src3">, isF;
+
+def CFMADS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s1 $dst = $src1, $src2, $src3">, isF;
+def CFMADS0 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+def CFNMADS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.d.s1 $dst = $src1, $src2, $src3">, isF;
+
+def FRCPAS0 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2),
+ "frcpa.s0 $dstFR, $dstPR = $src1, $src2">, isF;
+def FRCPAS1 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2),
+ "frcpa.s1 $dstFR, $dstPR = $src1, $src2">, isF;
+
+def XMAL : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3">, isF;
+
+def FCVTXF : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.xf $dst = $src">, isF;
+def FCVTXUF : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.xuf $dst = $src">, isF;
+def FCVTXUFS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.xuf.s1 $dst = $src">, isF;
+def FCVTFX : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fx $dst = $src">, isF;
+def FCVTFXU : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fxu $dst = $src">, isF;
+
+def FCVTFXTRUNC : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fx.trunc $dst = $src">, isF;
+def FCVTFXUTRUNC : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fxu.trunc $dst = $src">, isF;
+
+def FCVTFXTRUNCS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fx.trunc.s1 $dst = $src">, isF;
+def FCVTFXUTRUNCS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fcvt.fxu.trunc.s1 $dst = $src">, isF;
+
+def FNORMD : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src),
+ "fnorm.d $dst = $src">, isF;
+
+def GETFD : AForm<0x03, 0x0b, (ops GR:$dst, FP:$src),
+ "getf.d $dst = $src">, isM;
+def SETFD : AForm<0x03, 0x0b, (ops FP:$dst, GR:$src),
+ "setf.d $dst = $src">, isM;
+
+def GETFSIG : AForm<0x03, 0x0b, (ops GR:$dst, FP:$src),
+ "getf.sig $dst = $src">, isM;
+def SETFSIG : AForm<0x03, 0x0b, (ops FP:$dst, GR:$src),
+ "setf.sig $dst = $src">, isM;
+
+// these four FP<->int conversion patterns need checking/cleaning
+def SINT_TO_FP : Pat<(sint_to_fp GR:$src),
+ (FNORMD (FCVTXF (SETFSIG GR:$src)))>;
+def UINT_TO_FP : Pat<(uint_to_fp GR:$src),
+ (FNORMD (FCVTXUF (SETFSIG GR:$src)))>;
+def FP_TO_SINT : Pat<(i64 (fp_to_sint FP:$src)),
+ (GETFSIG (FCVTFXTRUNC FP:$src))>;
+def FP_TO_UINT : Pat<(i64 (fp_to_uint FP:$src)),
+ (GETFSIG (FCVTFXUTRUNC FP:$src))>;
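+// (the idea: setf.sig moves the 64-bit GR into an FP significand, fcvt.xf /
+// fcvt.xuf convert that (un)signed integer to FP and fnorm.d rounds it to
+// double; the reverse path truncates to an integer significand with
+// fcvt.fx[u].trunc and moves it back to a GR with getf.sig)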
+
+
+let isTerminator = 1, isBranch = 1, noResults = 1 in {
+ def BRL_NOTCALL : RawForm<0x03, 0xb0, (ops i64imm:$dst),
+ "(p0) brl.cond.sptk $dst">, isB;
+ def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.sptk $dst">, isB;
+ def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst),
+ "($qp) br.cond.sptk $dst">, isB;
+}
+
+let isCall = 1, noResults = 1, /* isTerminator = 1, isBranch = 1, */
+ Uses = [out0,out1,out2,out3,out4,out5,out6,out7],
+// all calls clobber non-callee-saved registers, and for now, they are these:
+ Defs = [r2,r3,r8,r9,r10,r11,r14,r15,r16,r17,r18,r19,r20,r21,r22,r23,r24,
+ r25,r26,r27,r28,r29,r30,r31,
+ p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,
+ F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,
+ F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49,
+ F50,F51,F52,F53,F54,F55,F56,
+ F57,F58,F59,F60,F61,F62,F63,F64,F65,F66,F67,F68,F69,F70,F71,F72,F73,F74,
+ F75,F76,F77,F78,F79,F80,F81,
+ F82,F83,F84,F85,F86,F87,F88,F89,F90,F91,F92,F93,F94,F95,F96,F97,F98,F99,
+ F100,F101,F102,F103,F104,F105,
+ F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119,
+ F120,F121,F122,F123,F124,F125,F126,F127,
+ out0,out1,out2,out3,out4,out5,out6,out7] in {
+// old pattern call
+ def BRCALL: RawForm<0x03, 0xb0, (ops calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// new daggy stuff!
+
+// calls a globaladdress
+ def BRCALL_IPREL_GA : RawForm<0x03, 0xb0, (ops calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls an externalsymbol
+ def BRCALL_IPREL_ES : RawForm<0x03, 0xb0, (ops calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls through a function descriptor
+ def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (ops GR:$branchreg),
+ "br.call.sptk rp = $branchreg">, isB; // FIXME: teach llvm about branch regs?
+ def BRLCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.call.sptk $dst">, isB;
+ def BRCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst),
+ "($qp) br.cond.call.sptk $dst">, isB;
+}
+
+// Return branch:
+let isTerminator = 1, isReturn = 1, noResults = 1 in
+ def RET : AForm_DAG<0x03, 0x0b, (ops),
+ "br.ret.sptk.many rp",
+ [(retflag)]>, isB; // return
+def : Pat<(ret), (RET)>;
+
+// the evil stop bit of despair
+def STOP : PseudoInstIA64<(ops variable_ops), ";;">;
+
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
new file mode 100644
index 0000000..fb93056
--- /dev/null
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===-- IA64MachineFunctionInfo.h - IA64-specific MachineFunction info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares IA64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64MACHINEFUNCTIONINFO_H
+#define IA64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+//#include "IA64JITInfo.h"
+
+namespace llvm {
+
+class IA64FunctionInfo : public MachineFunctionInfo {
+
+public:
+ unsigned outRegsUsed; // how many 'out' registers are used by this
+ // MachineFunction? (used to compute the appropriate entry in the
+ // 'alloc' instruction at the top of the function)
+ IA64FunctionInfo(MachineFunction& MF) { outRegsUsed = 0; }
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.cpp b/lib/Target/IA64/IA64RegisterInfo.cpp
new file mode 100644
index 0000000..08327f2
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.cpp
@@ -0,0 +1,388 @@
+//===- IA64RegisterInfo.cpp - IA64 Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the MRegisterInfo class. This
+// file is responsible for the frame pointer elimination optimization on IA64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64.h"
+#include "IA64RegisterInfo.h"
+#include "IA64InstrBuilder.h"
+#include "IA64MachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+IA64RegisterInfo::IA64RegisterInfo(const TargetInstrInfo &tii)
+ : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP),
+ TII(tii) {}
+
+void IA64RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, TII.get(IA64::STF_SPILL)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, false, false, true);
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, TII.get(IA64::ST8)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, false, false, true);
+ } else if (RC == IA64::PRRegisterClass) {
+ /* we use IA64::r2 as a temporary register for doing this hackery. */
+ // first we load 0:
+ BuildMI(MBB, MI, TII.get(IA64::MOV), IA64::r2).addReg(IA64::r0);
+ // then conditionally add 1:
+ BuildMI(MBB, MI, TII.get(IA64::CADDIMM22), IA64::r2).addReg(IA64::r2)
+ .addImm(1).addReg(SrcReg, false, false, true);
+ // and then store it to the stack
+ BuildMI(MBB, MI, TII.get(IA64::ST8)).addFrameIndex(FrameIdx).addReg(IA64::r2);
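+ // net effect: a true predicate is spilled as the 8-byte value 1, a false
+ // one as 0 (loadRegFromStackSlot below recovers it with a cmp.ne against r0)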
+ } else assert(0 &&
+ "sorry, I don't know how to store this sort of reg in the stack\n");
+}
+
+void IA64RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC)const{
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, TII.get(IA64::LDF_FILL), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, TII.get(IA64::LD8), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::PRRegisterClass) {
+ // first we load a byte from the stack into r2, our 'predicate hackery'
+ // scratch reg
+ BuildMI(MBB, MI, TII.get(IA64::LD8), IA64::r2).addFrameIndex(FrameIdx);
+ // then we compare it to zero. If it _is_ zero, compare-not-equal to
+ // r0 gives us 0, which is what we want, so that's nice.
+ BuildMI(MBB, MI, TII.get(IA64::CMPNE), DestReg).addReg(IA64::r2).addReg(IA64::r0);
+ } else assert(0 &&
+ "sorry, I don't know how to load this sort of reg from the stack\n");
+}
+
+void IA64RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+
+ if(RC == IA64::PRRegisterClass ) // if a bool, we use pseudocode
+ // (SrcReg) DestReg = cmp.eq.unc(r0, r0)
+ BuildMI(MBB, MI, TII.get(IA64::PCMPEQUNC), DestReg)
+ .addReg(IA64::r0).addReg(IA64::r0).addReg(SrcReg);
+ else // otherwise, MOV works (for both gen. regs and FP regs)
+ BuildMI(MBB, MI, TII.get(IA64::MOV), DestReg).addReg(SrcReg);
+}
+
+void IA64RegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+const unsigned* IA64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ IA64::r5, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+IA64RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &IA64::GRRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector IA64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(IA64::r0);
+ Reserved.set(IA64::r1);
+ Reserved.set(IA64::r2);
+ Reserved.set(IA64::r5);
+ Reserved.set(IA64::r12);
+ Reserved.set(IA64::r13);
+ Reserved.set(IA64::r22);
+ Reserved.set(IA64::rp);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool IA64RegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void IA64RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackdown instruction into
+ // 'add SP, -<amt>' (i.e. a subtract) and the adjcallstackup instruction
+ // into 'add SP, <amt>'
+ MachineInstr *Old = I;
+ unsigned Amount = Old->getOperand(0).getImmedValue();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
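+ // e.g. with Align = 16 (see the TargetFrameInfo ctor in
+ // IA64TargetMachine.cpp), Amount = 20 rounds up to (20+15)/16*16 = 32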
+
+ MachineInstr *New;
+ if (Old->getOpcode() == IA64::ADJUSTCALLSTACKDOWN) {
+ New=BuildMI(TII.get(IA64::ADDIMM22), IA64::r12).addReg(IA64::r12)
+ .addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == IA64::ADJUSTCALLSTACKUP);
+ New=BuildMI(TII.get(IA64::ADDIMM22), IA64::r12).addReg(IA64::r12)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void IA64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS)const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+
+ // choose a base register: ( hasFP? framepointer : stack pointer )
+ unsigned BaseRegister = FP ? IA64::r5 : IA64::r12;
+ // Add the base register
+ MI.getOperand(i).ChangeToRegister(BaseRegister, false);
+
+ // Now add the frame object offset to the offset from the base register.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ // XXX: we use 'r22' as another hack+slash temporary register here :(
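+ // smallish offsets can be folded into a single add-immediate off the base
+ // register; bigger ones are first materialized into r22 with movl and then
+ // added (the +-8K bound presumably keeps us within a 14-bit 'adds' immediate)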
+ if (Offset <= 8191 && Offset >= -8192) { // smallish offset
+ // Fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+ //insert the new
+ MachineInstr* nMI=BuildMI(TII.get(IA64::ADDIMM22), IA64::r22)
+ .addReg(BaseRegister).addImm(Offset);
+ MBB.insert(II, nMI);
+ } else { // it's big
+ //fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+ MachineInstr* nMI;
+ nMI=BuildMI(TII.get(IA64::MOVLIMM64), IA64::r22).addImm(Offset);
+ MBB.insert(II, nMI);
+ nMI=BuildMI(TII.get(IA64::ADD), IA64::r22).addReg(BaseRegister)
+ .addReg(IA64::r22);
+ MBB.insert(II, nMI);
+ }
+
+}
+
+void IA64RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineInstr *MI;
+ bool FP = hasFP(MF);
+
+ // first, we handle the 'alloc' instruction, that should be right up the
+ // top of any function
+ static const unsigned RegsInOrder[96] = { // there are 96 GPRs the
+ // RSE worries about
+ IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39, IA64::r40, IA64::r41,
+ IA64::r42, IA64::r43, IA64::r44, IA64::r45, IA64::r46, IA64::r47,
+ IA64::r48, IA64::r49, IA64::r50, IA64::r51, IA64::r52, IA64::r53,
+ IA64::r54, IA64::r55, IA64::r56, IA64::r57, IA64::r58, IA64::r59,
+ IA64::r60, IA64::r61, IA64::r62, IA64::r63, IA64::r64, IA64::r65,
+ IA64::r66, IA64::r67, IA64::r68, IA64::r69, IA64::r70, IA64::r71,
+ IA64::r72, IA64::r73, IA64::r74, IA64::r75, IA64::r76, IA64::r77,
+ IA64::r78, IA64::r79, IA64::r80, IA64::r81, IA64::r82, IA64::r83,
+ IA64::r84, IA64::r85, IA64::r86, IA64::r87, IA64::r88, IA64::r89,
+ IA64::r90, IA64::r91, IA64::r92, IA64::r93, IA64::r94, IA64::r95,
+ IA64::r96, IA64::r97, IA64::r98, IA64::r99, IA64::r100, IA64::r101,
+ IA64::r102, IA64::r103, IA64::r104, IA64::r105, IA64::r106, IA64::r107,
+ IA64::r108, IA64::r109, IA64::r110, IA64::r111, IA64::r112, IA64::r113,
+ IA64::r114, IA64::r115, IA64::r116, IA64::r117, IA64::r118, IA64::r119,
+ IA64::r120, IA64::r121, IA64::r122, IA64::r123, IA64::r124, IA64::r125,
+ IA64::r126, IA64::r127 };
+
+ unsigned numStackedGPRsUsed=0;
+ for(int i=0; i<96; i++) {
+ if(MF.isPhysRegUsed(RegsInOrder[i]))
+ numStackedGPRsUsed=i+1; // (i+1 and not ++ - consider fn(fp, fp, int))
+ }
+
+ unsigned numOutRegsUsed=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+
+ // XXX FIXME : this code should be a bit more reliable (in case there _isn't_
+ // a pseudo_alloc in the MBB)
+ unsigned dstRegOfPseudoAlloc;
+ for(MBBI = MBB.begin(); /*MBBI->getOpcode() != IA64::PSEUDO_ALLOC*/; ++MBBI) {
+ assert(MBBI != MBB.end());
+ if(MBBI->getOpcode() == IA64::PSEUDO_ALLOC) {
+ dstRegOfPseudoAlloc=MBBI->getOperand(0).getReg();
+ break;
+ }
+ }
+
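+ // operands follow the ALLOC def in IA64InstrInfo.td: $dst = ar.pfs,
+ // inputs = 0, locals = numStackedGPRsUsed, outputs = numOutRegsUsed,
+ // rotating = 0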
+ MI=BuildMI(TII.get(IA64::ALLOC)).addReg(dstRegOfPseudoAlloc).addImm(0). \
+ addImm(numStackedGPRsUsed).addImm(numOutRegsUsed).addImm(0);
+ MBB.insert(MBBI, MI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned NumBytes = MFI->getStackSize();
+
+ if(FP)
+ NumBytes += 8; // reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0)
+ return;
+
+ // Add 16 bytes at the bottom of the stack (scratch area)
+ // and round the size to a multiple of the alignment.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Size = 16 + (FP ? 8 : 0);
+ NumBytes = (NumBytes+Size+Align-1)/Align*Align;
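+ // e.g. with FP in use: Size = 24, Align = 16 (per the TargetFrameInfo ctor),
+ // so NumBytes = 40 rounds up to (40+24+15)/16*16 = 64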
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r12 -= numbytes
+ if (NumBytes <= 8191) {
+ MI=BuildMI(TII.get(IA64::ADDIMM22),IA64::r12).addReg(IA64::r12).
+ addImm(-NumBytes);
+ MBB.insert(MBBI, MI);
+ } else { // we use r22 as a scratch register here
+ MI=BuildMI(TII.get(IA64::MOVLIMM64), IA64::r22).addImm(-NumBytes);
+ // FIXME: MOVLSI32 expects a _u_32imm
+ MBB.insert(MBBI, MI); // first load the decrement into r22
+ MI=BuildMI(TII.get(IA64::ADD), IA64::r12).addReg(IA64::r12).addReg(IA64::r22);
+ MBB.insert(MBBI, MI); // then add (subtract) it to r12 (stack ptr)
+ }
+
+ // now if we need to, save the old FP and set the new
+ if (FP) {
+ MI = BuildMI(TII.get(IA64::ST8)).addReg(IA64::r12).addReg(IA64::r5);
+ MBB.insert(MBBI, MI);
+ // this must be the last instr in the prolog ? (XXX: why??)
+ MI = BuildMI(TII.get(IA64::MOV), IA64::r5).addReg(IA64::r12);
+ MBB.insert(MBBI, MI);
+ }
+
+}
+
+void IA64RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineInstr *MI;
+ assert(MBBI->getOpcode() == IA64::RET &&
+ "Can only insert epilog into returning blocks");
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ unsigned NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP)
+ {
+ //copy the FP into the SP (discards allocas)
+ MI=BuildMI(TII.get(IA64::MOV), IA64::r12).addReg(IA64::r5);
+ MBB.insert(MBBI, MI);
+ //restore the FP
+ MI=BuildMI(TII.get(IA64::LD8), IA64::r5).addReg(IA64::r5);
+ MBB.insert(MBBI, MI);
+ }
+
+ if (NumBytes != 0)
+ {
+ if (NumBytes <= 8191) {
+ MI=BuildMI(TII.get(IA64::ADDIMM22),IA64::r12).addReg(IA64::r12).
+ addImm(NumBytes);
+ MBB.insert(MBBI, MI);
+ } else {
+ MI=BuildMI(TII.get(IA64::MOVLIMM64), IA64::r22).addImm(NumBytes);
+ MBB.insert(MBBI, MI);
+ MI=BuildMI(TII.get(IA64::ADD), IA64::r12).addReg(IA64::r12).
+ addReg(IA64::r22);
+ MBB.insert(MBBI, MI);
+ }
+ }
+
+}
+
+unsigned IA64RegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? IA64::r5 : IA64::r12;
+}
+
+unsigned IA64RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#include "IA64GenRegisterInfo.inc"
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.h b/lib/Target/IA64/IA64RegisterInfo.h
new file mode 100644
index 0000000..162ad5a
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.h
@@ -0,0 +1,81 @@
+//===- IA64RegisterInfo.h - IA64 Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64REGISTERINFO_H
+#define IA64REGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "IA64GenRegisterInfo.h.inc"
+
+namespace llvm { class Type; }
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct IA64RegisterInfo : public IA64GenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ IA64RegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td
new file mode 100644
index 0000000..087c18f
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.td
@@ -0,0 +1,508 @@
+//===- IA64RegisterInfo.td - Describe the IA64 Register File ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 register file, defining the registers
+// themselves, aliases between the registers, and the register classes built
+// out of the registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+
+class IA64Register<string n> : Register<n> {
+ let Namespace = "IA64";
+}
+
+// GR - One of 128 64-bit general registers
+class GR<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// FP - One of 128 82-bit floating-point registers
+class FP<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// PR - One of 64 1-bit predicate registers
+class PR<bits<6> num, string n> : IA64Register<n> {
+ field bits<6> Num = num;
+}
+
+/* general registers */
+def r0 : GR< 0, "r0">, DwarfRegNum<0>;
+def r1 : GR< 1, "r1">, DwarfRegNum<1>;
+def r2 : GR< 2, "r2">, DwarfRegNum<2>;
+def r3 : GR< 3, "r3">, DwarfRegNum<3>;
+def r4 : GR< 4, "r4">, DwarfRegNum<4>;
+def r5 : GR< 5, "r5">, DwarfRegNum<5>;
+def r6 : GR< 6, "r6">, DwarfRegNum<6>;
+def r7 : GR< 7, "r7">, DwarfRegNum<7>;
+def r8 : GR< 8, "r8">, DwarfRegNum<8>;
+def r9 : GR< 9, "r9">, DwarfRegNum<9>;
+def r10 : GR< 10, "r10">, DwarfRegNum<10>;
+def r11 : GR< 11, "r11">, DwarfRegNum<11>;
+def r12 : GR< 12, "r12">, DwarfRegNum<12>;
+def r13 : GR< 13, "r13">, DwarfRegNum<13>;
+def r14 : GR< 14, "r14">, DwarfRegNum<14>;
+def r15 : GR< 15, "r15">, DwarfRegNum<15>;
+def r16 : GR< 16, "r16">, DwarfRegNum<16>;
+def r17 : GR< 17, "r17">, DwarfRegNum<17>;
+def r18 : GR< 18, "r18">, DwarfRegNum<18>;
+def r19 : GR< 19, "r19">, DwarfRegNum<19>;
+def r20 : GR< 20, "r20">, DwarfRegNum<20>;
+def r21 : GR< 21, "r21">, DwarfRegNum<21>;
+def r22 : GR< 22, "r22">, DwarfRegNum<22>;
+def r23 : GR< 23, "r23">, DwarfRegNum<23>;
+def r24 : GR< 24, "r24">, DwarfRegNum<24>;
+def r25 : GR< 25, "r25">, DwarfRegNum<25>;
+def r26 : GR< 26, "r26">, DwarfRegNum<26>;
+def r27 : GR< 27, "r27">, DwarfRegNum<27>;
+def r28 : GR< 28, "r28">, DwarfRegNum<28>;
+def r29 : GR< 29, "r29">, DwarfRegNum<29>;
+def r30 : GR< 30, "r30">, DwarfRegNum<30>;
+def r31 : GR< 31, "r31">, DwarfRegNum<31>;
+def r32 : GR< 32, "r32">, DwarfRegNum<32>;
+def r33 : GR< 33, "r33">, DwarfRegNum<33>;
+def r34 : GR< 34, "r34">, DwarfRegNum<34>;
+def r35 : GR< 35, "r35">, DwarfRegNum<35>;
+def r36 : GR< 36, "r36">, DwarfRegNum<36>;
+def r37 : GR< 37, "r37">, DwarfRegNum<37>;
+def r38 : GR< 38, "r38">, DwarfRegNum<38>;
+def r39 : GR< 39, "r39">, DwarfRegNum<39>;
+def r40 : GR< 40, "r40">, DwarfRegNum<40>;
+def r41 : GR< 41, "r41">, DwarfRegNum<41>;
+def r42 : GR< 42, "r42">, DwarfRegNum<42>;
+def r43 : GR< 43, "r43">, DwarfRegNum<43>;
+def r44 : GR< 44, "r44">, DwarfRegNum<44>;
+def r45 : GR< 45, "r45">, DwarfRegNum<45>;
+def r46 : GR< 46, "r46">, DwarfRegNum<46>;
+def r47 : GR< 47, "r47">, DwarfRegNum<47>;
+def r48 : GR< 48, "r48">, DwarfRegNum<48>;
+def r49 : GR< 49, "r49">, DwarfRegNum<49>;
+def r50 : GR< 50, "r50">, DwarfRegNum<50>;
+def r51 : GR< 51, "r51">, DwarfRegNum<51>;
+def r52 : GR< 52, "r52">, DwarfRegNum<52>;
+def r53 : GR< 53, "r53">, DwarfRegNum<53>;
+def r54 : GR< 54, "r54">, DwarfRegNum<54>;
+def r55 : GR< 55, "r55">, DwarfRegNum<55>;
+def r56 : GR< 56, "r56">, DwarfRegNum<56>;
+def r57 : GR< 57, "r57">, DwarfRegNum<57>;
+def r58 : GR< 58, "r58">, DwarfRegNum<58>;
+def r59 : GR< 59, "r59">, DwarfRegNum<59>;
+def r60 : GR< 60, "r60">, DwarfRegNum<60>;
+def r61 : GR< 61, "r61">, DwarfRegNum<61>;
+def r62 : GR< 62, "r62">, DwarfRegNum<62>;
+def r63 : GR< 63, "r63">, DwarfRegNum<63>;
+def r64 : GR< 64, "r64">, DwarfRegNum<64>;
+def r65 : GR< 65, "r65">, DwarfRegNum<65>;
+def r66 : GR< 66, "r66">, DwarfRegNum<66>;
+def r67 : GR< 67, "r67">, DwarfRegNum<67>;
+def r68 : GR< 68, "r68">, DwarfRegNum<68>;
+def r69 : GR< 69, "r69">, DwarfRegNum<69>;
+def r70 : GR< 70, "r70">, DwarfRegNum<70>;
+def r71 : GR< 71, "r71">, DwarfRegNum<71>;
+def r72 : GR< 72, "r72">, DwarfRegNum<72>;
+def r73 : GR< 73, "r73">, DwarfRegNum<73>;
+def r74 : GR< 74, "r74">, DwarfRegNum<74>;
+def r75 : GR< 75, "r75">, DwarfRegNum<75>;
+def r76 : GR< 76, "r76">, DwarfRegNum<76>;
+def r77 : GR< 77, "r77">, DwarfRegNum<77>;
+def r78 : GR< 78, "r78">, DwarfRegNum<78>;
+def r79 : GR< 79, "r79">, DwarfRegNum<79>;
+def r80 : GR< 80, "r80">, DwarfRegNum<80>;
+def r81 : GR< 81, "r81">, DwarfRegNum<81>;
+def r82 : GR< 82, "r82">, DwarfRegNum<82>;
+def r83 : GR< 83, "r83">, DwarfRegNum<83>;
+def r84 : GR< 84, "r84">, DwarfRegNum<84>;
+def r85 : GR< 85, "r85">, DwarfRegNum<85>;
+def r86 : GR< 86, "r86">, DwarfRegNum<86>;
+def r87 : GR< 87, "r87">, DwarfRegNum<87>;
+def r88 : GR< 88, "r88">, DwarfRegNum<88>;
+def r89 : GR< 89, "r89">, DwarfRegNum<89>;
+def r90 : GR< 90, "r90">, DwarfRegNum<90>;
+def r91 : GR< 91, "r91">, DwarfRegNum<91>;
+def r92 : GR< 92, "r92">, DwarfRegNum<92>;
+def r93 : GR< 93, "r93">, DwarfRegNum<93>;
+def r94 : GR< 94, "r94">, DwarfRegNum<94>;
+def r95 : GR< 95, "r95">, DwarfRegNum<95>;
+def r96 : GR< 96, "r96">, DwarfRegNum<96>;
+def r97 : GR< 97, "r97">, DwarfRegNum<97>;
+def r98 : GR< 98, "r98">, DwarfRegNum<98>;
+def r99 : GR< 99, "r99">, DwarfRegNum<99>;
+def r100 : GR< 100, "r100">, DwarfRegNum<100>;
+def r101 : GR< 101, "r101">, DwarfRegNum<101>;
+def r102 : GR< 102, "r102">, DwarfRegNum<102>;
+def r103 : GR< 103, "r103">, DwarfRegNum<103>;
+def r104 : GR< 104, "r104">, DwarfRegNum<104>;
+def r105 : GR< 105, "r105">, DwarfRegNum<105>;
+def r106 : GR< 106, "r106">, DwarfRegNum<106>;
+def r107 : GR< 107, "r107">, DwarfRegNum<107>;
+def r108 : GR< 108, "r108">, DwarfRegNum<108>;
+def r109 : GR< 109, "r109">, DwarfRegNum<109>;
+def r110 : GR< 110, "r110">, DwarfRegNum<110>;
+def r111 : GR< 111, "r111">, DwarfRegNum<111>;
+def r112 : GR< 112, "r112">, DwarfRegNum<112>;
+def r113 : GR< 113, "r113">, DwarfRegNum<113>;
+def r114 : GR< 114, "r114">, DwarfRegNum<114>;
+def r115 : GR< 115, "r115">, DwarfRegNum<115>;
+def r116 : GR< 116, "r116">, DwarfRegNum<116>;
+def r117 : GR< 117, "r117">, DwarfRegNum<117>;
+def r118 : GR< 118, "r118">, DwarfRegNum<118>;
+def r119 : GR< 119, "r119">, DwarfRegNum<119>;
+def r120 : GR< 120, "r120">, DwarfRegNum<120>;
+def r121 : GR< 121, "r121">, DwarfRegNum<121>;
+def r122 : GR< 122, "r122">, DwarfRegNum<122>;
+def r123 : GR< 123, "r123">, DwarfRegNum<123>;
+def r124 : GR< 124, "r124">, DwarfRegNum<124>;
+def r125 : GR< 125, "r125">, DwarfRegNum<125>;
+def r126 : GR< 126, "r126">, DwarfRegNum<126>;
+def r127 : GR< 127, "r127">, DwarfRegNum<127>;
+
+/* floating-point registers */
+def F0 : FP< 0, "f0">, DwarfRegNum<128>;
+def F1 : FP< 1, "f1">, DwarfRegNum<129>;
+def F2 : FP< 2, "f2">, DwarfRegNum<130>;
+def F3 : FP< 3, "f3">, DwarfRegNum<131>;
+def F4 : FP< 4, "f4">, DwarfRegNum<132>;
+def F5 : FP< 5, "f5">, DwarfRegNum<133>;
+def F6 : FP< 6, "f6">, DwarfRegNum<134>;
+def F7 : FP< 7, "f7">, DwarfRegNum<135>;
+def F8 : FP< 8, "f8">, DwarfRegNum<136>;
+def F9 : FP< 9, "f9">, DwarfRegNum<137>;
+def F10 : FP< 10, "f10">, DwarfRegNum<138>;
+def F11 : FP< 11, "f11">, DwarfRegNum<139>;
+def F12 : FP< 12, "f12">, DwarfRegNum<140>;
+def F13 : FP< 13, "f13">, DwarfRegNum<141>;
+def F14 : FP< 14, "f14">, DwarfRegNum<142>;
+def F15 : FP< 15, "f15">, DwarfRegNum<143>;
+def F16 : FP< 16, "f16">, DwarfRegNum<144>;
+def F17 : FP< 17, "f17">, DwarfRegNum<145>;
+def F18 : FP< 18, "f18">, DwarfRegNum<146>;
+def F19 : FP< 19, "f19">, DwarfRegNum<147>;
+def F20 : FP< 20, "f20">, DwarfRegNum<148>;
+def F21 : FP< 21, "f21">, DwarfRegNum<149>;
+def F22 : FP< 22, "f22">, DwarfRegNum<150>;
+def F23 : FP< 23, "f23">, DwarfRegNum<151>;
+def F24 : FP< 24, "f24">, DwarfRegNum<152>;
+def F25 : FP< 25, "f25">, DwarfRegNum<153>;
+def F26 : FP< 26, "f26">, DwarfRegNum<154>;
+def F27 : FP< 27, "f27">, DwarfRegNum<155>;
+def F28 : FP< 28, "f28">, DwarfRegNum<156>;
+def F29 : FP< 29, "f29">, DwarfRegNum<157>;
+def F30 : FP< 30, "f30">, DwarfRegNum<158>;
+def F31 : FP< 31, "f31">, DwarfRegNum<159>;
+def F32 : FP< 32, "f32">, DwarfRegNum<160>;
+def F33 : FP< 33, "f33">, DwarfRegNum<161>;
+def F34 : FP< 34, "f34">, DwarfRegNum<162>;
+def F35 : FP< 35, "f35">, DwarfRegNum<163>;
+def F36 : FP< 36, "f36">, DwarfRegNum<164>;
+def F37 : FP< 37, "f37">, DwarfRegNum<165>;
+def F38 : FP< 38, "f38">, DwarfRegNum<166>;
+def F39 : FP< 39, "f39">, DwarfRegNum<167>;
+def F40 : FP< 40, "f40">, DwarfRegNum<168>;
+def F41 : FP< 41, "f41">, DwarfRegNum<169>;
+def F42 : FP< 42, "f42">, DwarfRegNum<170>;
+def F43 : FP< 43, "f43">, DwarfRegNum<171>;
+def F44 : FP< 44, "f44">, DwarfRegNum<172>;
+def F45 : FP< 45, "f45">, DwarfRegNum<173>;
+def F46 : FP< 46, "f46">, DwarfRegNum<174>;
+def F47 : FP< 47, "f47">, DwarfRegNum<175>;
+def F48 : FP< 48, "f48">, DwarfRegNum<176>;
+def F49 : FP< 49, "f49">, DwarfRegNum<177>;
+def F50 : FP< 50, "f50">, DwarfRegNum<178>;
+def F51 : FP< 51, "f51">, DwarfRegNum<179>;
+def F52 : FP< 52, "f52">, DwarfRegNum<180>;
+def F53 : FP< 53, "f53">, DwarfRegNum<181>;
+def F54 : FP< 54, "f54">, DwarfRegNum<182>;
+def F55 : FP< 55, "f55">, DwarfRegNum<183>;
+def F56 : FP< 56, "f56">, DwarfRegNum<184>;
+def F57 : FP< 57, "f57">, DwarfRegNum<185>;
+def F58 : FP< 58, "f58">, DwarfRegNum<186>;
+def F59 : FP< 59, "f59">, DwarfRegNum<187>;
+def F60 : FP< 60, "f60">, DwarfRegNum<188>;
+def F61 : FP< 61, "f61">, DwarfRegNum<189>;
+def F62 : FP< 62, "f62">, DwarfRegNum<190>;
+def F63 : FP< 63, "f63">, DwarfRegNum<191>;
+def F64 : FP< 64, "f64">, DwarfRegNum<192>;
+def F65 : FP< 65, "f65">, DwarfRegNum<193>;
+def F66 : FP< 66, "f66">, DwarfRegNum<194>;
+def F67 : FP< 67, "f67">, DwarfRegNum<195>;
+def F68 : FP< 68, "f68">, DwarfRegNum<196>;
+def F69 : FP< 69, "f69">, DwarfRegNum<197>;
+def F70 : FP< 70, "f70">, DwarfRegNum<198>;
+def F71 : FP< 71, "f71">, DwarfRegNum<199>;
+def F72 : FP< 72, "f72">, DwarfRegNum<200>;
+def F73 : FP< 73, "f73">, DwarfRegNum<201>;
+def F74 : FP< 74, "f74">, DwarfRegNum<202>;
+def F75 : FP< 75, "f75">, DwarfRegNum<203>;
+def F76 : FP< 76, "f76">, DwarfRegNum<204>;
+def F77 : FP< 77, "f77">, DwarfRegNum<205>;
+def F78 : FP< 78, "f78">, DwarfRegNum<206>;
+def F79 : FP< 79, "f79">, DwarfRegNum<207>;
+def F80 : FP< 80, "f80">, DwarfRegNum<208>;
+def F81 : FP< 81, "f81">, DwarfRegNum<209>;
+def F82 : FP< 82, "f82">, DwarfRegNum<210>;
+def F83 : FP< 83, "f83">, DwarfRegNum<211>;
+def F84 : FP< 84, "f84">, DwarfRegNum<212>;
+def F85 : FP< 85, "f85">, DwarfRegNum<213>;
+def F86 : FP< 86, "f86">, DwarfRegNum<214>;
+def F87 : FP< 87, "f87">, DwarfRegNum<215>;
+def F88 : FP< 88, "f88">, DwarfRegNum<216>;
+def F89 : FP< 89, "f89">, DwarfRegNum<217>;
+def F90 : FP< 90, "f90">, DwarfRegNum<218>;
+def F91 : FP< 91, "f91">, DwarfRegNum<219>;
+def F92 : FP< 92, "f92">, DwarfRegNum<220>;
+def F93 : FP< 93, "f93">, DwarfRegNum<221>;
+def F94 : FP< 94, "f94">, DwarfRegNum<222>;
+def F95 : FP< 95, "f95">, DwarfRegNum<223>;
+def F96 : FP< 96, "f96">, DwarfRegNum<224>;
+def F97 : FP< 97, "f97">, DwarfRegNum<225>;
+def F98 : FP< 98, "f98">, DwarfRegNum<226>;
+def F99 : FP< 99, "f99">, DwarfRegNum<227>;
+def F100 : FP< 100, "f100">, DwarfRegNum<228>;
+def F101 : FP< 101, "f101">, DwarfRegNum<229>;
+def F102 : FP< 102, "f102">, DwarfRegNum<230>;
+def F103 : FP< 103, "f103">, DwarfRegNum<231>;
+def F104 : FP< 104, "f104">, DwarfRegNum<232>;
+def F105 : FP< 105, "f105">, DwarfRegNum<233>;
+def F106 : FP< 106, "f106">, DwarfRegNum<234>;
+def F107 : FP< 107, "f107">, DwarfRegNum<235>;
+def F108 : FP< 108, "f108">, DwarfRegNum<236>;
+def F109 : FP< 109, "f109">, DwarfRegNum<237>;
+def F110 : FP< 110, "f110">, DwarfRegNum<238>;
+def F111 : FP< 111, "f111">, DwarfRegNum<239>;
+def F112 : FP< 112, "f112">, DwarfRegNum<240>;
+def F113 : FP< 113, "f113">, DwarfRegNum<241>;
+def F114 : FP< 114, "f114">, DwarfRegNum<242>;
+def F115 : FP< 115, "f115">, DwarfRegNum<243>;
+def F116 : FP< 116, "f116">, DwarfRegNum<244>;
+def F117 : FP< 117, "f117">, DwarfRegNum<245>;
+def F118 : FP< 118, "f118">, DwarfRegNum<246>;
+def F119 : FP< 119, "f119">, DwarfRegNum<247>;
+def F120 : FP< 120, "f120">, DwarfRegNum<248>;
+def F121 : FP< 121, "f121">, DwarfRegNum<249>;
+def F122 : FP< 122, "f122">, DwarfRegNum<250>;
+def F123 : FP< 123, "f123">, DwarfRegNum<251>;
+def F124 : FP< 124, "f124">, DwarfRegNum<252>;
+def F125 : FP< 125, "f125">, DwarfRegNum<253>;
+def F126 : FP< 126, "f126">, DwarfRegNum<254>;
+def F127 : FP< 127, "f127">, DwarfRegNum<255>;
+
+/* predicate registers */
+def p0 : PR< 0, "p0">, DwarfRegNum<256>;
+def p1 : PR< 1, "p1">, DwarfRegNum<257>;
+def p2 : PR< 2, "p2">, DwarfRegNum<258>;
+def p3 : PR< 3, "p3">, DwarfRegNum<259>;
+def p4 : PR< 4, "p4">, DwarfRegNum<260>;
+def p5 : PR< 5, "p5">, DwarfRegNum<261>;
+def p6 : PR< 6, "p6">, DwarfRegNum<262>;
+def p7 : PR< 7, "p7">, DwarfRegNum<263>;
+def p8 : PR< 8, "p8">, DwarfRegNum<264>;
+def p9 : PR< 9, "p9">, DwarfRegNum<265>;
+def p10 : PR< 10, "p10">, DwarfRegNum<266>;
+def p11 : PR< 11, "p11">, DwarfRegNum<267>;
+def p12 : PR< 12, "p12">, DwarfRegNum<268>;
+def p13 : PR< 13, "p13">, DwarfRegNum<269>;
+def p14 : PR< 14, "p14">, DwarfRegNum<270>;
+def p15 : PR< 15, "p15">, DwarfRegNum<271>;
+def p16 : PR< 16, "p16">, DwarfRegNum<272>;
+def p17 : PR< 17, "p17">, DwarfRegNum<273>;
+def p18 : PR< 18, "p18">, DwarfRegNum<274>;
+def p19 : PR< 19, "p19">, DwarfRegNum<275>;
+def p20 : PR< 20, "p20">, DwarfRegNum<276>;
+def p21 : PR< 21, "p21">, DwarfRegNum<277>;
+def p22 : PR< 22, "p22">, DwarfRegNum<278>;
+def p23 : PR< 23, "p23">, DwarfRegNum<279>;
+def p24 : PR< 24, "p24">, DwarfRegNum<280>;
+def p25 : PR< 25, "p25">, DwarfRegNum<281>;
+def p26 : PR< 26, "p26">, DwarfRegNum<282>;
+def p27 : PR< 27, "p27">, DwarfRegNum<283>;
+def p28 : PR< 28, "p28">, DwarfRegNum<284>;
+def p29 : PR< 29, "p29">, DwarfRegNum<285>;
+def p30 : PR< 30, "p30">, DwarfRegNum<286>;
+def p31 : PR< 31, "p31">, DwarfRegNum<287>;
+def p32 : PR< 32, "p32">, DwarfRegNum<288>;
+def p33 : PR< 33, "p33">, DwarfRegNum<289>;
+def p34 : PR< 34, "p34">, DwarfRegNum<290>;
+def p35 : PR< 35, "p35">, DwarfRegNum<291>;
+def p36 : PR< 36, "p36">, DwarfRegNum<292>;
+def p37 : PR< 37, "p37">, DwarfRegNum<293>;
+def p38 : PR< 38, "p38">, DwarfRegNum<294>;
+def p39 : PR< 39, "p39">, DwarfRegNum<295>;
+def p40 : PR< 40, "p40">, DwarfRegNum<296>;
+def p41 : PR< 41, "p41">, DwarfRegNum<297>;
+def p42 : PR< 42, "p42">, DwarfRegNum<298>;
+def p43 : PR< 43, "p43">, DwarfRegNum<299>;
+def p44 : PR< 44, "p44">, DwarfRegNum<300>;
+def p45 : PR< 45, "p45">, DwarfRegNum<301>;
+def p46 : PR< 46, "p46">, DwarfRegNum<302>;
+def p47 : PR< 47, "p47">, DwarfRegNum<303>;
+def p48 : PR< 48, "p48">, DwarfRegNum<304>;
+def p49 : PR< 49, "p49">, DwarfRegNum<305>;
+def p50 : PR< 50, "p50">, DwarfRegNum<306>;
+def p51 : PR< 51, "p51">, DwarfRegNum<307>;
+def p52 : PR< 52, "p52">, DwarfRegNum<308>;
+def p53 : PR< 53, "p53">, DwarfRegNum<309>;
+def p54 : PR< 54, "p54">, DwarfRegNum<310>;
+def p55 : PR< 55, "p55">, DwarfRegNum<311>;
+def p56 : PR< 56, "p56">, DwarfRegNum<312>;
+def p57 : PR< 57, "p57">, DwarfRegNum<313>;
+def p58 : PR< 58, "p58">, DwarfRegNum<314>;
+def p59 : PR< 59, "p59">, DwarfRegNum<315>;
+def p60 : PR< 60, "p60">, DwarfRegNum<316>;
+def p61 : PR< 61, "p61">, DwarfRegNum<317>;
+def p62 : PR< 62, "p62">, DwarfRegNum<318>;
+def p63 : PR< 63, "p63">, DwarfRegNum<319>;
+
+// XXX : this is temporary, we'll eventually have the output registers
+// in the general purpose register class too?
+def out0 : GR<0, "out0">, DwarfRegNum<120>;
+def out1 : GR<1, "out1">, DwarfRegNum<121>;
+def out2 : GR<2, "out2">, DwarfRegNum<122>;
+def out3 : GR<3, "out3">, DwarfRegNum<123>;
+def out4 : GR<4, "out4">, DwarfRegNum<124>;
+def out5 : GR<5, "out5">, DwarfRegNum<125>;
+def out6 : GR<6, "out6">, DwarfRegNum<126>;
+def out7 : GR<7, "out7">, DwarfRegNum<127>;
+
+// application (special) registers:
+
+// "previous function state" application register
+def AR_PFS : GR<0, "ar.pfs">, DwarfRegNum<331>;
+
+// "return pointer" (this is really branch register b0)
+def rp : GR<0, "rp">, DwarfRegNum<-1>;
+
+// branch reg 6
+def B6 : GR<0, "b6">, DwarfRegNum<326>;
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// these are the scratch (+stacked) general registers
+// FIXME/XXX we also reserve a frame pointer (r5)
+// FIXME/XXX we also reserve r2 for spilling/filling predicates
+// in IA64RegisterInfo.cpp
+// FIXME/XXX we also reserve r22 for calculating addresses
+// in IA64RegisterInfo.cpp
+
+def GR : RegisterClass<"IA64", [i64], 64,
+ [
+
+//FIXME!: for both readability and performance, we don't want the out
+// registers to be the first ones allocated
+
+ out7, out6, out5, out4, out3, out2, out1, out0,
+ r3, r8, r9, r10, r11, r14, r15,
+ r16, r17, r18, r19, r20, r21, r23,
+ r24, r25, r26, r27, r28, r29, r30, r31,
+ r32, r33, r34, r35, r36, r37, r38, r39,
+ r40, r41, r42, r43, r44, r45, r46, r47,
+ r48, r49, r50, r51, r52, r53, r54, r55,
+ r56, r57, r58, r59, r60, r61, r62, r63,
+ r64, r65, r66, r67, r68, r69, r70, r71,
+ r72, r73, r74, r75, r76, r77, r78, r79,
+ r80, r81, r82, r83, r84, r85, r86, r87,
+ r88, r89, r90, r91, r92, r93, r94, r95,
+ r96, r97, r98, r99, r100, r101, r102, r103,
+ r104, r105, r106, r107, r108, r109, r110, r111,
+ r112, r113, r114, r115, r116, r117, r118, r119,
+ r120, r121, r122, r123, r124, r125, r126, r127,
+ r0, r1, r2, r5, r12, r13, r22, rp]> // the last 8 are special (look down)
+ {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRClass::iterator
+ GRClass::allocation_order_begin(const MachineFunction &MF) const {
+ // hide the 8 out? registers appropriately:
+ return begin()+(8-(MF.getInfo<IA64FunctionInfo>()->outRegsUsed));
+ }
+
+ GRClass::iterator
+ GRClass::allocation_order_end(const MachineFunction &MF) const {
+ int numReservedRegs=8; // the 8 special registers r0,r1,r2,r5,r12,r13,r22,rp
+
+ // we also can't allocate registers for use as locals if they're
+ // already required as 'out' registers
+ numReservedRegs+=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+
+ return end()-numReservedRegs; // hide registers appropriately
+ }
+ }];
+}
+
+
+// these are the scratch (+stacked) FP registers
+
+def FP : RegisterClass<"IA64", [f64], 64,
+ [F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15,
+ F32, F33, F34, F35, F36, F37, F38, F39,
+ F40, F41, F42, F43, F44, F45, F46, F47,
+ F48, F49, F50, F51, F52, F53, F54, F55,
+ F56, F57, F58, F59, F60, F61, F62, F63,
+ F64, F65, F66, F67, F68, F69, F70, F71,
+ F72, F73, F74, F75, F76, F77, F78, F79,
+ F80, F81, F82, F83, F84, F85, F86, F87,
+ F88, F89, F90, F91, F92, F93, F94, F95,
+ F96, F97, F98, F99, F100, F101, F102, F103,
+ F104, F105, F106, F107, F108, F109, F110, F111,
+ F112, F113, F114, F115, F116, F117, F118, F119,
+ F120, F121, F122, F123, F124, F125, F126, F127,
+ F0, F1]> // these last two are hidden
+ {
+// the 128s here are to make stf.spill/ldf.fill happy,
+// when storing full (82-bit) FP regs to stack slots
+// we need to 16-byte align
+ let Size=128;
+ let Alignment=128;
+
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FPClass::iterator
+ FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin(); // we don't hide any FP regs from the start
+ }
+
+ FPClass::iterator
+ FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-2; // we hide regs F0, F1 from the end
+ }
+ }];
+}
+
+// these are the predicate registers, p0 (1/TRUE) is not here
+def PR : RegisterClass<"IA64", [i1], 64,
+
+// for now, let's be wimps and only have the scratch predicate regs
+ [p6, p7, p8, p9, p10, p11, p12, p13, p14, p15]> {
+ let Size = 64;
+ }
+
+/*
+ [p1, p2, p3, p4, p5, p6, p7,
+ p8, p9, p10, p11, p12, p13, p14, p15,
+ p16, p17, p18, p19, p20, p21, p22, p23,
+ p24, p25, p26, p27, p28, p29, p30, p31,
+ p32, p33, p34, p35, p36, p37, p38, p39,
+ p40, p41, p42, p43, p44, p45, p46, p47,
+ p48, p49, p50, p51, p52, p53, p54, p55,
+ p56, p57, p58, p59, p60, p61, p62, p63]>;
+ */
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.cpp b/lib/Target/IA64/IA64TargetAsmInfo.cpp
new file mode 100644
index 0000000..1a7e2b2
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.cpp
@@ -0,0 +1,34 @@
+//===-- IA64TargetAsmInfo.cpp - IA64 asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the IA64TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+
+using namespace llvm;
+
+IA64TargetAsmInfo::IA64TargetAsmInfo(const IA64TargetMachine &TM) {
+ CommentString = "//";
+ Data8bitsDirective = "\tdata1\t"; // FIXME: check that we are
+ Data16bitsDirective = "\tdata2.ua\t"; // disabling auto-alignment
+ Data32bitsDirective = "\tdata4.ua\t"; // properly
+ Data64bitsDirective = "\tdata8.ua\t";
+ ZeroDirective = "\t.skip\t";
+ AsciiDirective = "\tstring\t";
+
+ GlobalVarAddrPrefix="";
+ GlobalVarAddrSuffix="";
+ FunctionAddrPrefix="@fptr(";
+ FunctionAddrSuffix=")";
+
+ // FIXME: would be nice to have rodata (no 'w') when appropriate?
+ ConstantPoolSection = "\n\t.section .data, \"aw\", \"progbits\"\n";
+}
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.h b/lib/Target/IA64/IA64TargetAsmInfo.h
new file mode 100644
index 0000000..681253c
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- IA64TargetAsmInfo.h - IA64 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the IA64TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64TARGETASMINFO_H
+#define IA64TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class IA64TargetMachine;
+
+ struct IA64TargetAsmInfo : public TargetAsmInfo {
+ IA64TargetAsmInfo(const IA64TargetMachine &TM);
+ };
+
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
new file mode 100644
index 0000000..51beaa1
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -0,0 +1,91 @@
+//===-- IA64TargetMachine.cpp - Define TargetMachine for IA64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+#include "IA64TargetMachine.h"
+#include "IA64.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int IA64TargetMachineModule;
+int IA64TargetMachineModule = 0;
+
+namespace {
+ RegisterTarget<IA64TargetMachine> X("ia64", " IA-64 (Itanium)");
+}
+
+const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
+ return new IA64TargetAsmInfo(*this);
+}
+
+unsigned IA64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // we match [iI][aA]*64
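+ // e.g. "ia64", "IA64" and "ia64-unknown-linux-gnu" all score a strong match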
+ bool seenIA64=false;
+ std::string TT = M.getTargetTriple();
+
+ if (TT.size() >= 4) {
+ if( (TT[0]=='i' || TT[0]=='I') &&
+ (TT[1]=='a' || TT[1]=='A') ) {
+ for(unsigned int i=2; i<(TT.size()-1); i++)
+ if(TT[i]=='6' && TT[i+1]=='4')
+ seenIA64=true;
+ }
+
+ if (seenIA64)
+ return 20; // strong match
+ }
+ // If the target triple is something non-ia64, we don't match.
+ if (!TT.empty()) return 0;
+
+#if defined(__ia64__) || defined(__IA64__)
+ return 5;
+#else
+ return 0;
+#endif
+}
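+
+// A rough illustration of the matching rule above (example triples only):
+// "ia64-unknown-linux-gnu" and "IA-64" both contain an "ia...64" pattern and
+// score 20 (strong match); any other non-empty triple scores 0; an empty
+// triple scores 5 only when the compiler itself is running on an IA-64 host.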
+
+/// IA64TargetMachine ctor - Create an LP64 architecture model
+///
+IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e"),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ TLInfo(*this) { // FIXME? check this stuff
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool IA64TargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ PM.add(createIA64DAGToDAGInstructionSelector(*this));
+ return false;
+}
+
+bool IA64TargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+ // Make sure everything is bundled happily
+ PM.add(createIA64BundlingPass(*this));
+ return true;
+}
+bool IA64TargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ PM.add(createIA64CodePrinterPass(Out, *this));
+ return false;
+}
+
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
new file mode 100644
index 0000000..538a330
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -0,0 +1,60 @@
+//===-- IA64TargetMachine.h - Define TargetMachine for IA64 ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64TARGETMACHINE_H
+#define LLVM_TARGET_IA64TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "IA64InstrInfo.h"
+#include "IA64ISelLowering.h"
+
+namespace llvm {
+
+class IA64TargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ IA64InstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ //IA64JITInfo JITInfo;
+ IA64TargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ IA64TargetMachine(const Module &M, const std::string &FS);
+
+ virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual IA64TargetLowering *getTargetLowering() const {
+ return const_cast<IA64TargetLowering*>(&TLInfo);
+ }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+};
+} // End llvm namespace
+
+#endif
+
+
diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile
new file mode 100644
index 0000000..f519cf9
--- /dev/null
+++ b/lib/Target/IA64/Makefile
@@ -0,0 +1,18 @@
+##===- lib/Target/IA64/Makefile -----------------------------*- Makefile -*-===##
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by Duraid Madina and is distributed under the
+# University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIA64
+TARGET = IA64
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \
+ IA64GenRegisterInfo.inc IA64GenInstrNames.inc \
+ IA64GenInstrInfo.inc IA64GenAsmWriter.inc \
+ IA64GenDAGISel.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README
new file mode 100644
index 0000000..852d512
--- /dev/null
+++ b/lib/Target/IA64/README
@@ -0,0 +1,106 @@
+*** README for the LLVM IA64 Backend "Version 0.01" - March 18, 2005
+*** Quote for this version:
+
+ "Kaori and Hitomi are naughty!!"
+
+
+Congratulations, you have found:
+
+****************************************************************
+* @@@ @@@ @@@ @@@ @@@@@@@@@@ *
+* @@@ @@@ @@@ @@@ @@@@@@@@@@@ *
+* @@! @@! @@! @@@ @@! @@! @@! *
+* !@! !@! !@! @!@ !@! !@! !@! *
+* @!! @!! @!@ !@! @!! !!@ @!@ *
+* !!! !!! !@! !!! !@! ! !@! *
+* !!: !!: :!: !!: !!: !!: *
+* :!: :!: ::!!:! :!: :!: *
+* :: :::: :: :::: :::: ::: :: *
+* : :: : : : :: : : : : : *
+* *
+* *
+* @@@@@@ @@@ @@@ @@@ @@@@@@ @@@@@@ @@@ *
+* @@@@@@@@ @@@@ @@@ @@@ @@@@@@@@ @@@@@@@ @@@@ *
+* @@! @@@ @@!@!@@@ @@! @@! @@@ !@@ @@!@! *
+* !@! @!@ !@!!@!@! !@! !@! @!@ !@! !@!!@! *
+* @!@ !@! @!@ !!@! !!@ @!@!@!@! !!@@!@! @!! @!! *
+* !@! !!! !@! !!! !!! !!!@!!!! @!!@!!!! !!! !@! *
+* !!: !!! !!: !!! !!: !!: !!! !:! !:! :!!:!:!!: *
+* :!: !:! :!: !:! :!: :!: !:! :!: !:! !:::!!::: *
+* ::::: :: :: :: :: :: ::: :::: ::: ::: *
+* : : : :: : : : : : :: : : ::: *
+* *
+****************************************************************
+* Bow down, bow down, before the power of IA64! Or be crushed, *
+* be crushed, by its jolly registers of doom!! *
+****************************************************************
+
+DEVELOPMENT PLAN:
+
+ _ you are 2005 maybe 2005 2006 2006 and
+ / here | | | beyond
+ v v v v |
+ v
+CLEAN UP ADD INSTRUCTION ADD PLAY WITH
+INSTRUCTION --> SCHEDULING AND --> JIT --> DYNAMIC --> FUTURE WORK
+SELECTION BUNDLING SUPPORT REOPTIMIZATION
+
+DISCLAIMER AND PROMISE:
+
+As of the time of this release, you are probably better off using Intel C/C++
+or GCC. The performance of the code emitted right now is, in a word,
+terrible. Check back in a few months - the story will be different then,
+I guarantee it.
+
+TODO:
+
+ - stop passing FP args in both FP *and* integer regs when not required
+ - allocate low (nonstacked) registers more aggressively
+ - clean up and thoroughly test the isel patterns.
+ - fix stacked register allocation order: (for readability) we don't want
+ the out? registers being the first ones used
+ - fix up floating point
+ (nb http://gcc.gnu.org/wiki?pagename=ia64%20floating%20point )
+ - bundling!
+ (we will avoid the mess that is:
+ http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html )
+ - instruction scheduling (hmmmm! ;)
+ - write truly inspirational documentation
+ - if-conversion (predicate database/knowledge? etc etc)
+ - counted loop support
+ - make integer + FP mul/div more clever (we have fixed pseudocode atm)
+ - track and use comparison complements
+
+INFO:
+
+ - we are strictly LP64 here, no support for ILP32 on HP-UX. Linux users
+ don't need to worry about this.
+ - I have instruction scheduling/bundling pseudocode that really works
+ (it has been tested, albeit at the perl-script level).
+ So, before you go write your own, send me an email!
+
+KNOWN DEFECTS AT THE CURRENT TIME:
+
+ - C++ vtables contain naked function pointers, not function descriptors,
+ which is bad. see http://llvm.cs.uiuc.edu/bugs/show_bug.cgi?id=406
+ - varargs are broken
+ - alloca doesn't work (indeed, stack frame layout is bogus)
+ - no support for big-endian environments
+ - (not really the backend, but...) the CFE has some issues on IA64.
+ these will probably be fixed soon.
+
+ACKNOWLEDGEMENTS:
+
+ - Chris Lattner (x100)
+ - Other LLVM developers ("hey, that looks familiar")
+
+CONTACT:
+
+ - You can email me at duraid@octopus.com.au. If you find a small bug,
+ just email me. If you find a big bug, please file a bug report
+ in bugzilla! http://llvm.cs.uiuc.edu is your one stop shop for all
+ things LLVM.
+
+
+
+
diff --git a/lib/Target/MRegisterInfo.cpp b/lib/Target/MRegisterInfo.cpp
new file mode 100644
index 0000000..3af611d
--- /dev/null
+++ b/lib/Target/MRegisterInfo.cpp
@@ -0,0 +1,81 @@
+//===- MRegisterInfo.cpp - Target Register Information Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/ADT/BitVector.h"
+
+using namespace llvm;
+
+MRegisterInfo::MRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+ regclass_iterator RCB, regclass_iterator RCE,
+ int CFSO, int CFDO)
+ : Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+ assert(NumRegs < FirstVirtualRegister &&
+ "Target has too many physical registers!");
+
+ CallFrameSetupOpcode = CFSO;
+ CallFrameDestroyOpcode = CFDO;
+}
+
+MRegisterInfo::~MRegisterInfo() {}
+
+/// getAllocatableSetForRC - Set the bits that represent allocatable
+/// registers for the specified register class.
+static void getAllocatableSetForRC(MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ R.set(*I);
+}
+
+BitVector MRegisterInfo::getAllocatableSet(MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(NumRegs);
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ return Allocatable;
+ }
+
+ for (MRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ return Allocatable;
+}
+
+/// getLocation - This method should return the actual location of a frame
+/// variable given the frame index. The location is returned in ML.
+/// Subclasses should override this method for special handling of frame
+/// variables and then call MRegisterInfo::getLocation for the default action.
+void MRegisterInfo::getLocation(MachineFunction &MF, unsigned Index,
+ MachineLocation &ML) const {
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ML.set(getFrameRegister(MF),
+ MFI->getObjectOffset(Index) +
+ MFI->getStackSize() -
+ TFI.getOffsetOfLocalArea() +
+ MFI->getOffsetAdjustment());
+}
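+
+// A small worked example of the formula above, with purely hypothetical
+// numbers: for an object offset of -8, a stack size of 32, a local-area
+// offset of 0 and no offset adjustment, the reported location would be
+// (frame register, -8 + 32 - 0 + 0) = (frame register, 24).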
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+MRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Default is to do nothing.
+}
+
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
new file mode 100644
index 0000000..5859adf
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -0,0 +1,1657 @@
+//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Roman Samoilov and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to MSIL code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSILWriter.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace {
+ // TargetMachine for the MSIL
+ struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ MSILTarget(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, std::ostream &Out,
+ CodeGenFileType FileType, bool Fast);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ };
+}
+
+
+RegisterTarget<MSILTarget> X("msil", " MSIL backend");
+
+bool MSILModule::runOnModule(Module &M) {
+ ModulePtr = &M;
+ TD = &getAnalysis<TargetData>();
+ bool Changed = false;
+ // Find named types.
+ TypeSymbolTable& Table = M.getTypeSymbolTable();
+ std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
+ for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second))
+ Table.remove(I++);
+ else {
+ std::set<const Type *>::iterator T = Types.find(I->second);
+ if (T==Types.end())
+ Table.remove(I++);
+ else {
+ Types.erase(T);
+ ++I;
+ }
+ }
+ }
+ // Find unnamed types.
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = Types.begin(),
+ E = Types.end(); I!=E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
+ ++RenameCounter;
+ Changed = true;
+ }
+ // Pointer for FunctionPass.
+ UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
+ return Changed;
+}
+
+char MSILModule::ID = 0;
+char MSILWriter::ID = 0;
+
+bool MSILWriter::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+ LInfo = &getAnalysis<LoopInfo>();
+ printFunction(F);
+ return false;
+}
+
+
+bool MSILWriter::doInitialization(Module &M) {
+ ModulePtr = &M;
+ Mang = new Mangler(M);
+ Out << ".assembly extern mscorlib {}\n";
+ Out << ".assembly MSIL {}\n\n";
+ Out << "// External\n";
+ printExternals();
+ Out << "// Declarations\n";
+ printDeclarations(M.getTypeSymbolTable());
+ Out << "// Definitions\n";
+ printGlobalVariables();
+ Out << "// Startup code\n";
+ printModuleStartup();
+ return false;
+}
+
+
+bool MSILWriter::doFinalization(Module &M) {
+ delete Mang;
+ return false;
+}
+
+
+void MSILWriter::printModuleStartup() {
+ Out <<
+ ".method static public int32 $MSIL_Startup() {\n"
+ "\t.entrypoint\n"
+ "\t.locals (native int i)\n"
+ "\t.locals (native int argc)\n"
+ "\t.locals (native int ptr)\n"
+ "\t.locals (void* argv)\n"
+ "\t.locals (string[] args)\n"
+ "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
+ "\tdup\n"
+ "\tstloc\targs\n"
+ "\tldlen\n"
+ "\tconv.i4\n"
+ "\tdup\n"
+ "\tstloc\targc\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tlocalloc\n"
+ "\tstloc\targv\n"
+ "\tldc.i4.0\n"
+ "\tstloc\ti\n"
+ "L_01:\n"
+ "\tldloc\ti\n"
+ "\tldloc\targc\n"
+ "\tceq\n"
+ "\tbrtrue\tL_02\n"
+ "\tldloc\targs\n"
+ "\tldloc\ti\n"
+ "\tldelem.ref\n"
+ "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
+ "StringToHGlobalAnsi(string)\n"
+ "\tstloc\tptr\n"
+ "\tldloc\targv\n"
+ "\tldloc\ti\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tadd\n"
+ "\tldloc\tptr\n"
+ "\tstind.i\n"
+ "\tldloc\ti\n"
+ "\tldc.i4.1\n"
+ "\tadd\n"
+ "\tstloc\ti\n"
+ "\tbr\tL_01\n"
+ "L_02:\n"
+ "\tcall void $MSIL_Init()\n";
+
+ // Call user 'main' function.
+ const Function* F = ModulePtr->getFunction("main");
+ if (!F || F->isDeclaration()) {
+ Out << "\tldc.i4.0\n\tret\n}\n";
+ return;
+ }
+ bool BadSig = true;
+ std::string Args("");
+ Function::const_arg_iterator Arg1,Arg2;
+
+ switch (F->arg_size()) {
+ case 0:
+ BadSig = false;
+ break;
+ case 1:
+ Arg1 = F->arg_begin();
+ if (Arg1->getType()->isInteger()) {
+ Out << "\tldloc\targc\n";
+ Args = getTypeName(Arg1->getType());
+ BadSig = false;
+ }
+ break;
+ case 2:
+ Arg1 = Arg2 = F->arg_begin(); ++Arg2;
+ if (Arg1->getType()->isInteger() &&
+ Arg2->getType()->getTypeID() == Type::PointerTyID) {
+ Out << "\tldloc\targc\n\tldloc\targv\n";
+ Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
+ BadSig = false;
+ }
+ break;
+ default:
+ BadSig = true;
+ }
+
+ bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
+ if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) {
+ Out << "\tldc.i4.0\n";
+ } else {
+ Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
+ getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
+ if (RetVoid)
+ Out << "\tldc.i4.0\n";
+ else
+ Out << "\tconv.i4\n";
+ }
+ Out << "\tret\n}\n";
+}
+
+bool MSILWriter::isZeroValue(const Value* V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+
+std::string MSILWriter::getValueName(const Value* V) {
+ // Quoting the name allows control and space characters in it.
+ return "'"+Mang->getValueName(V)+"'";
+}
+
+
+std::string MSILWriter::getLabelName(const std::string& Name) {
+ if (Name.find('.')!=std::string::npos) {
+ std::string Tmp(Name);
+ // Replace unacceptable characters in the label name.
+ for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
+ if (*I=='.') *I = '@';
+ return Tmp;
+ }
+ return Name;
+}
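+
+// For illustration: a basic block named "entry.cond.true" would be emitted
+// as the label "entry@cond@true", since this writer treats '.' as an
+// unacceptable character in label names.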
+
+
+std::string MSILWriter::getLabelName(const Value* V) {
+ return getLabelName(Mang->getValueName(V));
+}
+
+
+std::string MSILWriter::getConvModopt(unsigned CallingConvID) {
+ switch (CallingConvID) {
+ case CallingConv::C:
+ case CallingConv::Cold:
+ case CallingConv::Fast:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
+ case CallingConv::X86_FastCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
+ case CallingConv::X86_StdCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+ default:
+ cerr << "CallingConvID = " << CallingConvID << '\n';
+ assert(0 && "Unsupported calling convention");
+ }
+}
+
+
+std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
+ std::string Tmp = "";
+ const Type* ElemTy = Ty;
+ assert(Ty->getTypeID()==TyID && "Invalid type passed");
+ // Walk through the array element types.
+ for (;;) {
+ // Multidimensional array.
+ if (ElemTy->getTypeID()==TyID) {
+ if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
+ Tmp += utostr(ATy->getNumElements());
+ else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
+ Tmp += utostr(VTy->getNumElements());
+ ElemTy = cast<SequentialType>(ElemTy)->getElementType();
+ }
+ // Base element type found.
+ if (ElemTy->getTypeID()!=TyID) break;
+ Tmp += ",";
+ }
+ return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
+}
+
+
+std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID:
+ return "void ";
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ if(NumBits==1)
+ return "bool ";
+ if (!isSigned)
+ return "unsigned int"+utostr(NumBits)+" ";
+ return "int"+utostr(NumBits)+" ";
+ case Type::FloatTyID:
+ return "float32 ";
+ case Type::DoubleTyID:
+ return "float64 ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid primitive type");
+ }
+}
+
+
+std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
+ bool isNested) {
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return getPrimitiveTypeName(Ty,isSigned);
+ // FIXME: "OpaqueType" support
+ switch (Ty->getTypeID()) {
+ case Type::PointerTyID:
+ return "void* ";
+ case Type::StructTyID:
+ if (isNested)
+ return ModulePtr->getTypeName(Ty);
+ return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
+ case Type::ArrayTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ case Type::VectorTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid type in getTypeName()");
+ }
+}
+
+
+MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
+ // Function argument
+ if (isa<Argument>(V))
+ return ArgumentVT;
+ // Function
+ else if (const Function* F = dyn_cast<Function>(V))
+ return F->hasInternalLinkage() ? InternalVT : GlobalVT;
+ // Variable
+ else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
+ return G->hasInternalLinkage() ? InternalVT : GlobalVT;
+ // Constant
+ else if (isa<Constant>(V))
+ return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
+ // Local variable
+ return LocalVT;
+}
+
+
+std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ // Integer constant, expanding for stack operations.
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ // Expand integer value to "int32" or "int64".
+ if (Expand) return (NumBits<=32 ? "i4" : "i8");
+ if (NumBits==1) return "i1";
+ return (isSigned ? "i" : "u")+utostr(NumBits/8);
+ // Float constant.
+ case Type::FloatTyID:
+ return "r4";
+ case Type::DoubleTyID:
+ return "r8";
+ case Type::PointerTyID:
+ return "i"+utostr(TD->getTypeSize(Ty));
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in TypeToPostfix()");
+ }
+}
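+
+// Rough examples of the mapping above: i32 gives "i4" when signed, "u4" when
+// unsigned, and always "i4" when expanded for stack operations; i1 gives
+// "i1" (or "i4" when expanded); float gives "r4"; double gives "r8"; a
+// pointer gives "i4" or "i8" depending on the target pointer size.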
+
+
+void MSILWriter::printConvToPtr() {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("conv.u4");
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("conv.u8");
+ break;
+ default:
+ assert(0 && "Module use not supporting pointer size");
+ }
+}
+
+
+void MSILWriter::printPtrLoad(uint64_t N) {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("ldc.i4",utostr(N).c_str());
+ // FIXME: Need overflow test?
+ if (!isUInt32(N)) {
+ cerr << "Value = " << utostr(N) << '\n';
+ assert(0 && "32-bit pointer overflowed");
+ }
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("ldc.i8",utostr(N).c_str());
+ break;
+ default:
+ assert(0 && "Module use not supporting pointer size");
+ }
+}
+
+
+void MSILWriter::printValuePtrLoad(const Value* V) {
+ printValueLoad(V);
+ printConvToPtr();
+}
+
+
+void MSILWriter::printConstLoad(const Constant* C) {
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
+ // Integer constant
+ Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
+ if (CInt->isMinValue(true))
+ Out << CInt->getSExtValue();
+ else
+ Out << CInt->getZExtValue();
+ } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
+ // Float constant
+ uint64_t X;
+ unsigned Size;
+ if (FP->getType()->getTypeID()==Type::FloatTyID) {
+ X = FloatToBits(FP->getValue());
+ Size = 4;
+ } else {
+ X = DoubleToBits(FP->getValue());
+ Size = 8;
+ }
+ Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
+ } else if (isa<UndefValue>(C)) {
+ // Undefined constant value = NULL.
+ printPtrLoad(0);
+ } else {
+ cerr << "Constant = " << *C << '\n';
+ assert(0 && "Invalid constant value");
+ }
+ Out << '\n';
+}
+
+
+void MSILWriter::printValueLoad(const Value* V) {
+ MSILWriter::ValueType Location = getValueLocation(V);
+ switch (Location) {
+ // Global variable or function address.
+ case GlobalVT:
+ case InternalVT:
+ if (const Function* F = dyn_cast<Function>(V)) {
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ printSimpleInstruction("ldftn",
+ getCallSignature(F->getFunctionType(),NULL,Name).c_str());
+ } else {
+ std::string Tmp;
+ const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
+ if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
+ Tmp = "void* "+getValueName(V);
+ printSimpleInstruction("ldsfld",Tmp.c_str());
+ } else {
+ Tmp = getTypeName(ElemTy)+getValueName(V);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ }
+ }
+ break;
+ // Function argument.
+ case ArgumentVT:
+ printSimpleInstruction("ldarg",getValueName(V).c_str());
+ break;
+ // Local function variable.
+ case LocalVT:
+ printSimpleInstruction("ldloc",getValueName(V).c_str());
+ break;
+ // Constant value.
+ case ConstVT:
+ if (isa<ConstantPointerNull>(V))
+ printPtrLoad(0);
+ else
+ printConstLoad(cast<Constant>(V));
+ break;
+ // Constant expression.
+ case ConstExprVT:
+ printConstantExpr(cast<ConstantExpr>(V));
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printValueSave(const Value* V) {
+ switch (getValueLocation(V)) {
+ case ArgumentVT:
+ printSimpleInstruction("starg",getValueName(V).c_str());
+ break;
+ case LocalVT:
+ printSimpleInstruction("stloc",getValueName(V).c_str());
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right) {
+ printValueLoad(Left);
+ printValueLoad(Right);
+ Out << '\t' << Name << '\n';
+}
+
+
+void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
+ if(Operand)
+ Out << '\t' << Inst << '\t' << Operand << '\n';
+ else
+ Out << '\t' << Inst << '\n';
+}
+
+
+void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
+ for (BasicBlock::const_iterator I = Dst->begin(), E = Dst->end();
+ isa<PHINode>(I); ++I) {
+ const PHINode* Phi = cast<PHINode>(I);
+ const Value* Val = Phi->getIncomingValueForBlock(Src);
+ if (isa<UndefValue>(Val)) continue;
+ printValueLoad(Val);
+ printValueSave(Phi);
+ }
+}
+
+
+void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB) {
+ if (TrueBB==FalseBB) {
+ // "TrueBB" and "FalseBB" destination equals
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("pop");
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ } else if (FalseBB==NULL) {
+ // If "FalseBB" not used the jump have condition
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ } else if (TrueBB==NULL) {
+ // If "TrueBB" not used the jump is unconditional
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ // Copy PHI instructions for each block
+ std::string TmpLabel;
+ // Print PHI instructions for "TrueBB"
+ if (isa<PHINode>(TrueBB->begin())) {
+ TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ } else {
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ }
+ // Print PHI instructions for "FalseBB"
+ if (isa<PHINode>(FalseBB->begin())) {
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ }
+ if (isa<PHINode>(TrueBB->begin())) {
+ // Handle "TrueBB" PHI Copy
+ Out << TmpLabel << ":\n";
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ }
+ }
+}
+
+
+void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
+ if (Inst->isUnconditional()) {
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
+ } else {
+ printValueLoad(Inst->getCondition());
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
+ Inst->getSuccessor(1));
+ }
+}
+
+
+void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse) {
+ std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
+ printValueLoad(VTrue);
+ printValueLoad(Cond);
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ printSimpleInstruction("pop");
+ printValueLoad(VFalse);
+ Out << TmpLabel << ":\n";
+}
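+
+// Sketch of the sequence emitted for a select such as
+// "%r = select i1 %c, i32 %a, i32 %b" (names illustrative): load %a, load %c,
+// "brtrue select$true_N", "pop", load %b, then the "select$true_N:" label;
+// the chosen value is left on the evaluation stack.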
+
+
+void MSILWriter::printIndirectLoad(const Value* V) {
+ const Type* Ty = V->getType();
+ printValueLoad(V);
+ if (const PointerType* P = dyn_cast<PointerType>(Ty))
+ Ty = P->getElementType();
+ std::string Tmp = "ldind."+getTypePostfix(Ty, false);
+ printSimpleInstruction(Tmp.c_str());
+}
+
+
+void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
+ printValueLoad(Ptr);
+ printValueLoad(Val);
+ printIndirectSave(Val->getType());
+}
+
+
+void MSILWriter::printIndirectSave(const Type* Ty) {
+ // The instruction needs a signed postfix for any type.
+ std::string postfix = getTypePostfix(Ty, false);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+}
+
+
+void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty) {
+ std::string Tmp("");
+ printValueLoad(V);
+ switch (Op) {
+ // Signed
+ case Instruction::SExt:
+ case Instruction::SIToFP:
+ case Instruction::FPToSI:
+ Tmp = "conv."+getTypePostfix(Ty,false,true);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Unsigned
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::FPToUI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ Tmp = "conv."+getTypePostfix(Ty,false);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Do nothing
+ case Instruction::BitCast:
+ // FIXME: assumes that ld*/st* instructions do not change the data format.
+ break;
+ default:
+ cerr << "Opcode = " << Op << '\n';
+ assert(0 && "Invalid conversion instruction");
+ }
+}
+
+
+void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E) {
+ unsigned Size;
+ // Load address
+ printValuePtrLoad(V);
+ // Calculate element offset.
+ for (; I!=E; ++I){
+ Size = 0;
+ const Value* IndexValue = I.getOperand();
+ if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
+ uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
+ // The offset is the sum of the sizes of all preceding structure fields.
+ for (uint64_t F = 0; F<FieldIndex; ++F)
+ Size += TD->getTypeSize(StrucTy->getContainedType((unsigned)F));
+ printPtrLoad(Size);
+ printSimpleInstruction("add");
+ continue;
+ } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
+ Size = TD->getTypeSize(SeqTy->getElementType());
+ } else {
+ Size = TD->getTypeSize(*I);
+ }
+ // Add offset of current element to stack top.
+ if (!isZeroValue(IndexValue)) {
+ // Constant optimization.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
+ if (C->getValue().isNegative()) {
+ printPtrLoad(C->getValue().abs().getZExtValue()*Size);
+ printSimpleInstruction("sub");
+ continue;
+ } else
+ printPtrLoad(C->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValuePtrLoad(IndexValue);
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("add");
+ }
+ }
+}
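+
+// Illustrative example, assuming a 32-bit target and a pointer %p to the
+// struct {i32, i32}: for "getelementptr %p, 0, 1" the code above loads %p,
+// emits "conv.u4", skips the zero index, then emits "ldc.i4 4" and "add",
+// because the second field starts 4 bytes into the struct.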
+
+
+std::string MSILWriter::getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name) {
+ std::string Tmp("");
+ if (Ty->isVarArg()) Tmp += "vararg ";
+ // Name and return type.
+ Tmp += getTypeName(Ty->getReturnType())+Name+"(";
+ // Function argument type list.
+ unsigned NumParams = Ty->getNumParams();
+ for (unsigned I = 0; I!=NumParams; ++I) {
+ if (I!=0) Tmp += ",";
+ Tmp += getTypeName(Ty->getParamType(I));
+ }
+ // The CLR needs to know the exact number of parameters received by a vararg
+ // function, because the caller cleans up the stack.
+ if (Ty->isVarArg() && Inst) {
+ // Index of the first function argument in a "CallInst" or "InvokeInst".
+ unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
+ // Print variable argument types.
+ unsigned NumOperands = Inst->getNumOperands()-Org;
+ if (NumParams<NumOperands) {
+ if (NumParams!=0) Tmp += ", ";
+ Tmp += "... , ";
+ for (unsigned J = NumParams; J!=NumOperands; ++J) {
+ if (J!=NumParams) Tmp += ", ";
+ Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
+ }
+ }
+ }
+ return Tmp+")";
+}
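+
+// For a vararg callee the extra argument types follow a "..." sentinel. As a
+// rough illustration (calling-convention modopt omitted, default unsigned
+// spelling of integer type names assumed): a callee declared with one fixed
+// i32 parameter and called with one extra double would be printed roughly as
+// "vararg void 'f'(unsigned int32 , ... , float64 )".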
+
+
+void MSILWriter::printFunctionCall(const Value* FnVal,
+ const Instruction* Inst) {
+ // Get function calling convention.
+ std::string Name = "";
+ if (const CallInst* Call = dyn_cast<CallInst>(Inst))
+ Name = getConvModopt(Call->getCallingConv());
+ else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
+ Name = getConvModopt(Invoke->getCallingConv());
+ else {
+ cerr << "Instruction = " << Inst->getName() << '\n';
+ assert(0 && "Need \"Invoke\" or \"Call\" instruction only");
+ }
+ if (const Function* F = dyn_cast<Function>(FnVal)) {
+ // Direct call.
+ Name += getValueName(F);
+ printSimpleInstruction("call",
+ getCallSignature(F->getFunctionType(),Inst,Name).c_str());
+ } else {
+ // Indirect function call.
+ const PointerType* PTy = cast<PointerType>(FnVal->getType());
+ const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
+ // Load function address.
+ printValueLoad(FnVal);
+ printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
+ }
+}
+
+
+void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
+ std::string Name;
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ Name = getValueName(Inst->getOperand(1));
+ Name.insert(Name.length()-1,"$valist");
+ // Obtain the argument handle.
+ printSimpleInstruction("ldloca",Name.c_str());
+ printSimpleInstruction("arglist");
+ printSimpleInstruction("call",
+ "instance void [mscorlib]System.ArgIterator::.ctor"
+ "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
+ // Save as pointer type "void*"
+ printValueLoad(Inst->getOperand(1));
+ printSimpleInstruction("ldloca",Name.c_str());
+ printIndirectSave(PointerType::get(IntegerType::get(8)));
+ break;
+ case Intrinsic::vaend:
+ // Close argument list handle.
+ printIndirectLoad(Inst->getOperand(1));
+ printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
+ break;
+ case Intrinsic::vacopy:
+ // Copy "ArgIterator" valuetype.
+ printIndirectLoad(Inst->getOperand(1));
+ printIndirectLoad(Inst->getOperand(2));
+ printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
+ break;
+ default:
+ cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
+ assert(0 && "Invalid intrinsic function");
+ }
+}
+
+
+void MSILWriter::printCallInstruction(const Instruction* Inst) {
+ if (isa<IntrinsicInst>(Inst)) {
+ // Handle intrinsic function.
+ printIntrinsicCall(cast<IntrinsicInst>(Inst));
+ } else {
+ // Load arguments to stack and call function.
+ for (unsigned I = 1, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ printFunctionCall(Inst->getOperand(0),Inst);
+ }
+}
+
+
+void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ switch (Predicate) {
+ case ICmpInst::ICMP_EQ:
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case ICmpInst::ICMP_NE:
+ // Emulate = not neg (Op1 eq Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_ULE)
+ printBinaryInstruction("clt.un",Left,Right);
+ else
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_UGE)
+ printBinaryInstruction("cgt.un",Left,Right);
+ else
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_ULT:
+ printBinaryInstruction("clt.un",Left,Right);
+ break;
+ case ICmpInst::ICMP_SLT:
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case ICmpInst::ICMP_UGT:
+ printBinaryInstruction("cgt.un",Left,Right);
+ case ICmpInst::ICMP_SGT:
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ default:
+ cerr << "Predicate = " << Predicate << '\n';
+ assert(0 && "Invalid icmp predicate");
+ }
+}
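+
+// Sketch of the patterns emitted above (each comparison reloads its
+// operands): "icmp eq" becomes a single "ceq"; "icmp ne" becomes "ceq"
+// followed by "neg" and "not"; "icmp ule" becomes "ceq", "clt.un" and "or",
+// i.e. (a == b) | (a <u b).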
+
+
+void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ // FIXME: Correct comparison
+ std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
+ switch (Predicate) {
+ case FCmpInst::FCMP_UGT:
+ // X > Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("cgt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGT:
+ // X > Y
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ case FCmpInst::FCMP_UGE:
+ // X >= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGE:
+ // X >= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_ULT:
+ // X < Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("clt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < Y
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case FCmpInst::FCMP_ULE:
+ // X <= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLE:
+ // X <= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UEQ:
+ // X == Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OEQ:
+ // X == Y
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case FCmpInst::FCMP_UNE:
+ // X != Y
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ONE:
+ // X != Y && llvm_fcmp_ord(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ORD:
+ // return X == X && Y == Y
+ printBinaryInstruction("ceq",Left,Left);
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UNO:
+ // X != X || Y != Y
+ printBinaryInstruction("ceq",Left,Left);
+ printSimpleInstruction("not");
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("not");
+ printSimpleInstruction("or");
+ break;
+ default:
+ assert(0 && "Illegal FCmp predicate");
+ }
+}
+
+
+void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
+ std::string Label = "leave$normal_"+utostr(getUniqID());
+ Out << ".try {\n";
+ // Load arguments
+ for (unsigned I = 3, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ // Print call instruction
+ printFunctionCall(Inst->getOperand(0),Inst);
+ // Save function result and leave "try" block
+ printValueSave(Inst);
+ printSimpleInstruction("leave",Label.c_str());
+ Out << "}\n";
+ Out << "catch [mscorlib]System.Exception {\n";
+ // Redirect to unwind block
+ printSimpleInstruction("pop");
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
+ Out << "}\n" << Label << ":\n";
+ // Redirect to continue block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
+}
+
+
+void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
+ // FIXME: Emulate with IL "switch" instruction
+ // Emulate = if () else if () else if () else ...
+ for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
+ printValueLoad(Inst->getCondition());
+ printValueLoad(Inst->getCaseValue(I));
+ printSimpleInstruction("ceq");
+ // Condition jump to successor block
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
+ }
+ // Jump to default block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
+}
+
+
+void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
+ printIndirectLoad(Inst->getOperand(0));
+ printSimpleInstruction("call",
+ "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
+ printSimpleInstruction("refanyval","void*");
+ std::string Name = "ldind."+getTypePostfix(PointerType::get(IntegerType::get(8)),false);
+ printSimpleInstruction(Name.c_str());
+}
+
+
+void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
+ uint64_t Size = TD->getTypeSize(Inst->getAllocatedType());
+ // Constant optimization.
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+ printPtrLoad(CInt->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValueLoad(Inst->getOperand(0));
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("localloc");
+}
+
+
+void MSILWriter::printInstruction(const Instruction* Inst) {
+ const Value *Left = 0, *Right = 0;
+ if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
+ if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
+ // Print instruction
+ // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
+ switch (Inst->getOpcode()) {
+ // Terminator
+ case Instruction::Ret:
+ if (Inst->getNumOperands()) {
+ printValueLoad(Left);
+ printSimpleInstruction("ret");
+ } else
+ printSimpleInstruction("ret");
+ break;
+ case Instruction::Br:
+ printBranchInstruction(cast<BranchInst>(Inst));
+ break;
+ // Binary
+ case Instruction::Add:
+ printBinaryInstruction("add",Left,Right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",Left,Right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",Left,Right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",Left,Right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",Left,Right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",Left,Right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",Left,Right);
+ break;
+ // Binary Condition
+ case Instruction::ICmp:
+ printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ // Bitwise Binary
+ case Instruction::And:
+ printBinaryInstruction("and",Left,Right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",Left,Right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",Left,Right);
+ break;
+ case Instruction::Shl:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shl");
+ break;
+ case Instruction::LShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr.un");
+ break;
+ case Instruction::AShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr");
+ break;
+ case Instruction::Select:
+ printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2));
+ break;
+ case Instruction::Load:
+ printIndirectLoad(Inst->getOperand(0));
+ break;
+ case Instruction::Store:
+ printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(Inst->getOpcode(),Left,
+ cast<CastInst>(Inst)->getDestTy());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
+ gep_type_end(Inst));
+ break;
+ case Instruction::Call:
+ printCallInstruction(cast<CallInst>(Inst));
+ break;
+ case Instruction::Invoke:
+ printInvokeInstruction(cast<InvokeInst>(Inst));
+ break;
+ case Instruction::Unwind:
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor()");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::Switch:
+ printSwitchInstruction(cast<SwitchInst>(Inst));
+ break;
+ case Instruction::Alloca:
+ printAllocaInstruction(cast<AllocaInst>(Inst));
+ break;
+ case Instruction::Malloc:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Free:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Unreachable:
+ printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor(string)");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::VAArg:
+ printVAArgInstruction(cast<VAArgInst>(Inst));
+ break;
+ default:
+ cerr << "Instruction = " << Inst->getName() << '\n';
+ assert(0 && "Unsupported instruction");
+ }
+}
+
+
+void MSILWriter::printLoop(const Loop* L) {
+ Out << getLabelName(L->getHeader()->getName()) << ":\n";
+ const std::vector<BasicBlock*>& blocks = L->getBlocks();
+ for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
+ BasicBlock* BB = blocks[I];
+ Loop* BBLoop = LInfo->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
+ printLoop(BBLoop);
+ }
+ printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
+}
+
+
+void MSILWriter::printBasicBlock(const BasicBlock* BB) {
+ Out << getLabelName(BB) << ":\n";
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ const Instruction* Inst = I;
+ // Emit the original LLVM instruction as a comment.
+ Out << "\n//" << *Inst << "\n";
+ // Do not handle PHI instructions in the current block.
+ if (Inst->getOpcode()==Instruction::PHI) continue;
+ // Print instruction
+ printInstruction(Inst);
+ // Save result
+ if (Inst->getType()!=Type::VoidTy) {
+ // Do not save the value after an invoke; that is done in the "try" block.
+ if (Inst->getOpcode()==Instruction::Invoke) continue;
+ printValueSave(Inst);
+ }
+ }
+}
+
+
+void MSILWriter::printLocalVariables(const Function& F) {
+ std::string Name;
+ const Type* Ty = NULL;
+ std::set<const Value*> Printed;
+ const Value* VaList = NULL;
+ unsigned StackDepth = 8;
+ // Find local variables
+ for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
+ if (I->getOpcode()==Instruction::Call ||
+ I->getOpcode()==Instruction::Invoke) {
+ // Test stack depth.
+ if (StackDepth<I->getNumOperands())
+ StackDepth = I->getNumOperands();
+ }
+ const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
+ if (AI && !isa<GlobalVariable>(AI)) {
+ // Local variable allocation.
+ Ty = PointerType::get(AI->getAllocatedType());
+ Name = getValueName(AI);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ } else if (I->getType()!=Type::VoidTy) {
+ // Operation result.
+ Ty = I->getType();
+ Name = getValueName(&*I);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ }
+ // Check for a 'va_list' variable.
+ bool isVaList = false;
+ if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
+ // "va_list" as "va_arg" instruction operand.
+ isVaList = true;
+ VaList = VaInst->getOperand(0);
+ } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
+ // "va_list" as intrinsic function operand.
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ case Intrinsic::vaend:
+ case Intrinsic::vacopy:
+ isVaList = true;
+ VaList = Inst->getOperand(1);
+ break;
+ default:
+ isVaList = false;
+ }
+ }
+ // Print "va_list" variable.
+ if (isVaList && Printed.insert(VaList).second) {
+ Name = getValueName(VaList);
+ Name.insert(Name.length()-1,"$valist");
+ Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
+ << Name << ")\n";
+ }
+ }
+ printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
+}
+
+
+void MSILWriter::printFunctionBody(const Function& F) {
+ // Print body
+ for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
+ if (Loop *L = LInfo->getLoopFor(I)) {
+ if (L->getHeader()==I && L->getParentLoop()==0)
+ printLoop(L);
+ } else {
+ printBasicBlock(I);
+ }
+ }
+}
+
+
+void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
+ const Value *left = 0, *right = 0;
+ if (CE->getNumOperands()>=1) left = CE->getOperand(0);
+ if (CE->getNumOperands()>=2) right = CE->getOperand(1);
+ // Print instruction
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(CE->getOpcode(),left,CE->getType());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
+ break;
+ case Instruction::ICmp:
+ printICmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::Select:
+ printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
+ break;
+ case Instruction::Add:
+ printBinaryInstruction("add",left,right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",left,right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",left,right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",left,right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",left,right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",left,right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",left,right);
+ break;
+ case Instruction::And:
+ printBinaryInstruction("and",left,right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",left,right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",left,right);
+ break;
+ case Instruction::Shl:
+ printBinaryInstruction("shl",left,right);
+ break;
+ case Instruction::LShr:
+ printBinaryInstruction("shr.un",left,right);
+ break;
+ case Instruction::AShr:
+ printBinaryInstruction("shr",left,right);
+ break;
+ default:
+ cerr << "Expression = " << *CE << "\n";
+ assert(0 && "Invalid constant expression");
+ }
+}
+
+
+void MSILWriter::printStaticInitializerList() {
+ // List of global variables with uninitialized fields.
+ for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
+ VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
+ ++VarI) {
+ const std::vector<StaticInitializer>& InitList = VarI->second;
+ if (InitList.empty()) continue;
+ // For each uninitialized field.
+ for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
+ E = InitList.end(); I!=E; ++I) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
+ Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
+ utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
+ // Load variable address
+ printValueLoad(VarI->first);
+ // Add offset
+ if (I->offset!=0) {
+ printPtrLoad(I->offset);
+ printSimpleInstruction("add");
+ }
+ // Load value
+ printConstantExpr(CE);
+ // Save result at offset
+ std::string postfix = getTypePostfix(CE->getType(),true);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+ } else {
+ cerr << "Constant = " << *I->constant << '\n';
+ assert(0 && "Invalid static initializer");
+ }
+ }
+ }
+}
+
+
+void MSILWriter::printFunction(const Function& F) {
+ const FunctionType* FTy = F.getFunctionType();
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ bool isSigned = Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt);
+ Out << "\n.method static ";
+ Out << (F.hasInternalLinkage() ? "private " : "public ");
+ if (F.isVarArg()) Out << "vararg ";
+ Out << getTypeName(F.getReturnType(),isSigned) <<
+ getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
+ // Arguments
+ Out << "\t(";
+ unsigned ArgIdx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
+ ++I, ++ArgIdx) {
+ isSigned = Attrs && Attrs->paramHasAttr(ArgIdx, ParamAttr::SExt);
+ if (I!=F.arg_begin()) Out << ", ";
+ Out << getTypeName(I->getType(),isSigned) << getValueName(I);
+ }
+ Out << ") cil managed\n";
+ // Body
+ Out << "{\n";
+ printLocalVariables(F);
+ printFunctionBody(F);
+ Out << "}\n";
+}
+
+
+void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
+ std::string Name;
+ std::set<const Type*> Printed;
+ for (std::set<const Type*>::const_iterator
+ UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
+ const Type* Ty = *UI;
+ if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty))
+ Name = getTypeName(Ty, false, true);
+ // Other types need no declaration.
+ else continue;
+ // Print each type only once.
+ if (Printed.insert(Ty).second) {
+ Out << ".class value explicit ansi sealed '" << Name << "'";
+ Out << " { .pack " << 1 << " .size " << TD->getTypeSize(Ty) << " }\n\n";
+ }
+ }
+}
+
+
+unsigned int MSILWriter::getBitWidth(const Type* Ty) {
+ unsigned int N = Ty->getPrimitiveSizeInBits();
+ assert(N!=0 && "Invalid type in getBitWidth()");
+ switch (N) {
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return N;
+ default:
+ cerr << "Bits = " << N << '\n';
+ assert(0 && "Unsupported integer width");
+ }
+}
+
+
+void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
+ uint64_t TySize = 0;
+ const Type* Ty = C->getType();
+ // Print zero initialized constant.
+ if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
+ TySize = TD->getTypeSize(C->getType());
+ Offset += TySize;
+ Out << "int8 (0) [" << TySize << "]";
+ return;
+ }
+ // Print constant initializer
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ TySize = TD->getTypeSize(Ty);
+ const ConstantInt* Int = cast<ConstantInt>(C);
+ Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ TySize = TD->getTypeSize(Ty);
+ const ConstantFP* FP = cast<ConstantFP>(C);
+ if (Ty->getTypeID() == Type::FloatTyID)
+ Out << "int32 (" << FloatToBits(FP->getValue()) << ')';
+ else
+ Out << "int64 (" << DoubleToBits(FP->getValue()) << ')';
+ break;
+ }
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
+ if (I!=0) Out << ",\n";
+ printStaticConstant(C->getOperand(I),Offset);
+ }
+ break;
+ case Type::PointerTyID:
+ TySize = TD->getTypeSize(C->getType());
+ // Initialize with global variable address
+ if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
+ std::string name = getValueName(G);
+ Out << "&(" << name.insert(name.length()-1,"$data") << ")";
+ } else {
+ // Dynamic initialization
+ if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
+ InitListPtr->push_back(StaticInitializer(C,Offset));
+ // Null pointer initialization
+ if (TySize==4) Out << "int32 (0)";
+ else if (TySize==8) Out << "int64 (0)";
+ else assert(0 && "Invalid pointer size");
+ }
+ break;
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in printStaticConstant()");
+ }
+ // Increase offset.
+ Offset += TySize;
+}
+
+
+void MSILWriter::printStaticInitializer(const Constant* C,
+ const std::string& Name) {
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ Out << getPrimitiveTypeName(C->getType(), false);
+ break;
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ case Type::PointerTyID:
+ Out << getTypeName(C->getType());
+ break;
+ default:
+ cerr << "Type = " << *C << "\n";
+ assert(0 && "Invalid constant type");
+ }
+ // Print initializer
+ std::string label = Name;
+ label.insert(label.length()-1,"$data");
+ Out << Name << " at " << label << '\n';
+ Out << ".data " << label << " = {\n";
+ uint64_t offset = 0;
+ printStaticConstant(C,offset);
+ Out << "\n}\n\n";
+}
+
+
+void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
+ const Constant* C = G->getInitializer();
+ if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ InitListPtr = 0;
+ else
+ InitListPtr = &StaticInitList[G];
+ printStaticInitializer(C,getValueName(G));
+}
+
+
+void MSILWriter::printGlobalVariables() {
+ if (ModulePtr->global_empty()) return;
+ Module::global_iterator I,E;
+ for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
+ // Variable definition
+ Out << ".field static " << (I->isDeclaration() ? "public " :
+ "private ");
+ if (I->isDeclaration()) {
+ Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
+ } else
+ printVariableDefinition(&*I);
+ }
+}
+
+
+const char* MSILWriter::getLibraryName(const Function* F) {
+ return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv());
+}
+
+
+const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
+ return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0);
+}
+
+
+const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv) {
+ // TODO: Read a *.def file with function and library definitions.
+ return "MSVCRT.DLL";
+}
+
+
+void MSILWriter::printExternals() {
+ Module::const_iterator I,E;
+ // Functions.
+ for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
+ // Skip intrinsics.
+ if (I->getIntrinsicID()) continue;
+ if (I->isDeclaration()) {
+ const Function* F = I;
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ std::string Sig =
+ getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
+ Out << ".method static hidebysig pinvokeimpl(\""
+ << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
+ }
+ }
+ // External variables and static initialization.
+ Out <<
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int LoadLibrary(string) preservesig {}\n"
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int GetProcAddress(native int, string) preservesig {}\n";
+ Out <<
+ ".method private static void* $MSIL_Import(string lib,string sym)\n"
+ " managed cil\n{\n"
+ "\tldarg\tlib\n"
+ "\tcall\tnative int LoadLibrary(string)\n"
+ "\tldarg\tsym\n"
+ "\tcall\tnative int GetProcAddress(native int,string)\n"
+ "\tdup\n"
+ "\tbrtrue\tL_01\n"
+ "\tldstr\t\"Can no import variable\"\n"
+ "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
+ "\tthrow\n"
+ "L_01:\n"
+ "\tret\n"
+ "}\n\n"
+ ".method static private void $MSIL_Init() managed cil\n{\n";
+ printStaticInitializerList();
+ // Foreach global variable.
+ for (Module::global_iterator I = ModulePtr->global_begin(),
+ E = ModulePtr->global_end(); I!=E; ++I) {
+ if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
+ // Use "LoadLibrary"/"GetProcAddress" to recive variable address.
+ std::string Label = "not_null$_"+utostr(getUniqID());
+ std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
+ Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n";
+ printSimpleInstruction("call","void* $MSIL_Import(string,string)");
+ printIndirectSave(I->getType());
+ }
+ printSimpleInstruction("ret");
+ Out << "}\n\n";
+}
+
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
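+// addPassesToEmitWholeFile sets up the lowering pipeline for the MSIL writer:
+// GC lowering, alloca lowering, switch lowering (see the FIXME below), CFG
+// simplification, then the MSILModule analysis and the MSILWriter pass itself.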
+bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, std::ostream &o,
+ CodeGenFileType FileType, bool Fast)
+{
+ if (FileType != TargetMachine::AssemblyFile) return true;
+ MSILWriter* Writer = new MSILWriter(o);
+ PM.add(createLowerGCPass());
+ PM.add(createLowerAllocationsPass(true));
+ // FIXME: Handle switch through the native IL "switch" instruction.
+ PM.add(createLowerSwitchPass());
+ PM.add(createCFGSimplificationPass());
+ PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
+ PM.add(Writer);
+ return false;
+}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
new file mode 100644
index 0000000..c2cd0ab
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -0,0 +1,255 @@
+//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Roman Samoilov and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSILWriter pass that implements the MSIL backend.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MSILWRITER_H
+#define MSILWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/Mangler.h"
+#include <algorithm>
+#include <ios>
+using namespace llvm;
+
+namespace {
+
+ class MSILModule : public ModulePass {
+ Module *ModulePtr;
+ const std::set<const Type *>*& UsedTypes;
+ const TargetData*& TD;
+
+ public:
+ static char ID;
+ MSILModule(const std::set<const Type *>*& _UsedTypes,
+ const TargetData*& _TD)
+ : ModulePass((intptr_t)&ID), UsedTypes(_UsedTypes), TD(_TD) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ AU.addRequired<TargetData>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MSIL backend definitions";
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ };
+
+ class MSILWriter : public FunctionPass {
+ struct StaticInitializer {
+ const Constant* constant;
+ uint64_t offset;
+
+ StaticInitializer()
+ : constant(0), offset(0) {}
+
+ StaticInitializer(const Constant* _constant, uint64_t _offset)
+ : constant(_constant), offset(_offset) {}
+ };
+
+ uint64_t UniqID;
+
+ uint64_t getUniqID() {
+ return ++UniqID;
+ }
+
+ public:
+ std::ostream &Out;
+ Module* ModulePtr;
+ const TargetData* TD;
+ Mangler* Mang;
+ LoopInfo *LInfo;
+ std::vector<StaticInitializer>* InitListPtr;
+ std::map<const GlobalVariable*,std::vector<StaticInitializer> >
+ StaticInitList;
+ const std::set<const Type *>* UsedTypes;
+ static char ID;
+ MSILWriter(std::ostream &o) : FunctionPass((intptr_t)&ID), Out(o) {
+ UniqID = 0;
+ }
+
+ enum ValueType {
+ UndefVT,
+ GlobalVT,
+ InternalVT,
+ ArgumentVT,
+ LocalVT,
+ ConstVT,
+ ConstExprVT
+ };
+
+ bool isVariable(ValueType V) {
+ return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
+ }
+
+ bool isConstValue(ValueType V) {
+ return V==ConstVT || V==ConstExprVT;
+ }
+
+ virtual const char *getPassName() const { return "MSIL backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool doFinalization(Module &M);
+
+ void printModuleStartup();
+
+ bool isZeroValue(const Value* V);
+
+ std::string getValueName(const Value* V);
+
+ std::string getLabelName(const Value* V);
+
+ std::string getLabelName(const std::string& Name);
+
+ std::string getConvModopt(unsigned CallingConvID);
+
+ std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
+
+ std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
+
+ std::string getFunctionTypeName(const Type* Ty);
+
+ std::string getPointerTypeName(const Type* Ty);
+
+ std::string getTypeName(const Type* Ty, bool isSigned = false,
+ bool isNested = false);
+
+ ValueType getValueLocation(const Value* V);
+
+ std::string getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned = false);
+
+ void printConvToPtr();
+
+ void printPtrLoad(uint64_t N);
+
+ void printValuePtrLoad(const Value* V);
+
+ void printConstLoad(const Constant* C);
+
+ void printValueLoad(const Value* V);
+
+ void printValueSave(const Value* V);
+
+ void printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right);
+
+ void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
+
+ void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
+
+ void printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB);
+
+ void printBranchInstruction(const BranchInst* Inst);
+
+ void printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse);
+
+ void printIndirectLoad(const Value* V);
+
+ void printIndirectSave(const Value* Ptr, const Value* Val);
+
+ void printIndirectSave(const Type* Ty);
+
+ void printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty);
+
+ void printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E);
+
+ std::string getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name);
+
+ void printFunctionCall(const Value* FnVal, const Instruction* Inst);
+
+ void printIntrinsicCall(const IntrinsicInst* Inst);
+
+ void printCallInstruction(const Instruction* Inst);
+
+ void printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printInvokeInstruction(const InvokeInst* Inst);
+
+ void printSwitchInstruction(const SwitchInst* Inst);
+
+ void printVAArgInstruction(const VAArgInst* Inst);
+
+ void printAllocaInstruction(const AllocaInst* Inst);
+
+ void printInstruction(const Instruction* Inst);
+
+ void printLoop(const Loop* L);
+
+ void printBasicBlock(const BasicBlock* BB);
+
+ void printLocalVariables(const Function& F);
+
+ void printFunctionBody(const Function& F);
+
+ void printConstantExpr(const ConstantExpr* CE);
+
+ void printStaticInitializerList();
+
+ void printFunction(const Function& F);
+
+ void printDeclarations(const TypeSymbolTable& ST);
+
+ unsigned int getBitWidth(const Type* Ty);
+
+ void printStaticConstant(const Constant* C, uint64_t& Offset);
+
+ void printStaticInitializer(const Constant* C, const std::string& Name);
+
+ void printVariableDefinition(const GlobalVariable* G);
+
+ void printGlobalVariables();
+
+ const char* getLibraryName(const Function* F);
+
+ const char* getLibraryName(const GlobalVariable* GV);
+
+ const char* getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv);
+
+ void printExternals();
+ };
+}
+
+#endif
+
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
new file mode 100644
index 0000000..17f7247
--- /dev/null
+++ b/lib/Target/MSIL/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by Roman Samoilov and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSIL
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
new file mode 100644
index 0000000..2b9a569
--- /dev/null
+++ b/lib/Target/MSIL/README.TXT
@@ -0,0 +1,26 @@
+//===---------------------------------------------------------------------===//
+
+Vector instruction support:
+
+ShuffleVector
+ExtractElement
+InsertElement
+
+//===---------------------------------------------------------------------===//
+
+Add "OpaqueType" type.
+
+//===---------------------------------------------------------------------===//
+
+"switch" instruction emulation with CLI "switch" instruction.
+
+//===---------------------------------------------------------------------===//
+
+Write a linker for external functions, because exporting a function requires
+knowing which dynamic library the function is located in.
+
+.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
+ void free(void*) preservesig {}
+
+
+
diff --git a/lib/Target/Makefile b/lib/Target/Makefile
new file mode 100644
index 0000000..59f50fe
--- /dev/null
+++ b/lib/Target/Makefile
@@ -0,0 +1,20 @@
+#===- lib/Target/Makefile ----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMTarget
+BUILD_ARCHIVE = 1
+
+# We include this early so we can access the value of TARGETS_TO_BUILD as the
+# value for PARALLEL_DIRS which must be set before Makefile.rules is included
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := $(TARGETS_TO_BUILD)
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
new file mode 100644
index 0000000..6ebffc7
--- /dev/null
+++ b/lib/Target/Mips/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/Mips/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by Bruno Cardoso Lopes and is distributed under the
+# University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMips
+TARGET = Mips
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
+ MipsGenRegisterInfo.inc MipsGenInstrNames.inc \
+ MipsGenInstrInfo.inc MipsGenAsmWriter.inc \
+ MipsGenDAGISel.inc MipsGenCallingConv.inc \
+ MipsGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
new file mode 100644
index 0000000..48b08ea
--- /dev/null
+++ b/lib/Target/Mips/Mips.h
@@ -0,0 +1,38 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include <iosfwd>
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPassManager;
+ class FunctionPass;
+ class MachineCodeEmitter;
+
+ FunctionPass *createMipsCodePrinterPass(std::ostream &OS,
+ MipsTargetMachine &TM);
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+} // end namespace llvm;
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#include "MipsGenRegisterNames.inc"
+
+// Defines symbolic names for the Mips instructions.
+#include "MipsGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
new file mode 100644
index 0000000..662bc3b
--- /dev/null
+++ b/lib/Target/Mips/Mips.td
@@ -0,0 +1,63 @@
+//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features
+//===----------------------------------------------------------------------===//
+
+// TODO: dummy, needed to compile
+def FeatureCIX : SubtargetFeature<"r3000", "isR3000", "true",
+ "Enable r3000 extentions">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Description
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrInfo.td"
+
+def MipsInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "MipsCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Mips : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = MipsInstrInfo;
+}
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 0000000..1df1291
--- /dev/null
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,433 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter {
+ MipsAsmPrinter(std::ostream &O, MipsTargetMachine &TM,
+ const TargetAsmInfo *T):
+ AsmPrinter(O, TM, T) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ enum SetDirectiveFlags {
+ REORDER, // enables instruction reordering.
+ NOREORDER, // disables instruction reordering.
+ MACRO, // enables GAS macros.
+ NOMACRO // disables GAS macros.
+ };
+
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+
+ void printHex32(unsigned int Value);
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd();
+ void emitFrameDirective(MachineFunction &MF);
+ void emitMaskDirective(MachineFunction &MF);
+ void emitFMaskDirective();
+ void emitSetDirective(SetDirectiveFlags Flag);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "MipsGenAsmWriter.inc"
+
+/// createMipsCodePrinterPass - Returns a pass that prints the MIPS
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+FunctionPass *llvm::createMipsCodePrinterPass(std::ostream &o,
+ MipsTargetMachine &tm)
+{
+ return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
+/// This pattern will be emitted:
+/// .frame reg1, size, reg2
+/// It describes the stack frame.
+/// reg1 - stack pointer
+/// size - stack size allocated for the function
+/// reg2 - return address register
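+/// For example (illustrative values only):
+/// .frame $sp,32,$ra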
+void MipsAsmPrinter::
+emitFrameDirective(MachineFunction &MF)
+{
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF.getFrameInfo()->getStackSize();
+
+
+ O << "\t.frame\t" << "$" << LowercaseString(RI.get(stackReg).Name)
+ << "," << stackSize << ","
+ << "$" << LowercaseString(RI.get(returnReg).Name)
+ << "\n";
+}
+
+/// This pattern will be emitted:
+/// .mask bitmask, offset
+/// Tells the assembler (and possibly linker) which registers are saved and where.
+/// bitmask - mask of all GPRs (little endian)
+/// offset - negative value. offset+stackSize should give where on the stack
+/// the first GPR is saved.
+/// TODO: consider callee-saved GPRs here instead of hardcoding register numbers.
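+/// For example (illustrative), with both FP and RA saved:
+/// .mask 0xc0000000,-4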
+void MipsAsmPrinter::
+emitMaskDirective(MachineFunction &MF)
+{
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ bool hasFP = RI.hasFP(MF);
+ bool saveRA = MF.getFrameInfo()->hasCalls();
+
+ int offset;
+
+ if (!MipsFI->getTopSavedRegOffset())
+ offset = 0;
+ else
+ offset = -(MF.getFrameInfo()->getStackSize()
+ -MipsFI->getTopSavedRegOffset());
+
+ #ifndef NDEBUG
+ DOUT << "<--ASM PRINTER--emitMaskDirective-->" << "\n";
+ DOUT << "StackSize : " << MF.getFrameInfo()->getStackSize() << "\n";
+ DOUT << "getTopSavedRegOffset() : " << MipsFI->getTopSavedRegOffset() << "\n";
+ DOUT << "offset : " << offset << "\n\n";
+ #endif
+
+ unsigned int bitmask = 0;
+
+ if (hasFP)
+ bitmask |= (1 << 30);
+
+ if (saveRA)
+ bitmask |= (1 << 31);
+
+ O << "\t.mask\t";
+ printHex32(bitmask);
+ O << "," << offset << "\n";
+}
+
+/// This pattern will be emitted:
+/// .fmask bitmask, offset
+/// Tells the assembler (and possibly linker) which float registers are saved.
+/// bitmask - mask of all floating-point registers (little endian)
+/// offset - negative value. offset+stackSize should give where on the stack
+/// the first floating-point register is saved.
+/// TODO: implement this, dummy for now
+void MipsAsmPrinter::
+emitFMaskDirective()
+{
+ O << "\t.fmask\t0x00000000,0" << "\n";
+}
+
+/// Print a 32-bit hex number.
+/// TODO: pad to 8 digits with setfill and setw.
+void MipsAsmPrinter::
+printHex32(unsigned int Value) {
+ O << "0x" << std::hex << Value << std::dec;
+}
+
+/// Emit Set directives.
+void MipsAsmPrinter::
+emitSetDirective(SetDirectiveFlags Flag) {
+
+ O << "\t.set\t";
+ switch(Flag) {
+ case REORDER: O << "reorder" << "\n"; break;
+ case NOREORDER: O << "noreorder" << "\n"; break;
+ case MACRO: O << "macro" << "\n"; break;
+ case NOMACRO: O << "nomacro" << "\n"; break;
+ default: break;
+ }
+}
+
+/// Emit the directives used by GAS at the start of functions.
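+/// For a function "foo" the emitted prologue looks roughly like (illustrative):
+///   .globl foo
+///   .ent   foo
+///   .type  foo, @function
+/// foo:
+/// followed by the .frame/.mask/.fmask directives and
+/// .set noreorder / .set nomacro.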
+void MipsAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ // In MIPS GAS, ".align n" clears the low n bits of the location counter,
+ // i.e. aligns to 2^n bytes. So for 4-byte alignment we must emit .align 2.
+ EmitAlignment(1, F);
+
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.ent\t" << CurrentFnName << "\n";
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+ O << CurrentFnName << ":\n";
+
+ emitFrameDirective(MF);
+ emitMaskDirective(MF);
+ emitFMaskDirective();
+
+ emitSetDirective(NOREORDER);
+ emitSetDirective(NOMACRO);
+}
+
+/// Emit the directives used by GAS at the end of functions.
+void MipsAsmPrinter::
+emitFunctionEnd() {
+ emitSetDirective(MACRO);
+ emitSetDirective(REORDER);
+ O << "\t.end\t" << CurrentFnName << "\n";
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+bool MipsAsmPrinter::
+runOnMachineFunction(MachineFunction &MF)
+{
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ O << "\n\n";
+
+ // What's my mangled name?
+ CurrentFnName = Mang->getValueName(MF.getFunction());
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+ }
+
+ // Emit function end directives
+ emitFunctionEnd();
+
+ // We didn't modify anything.
+ return false;
+}
+
+void MipsAsmPrinter::
+printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ bool closeP=false;
+
+ // %hi and %lo are used in MIPS GAS to split large constants and addresses.
+ if (MI->getOpcode() == Mips::LUi && !MO.isRegister()
+ && !MO.isImmediate()) {
+ O << "%hi(";
+ closeP = true;
+ } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isRegister()
+ && !MO.isImmediate()) {
+ O << "%lo(";
+ closeP = true;
+ }
+
+ switch (MO.getType())
+ {
+ case MachineOperand::MO_Register:
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << "$" << LowercaseString (RI.get(MO.getReg()).Name);
+ else
+ O << "$" << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ if ((MI->getOpcode() == Mips::SLTiu) || (MI->getOpcode() == Mips::ORi) ||
+ (MI->getOpcode() == Mips::LUi) || (MI->getOpcode() == Mips::ANDi))
+ O << (unsigned short int)MO.getImmedValue();
+ else
+ O << (short int)MO.getImmedValue();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << Mang->getValueName(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getConstantPoolIndex();
+ break;
+
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+
+ if (closeP) O << ")";
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier)
+{
+ // lw/sw $reg, MemOperand
+ // will turn into:
+ // lw/sw $reg, imm($reg)
+ printOperand(MI, opNum);
+ O << "(";
+ printOperand(MI, opNum+1);
+ O << ")";
+}
+
+bool MipsAsmPrinter::
+doInitialization(Module &M)
+{
+ Mang = new Mangler(M);
+ return false; // success
+}
+
+bool MipsAsmPrinter::
+doFinalization(Module &M)
+{
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+
+ // External globals require no code.
+ if (I->hasInitializer()) {
+
+ // Check to see if this is a special global
+ // used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Size = TD->getTypeSize(C->getType());
+ unsigned Align = TD->getPrefTypeAlignment(C->getType());
+
+ if (C->isNullValue() && (I->hasLinkOnceLinkage() ||
+ I->hasInternalLinkage() || I->hasWeakLinkage()
+ /* FIXME: Verify correct */)) {
+
+ SwitchToDataSection(".data", I);
+ if (I->hasInternalLinkage())
+ O << "\t.local " << name << "\n";
+
+ O << "\t.comm " << name << ","
+ << TD->getTypeSize(C->getType())
+ << "," << Align << "\n";
+
+ } else {
+
+ switch (I->getLinkage())
+ {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << "\n";
+ SwitchToDataSection("", I);
+ O << "\t.section\t\".llvm.linkonce.d." << name
+ << "\",\"aw\",@progbits\n";
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables
+ // should go into a section of their name or
+ // something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << "\n";
+ case GlobalValue::InternalLinkage:
+ if (C->isNullValue())
+ SwitchToDataSection(".bss", I);
+ else
+ SwitchToDataSection(".data", I);
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any"
+ << "unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is"
+ << "not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is"
+ << "not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+ O << "\t.align " << Align << "\n";
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+ }
+ }
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 0000000..23ef850
--- /dev/null
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,39 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Mips : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// Mips Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Mips : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
+
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 0000000..d6e3830
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,272 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+
+#include "Mips.h"
+#include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class VISIBILITY_HIDDEN MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// MipsLowering - This object fully describes how to lower LLVM code to an
+ /// Mips-specific SelectionDAG.
+ MipsTargetLowering MipsLowering;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ // TODO: add initialization in the constructor.
+ //const MipsSubtarget *Subtarget;
+
+public:
+ MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(MipsLowering),
+ TM(tm), MipsLowering(*TM.getTargetLowering()) {}
+
+ virtual void InstructionSelectBasicBlock(SelectionDAG &SD);
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ SDNode *Select(SDOperand N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset);
+
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDOperand getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+};
+
+}
+
+/// InstructionSelectBasicBlock - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MipsDAGToDAGISel::
+InstructionSelectBasicBlock(SelectionDAG &SD)
+{
+ DEBUG(BB->dump());
+ // Codegen the basic block.
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+ #endif
+
+ // Select target instructions for the DAG.
+ SD.setRoot(SelectRoot(SD.getRoot()));
+
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+ #endif
+
+ SD.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(SD);
+}
+
+/// ComplexPattern used in MipsInstrInfo.td.
+/// Used by Mips load/store instructions.
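+/// It splits an address into a base and an immediate offset; for example
+/// (illustrative) a frame index yields (TargetFrameIndex, 0), "add X, imm16"
+/// yields (X, imm16), and any other address yields (Addr, 0).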
+bool MipsDAGToDAGISel::
+SelectAddr(SDOperand Op, SDOperand Addr, SDOperand &Offset, SDOperand &Base)
+{
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // TargetExternalSymbol and TargetGlobalAddress are
+ // lowered and their addresses go into registers, so
+ // they should not be touched here.
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+
+ // Operand is a result of an ADD.
+ if (Addr.getOpcode() == ISD::ADD)
+ {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ {
+ if (Predicate_immSExt16(CN))
+ {
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+
+ Offset = CurDAG->getTargetConstant(CN->getValue(), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+/// Select instructions that are not custom lowered. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MipsDAGToDAGISel::
+Select(SDOperand N)
+{
+ SDNode *Node = N.Val;
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ #ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+ #endif
+
+ // If we have a custom node, we already have selected!
+ if (Opcode >= ISD::BUILTIN_OP_END && Opcode < MipsISD::FIRST_NUMBER) {
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+ return NULL;
+ }
+
+ ///
+ // Instruction selection not handled by custom lowering or by the
+ // auto-generated tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+
+ default: break;
+
+ /// Special Mul operations
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ SDOperand MulOp1 = Node->getOperand(0);
+ SDOperand MulOp2 = Node->getOperand(1);
+ AddToISelQueue(MulOp1);
+ AddToISelQueue(MulOp2);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ SDNode *MulNode = CurDAG->getTargetNode(MulOp, MVT::Flag, MulOp1, MulOp2);
+
+ SDOperand MFInFlag = SDOperand(MulNode, 0);
+ return CurDAG->getTargetNode(Mips::MFHI, MVT::i32, MFInFlag);
+ }
+
+ /// Div operations
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ SDOperand DivOp1 = Node->getOperand(0);
+ SDOperand DivOp2 = Node->getOperand(1);
+ AddToISelQueue(DivOp1);
+ AddToISelQueue(DivOp2);
+
+ unsigned DivOp = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
+ SDNode *DivNode = CurDAG->getTargetNode(DivOp, MVT::Flag, DivOp1, DivOp2);
+
+ SDOperand MFInFlag = SDOperand(DivNode, 0);
+ return CurDAG->getTargetNode(Mips::MFLO, MVT::i32, MFInFlag);
+ }
+
+ /// Rem operations
+ case ISD::SREM:
+ case ISD::UREM: {
+ SDOperand RemOp1 = Node->getOperand(0);
+ SDOperand RemOp2 = Node->getOperand(1);
+ AddToISelQueue(RemOp1);
+ AddToISelQueue(RemOp2);
+
+ unsigned RemOp = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
+ SDNode *RemNode = CurDAG->getTargetNode(RemOp, MVT::Flag, RemOp1, RemOp2);
+
+ SDOperand MFInFlag = SDOperand(RemNode, 0);
+ return CurDAG->getTargetNode(Mips::MFHI, MVT::i32, MFInFlag);
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(N);
+
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.Val)
+ DEBUG(N.Val->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+
+ return ResNode;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 0000000..790cdaf
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,557 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+
+#include "MipsISelLowering.h"
+#include "MipsTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+
+using namespace llvm;
+
+const char *MipsTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case MipsISD::JmpLink : return "MipsISD::JmpLink";
+ case MipsISD::Hi : return "MipsISD::Hi";
+ case MipsISD::Lo : return "MipsISD::Lo";
+ case MipsISD::Ret : return "MipsISD::Ret";
+ default : return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
+{
+ // Mips does not have an i1 type, so use i32 for
+ // setcc operation results (slt, sgt, ...).
+ setSetCCResultType(MVT::i32);
+ setSetCCResultContents(ZeroOrOneSetCCResult);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+
+ // Custom
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ // Load extended operations for i1 types must be promoted
+ setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Store operations for i1 types must be promoted
+ setStoreXAction(MVT::i1, Promote);
+
+ // Mips does not have these NodeTypes below.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+
+ // Intrinsics not supported by Mips.
+ setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMSET, MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+}
+
+
+SDOperand MipsTargetLowering::
+LowerOperation(SDOperand Op, SelectionDAG &DAG)
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ }
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
+ MF.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// Set up a frame object for the return address.
+//SDOperand MipsTargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+// if (ReturnAddrIndex == 0) {
+// MachineFunction &MF = DAG.getMachineFunction();
+// ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, 0);
+// }
+//
+// return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+//}
+
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
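+// A global address is split into a MipsISD::Hi/Lo pair; after instruction
+// selection this typically becomes something like (illustrative):
+//   lui   $t0, %hi(sym)
+//   addiu $t0, $t0, %lo(sym)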
+SDOperand MipsTargetLowering::
+LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG)
+{
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDOperand Hi = DAG.getNode(MipsISD::Hi, MVT::i32, GA);
+ SDOperand Lo = DAG.getNode(MipsISD::Lo, MVT::i32, GA);
+
+ return DAG.getNode(ISD::ADD, MVT::i32, Lo, Hi);
+}
+
+SDOperand MipsTargetLowering::
+LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG)
+{
+ assert(0 && "TLS not implemented for MIPS.");
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+// The lowering operations for the calling convention are performed in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// Mips custom CALL implementation
+SDOperand MipsTargetLowering::
+LowerCALL(SDOperand Op, SelectionDAG &DAG)
+{
+ unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+
+ // For now, only CallingConv::C is implemented
+ switch (CallingConv)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual
+/// registers to physical registers or to the stack frame; CALLSEQ_START and
+/// CALLSEQ_END nodes are emitted.
+/// TODO: isVarArg, isTailCall, sret, GOT, linkage types.
+SDOperand MipsTargetLowering::
+LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Callee = Op.getOperand(4);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ // To meet the ABI, Mips must always allocate 16 bytes on
+ // the stack (even if fewer than 4 words are used for arguments).
+ int VTsize = MVT::getSizeInBits(MVT::i32)/8;
+ MFI->CreateFixedObject(VTsize, -(VTsize*3));
+
+ CCInfo.AnalyzeCallOperands(Op.Val, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy()));
+
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
+ SmallVector<SDOperand, 8> MemOpChains;
+
+ SDOperand StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments start after the first 5 operands of ISD::CALL
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register
+ // must be kept in the RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+
+ assert(VA.isMemLoc());
+
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // Create the frame index object for this incoming parameter.
+ // This guarantees that when the local area is allocated, this slot
+ // will not be overwritten.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ -(16 + VA.getLocMemOffset()) );
+
+ SDOperand PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // Emit an ISD::STORE which stores the
+ // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because
+ // all store nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct
+ // call is) turn it into a TargetGlobalAddress node so that legalize
+ // doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ } else
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *MipsTargetLowering::
+LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips);
+ SmallVector<SDOperand, 8> ResultVals;
+
+ // Returns void
+ if (!RVLocs.size())
+ return Chain.Val;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).Val;
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// Mips custom FORMAL_ARGUMENTS implementation
+SDOperand MipsTargetLowering::
+LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG)
+{
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ switch(CC)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ return LowerCCCArguments(Op, DAG);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg, sret
+SDOperand MipsTargetLowering::
+LowerCCCArguments(SDOperand Op, SelectionDAG &DAG)
+{
+ SDOperand Root = Op.getOperand(0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ CCInfo.AnalyzeFormalArguments(Op.Val, CC_Mips);
+ SmallVector<SDOperand, 8> ArgValues;
+ SDOperand StackPtr;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers
+ if (VA.isRegLoc()) {
+ MVT::ValueType RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else
+ assert(0 && "support only Mips::CPURegsRegisterClass");
+
+
+ // Transform the arguments stored in
+ // physical registers into virtual ones
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+
+ // To meet the ABI, when varargs are passed in registers, the register
+ // contents must also be written to their always-reserved home locations
+ // on the stack.
+ if (isVarArg) {
+
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // Create the frame index object for this incoming parameter
+ // The first 16 bytes are reserved.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ i*4);
+ SDOperand PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+ // Emit an ISD::STORE which stores the
+ // parameter value to a stack location.
+ ArgValues.push_back(DAG.getStore(Root, ArgValue, PtrOff, NULL, 0));
+ }
+
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ (16 + VA.getLocMemOffset()));
+
+ // Create load nodes to retrieve arguments from the stack
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
+ }
+ }
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDOperand MipsTargetLowering::
+LowerRET(SDOperand Op, SelectionDAG &DAG)
+{
+ // CCValAssign - represents the assignment of
+ // the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+ // Analyze the return values of ISD::RET.
+ CCInfo.AnalyzeReturn(Op.Val, RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET operands are: chain, then (value, signness) pairs,
+ // so i*2+1 selects only the i-th return value.
+ Chain = DAG.getCopyToReg(Chain, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+ // Guarantee that all emitted copies are
+ // glued together, so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.Val)
+ return DAG.getNode(MipsISD::Ret, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 0000000..0199175
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,84 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+Mips::INSTRUCTION_LIST_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Return
+ Ret
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class MipsTargetLowering : public TargetLowering
+ {
+ // FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ // const MipsSubtarget &MipsSubTarget;
+ public:
+
+ MipsTargetLowering(MipsTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target-specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ private:
+ // Lower Operand helpers
+ SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC);
+ SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG);
+
+ // Lower Operand specifics
+ SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG);
+
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 0000000..b88fa90
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,96 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// All the possible Mips fields are:
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2-register instr) or src reg (on a 3-register instr).
+// rd - dst reg, only used on 3-register instrs.
+// shamt - only used on shift instructions, contains the shift amount.
+// funct - combined with the opcode field, gives us the operation code.
+//
+//===----------------------------------------------------------------------===//
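+
+// As a purely illustrative example (hand-worked here, not part of the patch),
+// the R-format instruction "addu $8, $9, $10" would encode as:
+//   opcode=000000 rs=01001 rt=01010 rd=01000 shamt=00000 funct=100001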
+
+// Generic Mips Format
+class MipsInst<dag ops, string asmstr, list<dag> pattern>:
+ Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OperandList = ops;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
+
+class FR<bits<6> op, bits<6> _funct, dag ops, string asmstr, list<dag> pattern>:
+ MipsInst<ops, asmstr, pattern>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag ops, string asmstr, list<dag> pattern>:
+ MipsInst<ops, asmstr, pattern>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag ops, string asmstr, list<dag> pattern>:
+ MipsInst<ops, asmstr, pattern>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 0000000..8084030
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,114 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+// TODO: Add subtarget support to this constructor.
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : TargetInstrInfo(MipsInsts, sizeof(MipsInsts)/sizeof(MipsInsts[0])),
+ TM(tm), RI(*this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImmediate() && op.getImmedValue() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+bool MipsInstrInfo::
+isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg) const
+{
+ // addu $dst, $src, $zero || addu $dst, $zero, $src
+ // or $dst, $src, $zero || or $dst, $zero, $src
+ if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR))
+ {
+ if (MI.getOperand(1).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ // addiu $dst, $src, 0
+ if (MI.getOpcode() == Mips::ADDiu)
+ {
+ if ((MI.getOperand(1).isRegister()) && (isZeroImm(MI.getOperand(2)))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const
+{
+ // TODO: add lhu, lbu ???
+ if (MI->getOpcode() == Mips::LW)
+ {
+ if ((MI->getOperand(2).isFrameIndex()) && // is a stack slot
+ (MI->getOperand(1).isImmediate()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1))))
+ {
+ FrameIndex = MI->getOperand(2).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const
+{
+ // TODO: add sb, sh ???
+ if (MI->getOpcode() == Mips::SW) {
+ if ((MI->getOperand(0).isFrameIndex()) && // is a stack slot
+ (MI->getOperand(1).isImmediate()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1))))
+ {
+ FrameIndex = MI->getOperand(0).getFrameIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, const std::vector<MachineOperand> &Cond)
+ const
+{
+ // TODO: add Mips::J here.
+ assert(0 && "Cant handle any kind of branches!");
+ return 1;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 0000000..356cf3d
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,63 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class MipsInstrInfo : public TargetInstrInfo
+{
+ MipsTargetMachine &TM;
+ const MipsRegisterInfo RI;
+public:
+ MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and
+ /// leave the source and dest operands in the passed parameters.
+ ///
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 0000000..1f5d152
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,469 @@
+//===- MipsInstrInfo.td - Mips Instruction defs -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+// Call
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain,
+ SDNPOutFlag]>;
+
+// Hi and Lo nodes are created to ease manipulation of 16-bit chunks when
+// handling 32-bit immediates. They are used in MipsISelLowering to
+// lower nodes such as GlobalAddress, ExternalSymbol, ...
+// These two nodes have nothing to do with the Mips Hi and Lo registers.
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+
+// Return
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_MipsCallSeq : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def uimm16 : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Patterns and Transformations
+//===----------------------------------------------------------------------===//
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getValue() >> 16);
+}]>;
+
+// Node immediate fits as a 16-bit sign-extended target immediate.
+// e.g. addi, andi
+def immSExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getValue() == (short)N->getValue();
+ else
+ return (int64_t)N->getValue() == (short)N->getValue();
+}]>;
+
+// Node immediate fits as a 16-bit zero-extended target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. addiu, sltiu
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getValue() == (unsigned short)N->getValue();
+ else
+ return (uint64_t)N->getValue() == (unsigned short)N->getValue();
+}], LO16>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getValue() == ((N->getValue()) & 0x1f) ;
+}]>;
+
+// Mips addressing mode. An SDNode frameindex could possibly be a match,
+// since load and store instructions that access the stack use it.
+def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>;
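+// (Presumably SelectAddr splits an address into a register base plus a signed
+// 16-bit offset, matching the two sub-operands of the "mem" operand above.)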
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+let isCommutable = 1 in
+class ArithR< bits<6> op, bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< op,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))] >;
+
+let isCommutable = 1 in
+class ArithOverflowR< bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ []>;
+
+// Arithmetic 2 register operands
+let isCommutable = 1 in
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI< op,
+ (ops CPURegs:$dst, CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))] >;
+
+// Arithmetic Multiply ADD/SUB
+let rd=0 in
+class MArithR<bits<6> func, string instr_asm> :
+ FR< 0x1c,
+ func,
+ (ops CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, " $rs, $rt"),
+ []>;
+
+// Logical
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))] >;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+ FI< op,
+ (ops CPURegs:$dst, CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immSExt16:$c))]>;
+
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))] >;
+
+// Shifts
+let rt = 0 in
+class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))] >;
+
+class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))] >;
+
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI< op,
+ (ops CPURegs:$dst, uimm16:$imm),
+ !strconcat(instr_asm, " $dst, $imm"),
+ []>;
+
+// Memory Load/Store
+let isLoad = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (ops CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))]>;
+
+let isStore = 1 in
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (ops CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)]>;
+
+// Conditional Branch
+let isBranch = 1, noResults=1, isTerminator=1 in
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI< op,
+ (ops CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)]>;
+
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+ PatFrag cond_op>:
+ FR< op,
+ func,
+ (ops CPURegs:$dst, CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))]>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+ Operand Od, PatLeaf imm_type>:
+ FI< op,
+ (ops CPURegs:$dst, CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))]>;
+
+// Unconditional branch
+let hasCtrlDep=1, noResults=1, isTerminator=1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ< op,
+ (ops brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [(br bb:$target)]>;
+
+let hasCtrlDep=1, noResults=1, isTerminator=1, rd=0 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (ops CPURegs:$target),
+ !strconcat(instr_asm, " $target"),
+ []>;
+
+// Jump and Link (Call)
+let isCall=1 in
+class JumpLink<bits<6> op, string instr_asm>:
+ FJ< op,
+ (ops calltarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [(MipsJmpLink imm:$target)]>;
+
+let isCall=1 in
+class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (ops CPURegs:$rd, CPURegs:$rs),
+ !strconcat(instr_asm, " $rs, $rd"),
+ []>;
+
+// Mul, Div
+class MulDiv<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (ops CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ []>;
+
+// Move from/to Hi/Lo
+class MoveFromTo<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (ops CPURegs:$dst),
+ !strconcat(instr_asm, " $dst"),
+ []>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm>:
+ FR< 0x1c,
+ func,
+ (ops CPURegs:$dst, CPURegs:$src),
+ !strconcat(instr_asm, " $dst, $src"),
+ []>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag ops, string asmstr, list<dag> pattern>:
+ MipsInst<ops, asmstr, pattern>;
+
+// As stack alignment is always done with addiu, we need a 16-bit immediate
+def ADJCALLSTACKDOWN : Pseudo<(ops uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start imm:$amt)]>, Imp<[SP],[SP]>;
+def ADJCALLSTACKUP : Pseudo<(ops uimm16:$amt),
+ "!ADJCALLSTACKUP $amt",
+ [(callseq_end imm:$amt)]>, Imp<[SP],[SP]>;
+
+def IMPLICIT_DEF_CPURegs : Pseudo<(ops CPURegs:$dst),
+ "!IMPLICIT_DEF $dst",
+ [(set CPURegs:$dst, (undef))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mips32 I
+//===----------------------------------------------------------------------===//
+
+// Arithmetic
+def ADDiu : ArithI<0x09, "addiu", add, uimm16, immSExt16>;
+def ADDi : ArithI<0x08, "addi", add, simm16, immZExt16>;
+def MUL : ArithR<0x1c, 0x02, "mul", mul>;
+def ADDu : ArithR<0x00, 0x21, "addu", add>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub>;
+def ADD : ArithOverflowR<0x00, 0x20, "add">;
+def SUB : ArithOverflowR<0x00, 0x22, "sub">;
+def MADD : MArithR<0x00, "madd">;
+def MADDU : MArithR<0x01, "maddu">;
+def MSUB : MArithR<0x04, "msub">;
+def MSUBU : MArithR<0x05, "msubu">;
+
+// Logical
+def AND : LogicR<0x24, "and", and>;
+def OR : LogicR<0x25, "or", or>;
+def XOR : LogicR<0x26, "xor", xor>;
+def ANDi : LogicI<0x0c, "andi", and>;
+def ORi : LogicI<0x0d, "ori", or>;
+def XORi : LogicI<0x0e, "xori", xor>;
+def NOR : LogicNOR<0x00, 0x27, "nor">;
+
+// Shifts
+def SLL : LogicR_shift_imm<0x00, "sll", shl>;
+def SRL : LogicR_shift_imm<0x02, "srl", srl>;
+def SRA : LogicR_shift_imm<0x03, "sra", sra>;
+def SLLV : LogicR_shift_reg<0x04, "sllv", shl>;
+def SRLV : LogicR_shift_reg<0x06, "srlv", srl>;
+def SRAV : LogicR_shift_reg<0x07, "srav", sra>;
+
+// Load Upper Immediate
+def LUi : LoadUpper<0x0f, "lui">;
+
+// Load/Store
+def LB : LoadM<0x20, "lb", sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH : LoadM<0x21, "lh", sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW : LoadM<0x23, "lw", load>;
+def SB : StoreM<0x28, "sb", truncstorei8>;
+def SH : StoreM<0x29, "sh", truncstorei16>;
+def SW : StoreM<0x2b, "sw", store>;
+
+// Conditional Branch
+def BEQ : CBranch<0x04, "beq", seteq>;
+def BNE : CBranch<0x05, "bne", setne>;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, uimm16, immZExt16>;
+
+// Unconditional jump
+def J : JumpFJ<0x02, "j">;
+def JR : JumpFR<0x00, 0x08, "jr">;
+
+// Jump and Link (Call)
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+
+// MulDiv and Move From Hi/Lo operations have
+// their corresponding SDNodes created during instruction selection.
+// Special Mul, Div operations
+def MULT : MulDiv<0x18, "mult">;
+def MULTu : MulDiv<0x19, "multu">;
+def DIV : MulDiv<0x1a, "div">;
+def DIVu : MulDiv<0x1b, "divu">;
+
+// Move from/to Hi/Lo
+def MFHI : MoveFromTo<0x10, "mfhi">;
+def MFLO : MoveFromTo<0x12, "mflo">;
+def MTHI : MoveFromTo<0x11, "mthi">;
+def MTLO : MoveFromTo<0x13, "mtlo">;
+
+// Count Leading
+def CLO : CountLeading<0x21, "clo">;
+def CLZ : CountLeading<0x20, "clz">;
+
+// No operation
+let addr=0 in
+def NOOP : FJ<0, (ops), "nop", []>;
+
+// Ret instruction - since MIPS does not have a "ret" instruction,
+// a "jr $ra" must be generated.
+let isReturn=1, isTerminator=1, hasDelaySlot=1, noResults=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+{
+ def RET : FR <0x00, 0x02, (ops CPURegs:$target),
+ "jr $target", [(MipsRet CPURegs:$target)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
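+// e.g. materializing 0x12345678 is expected to expand to something like:
+//   lui $reg, 0x1234
+//   ori $reg, $reg, 0x5678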
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+
+// GlobalAddress, Constant Pool, ExternalSymbol, and JumpTable
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+
+// When extracting the address from GlobalAddress we
+// need something of the form "addiu $reg, %lo(addr)"
+def : Pat<(add CPURegs:$a, (MipsLo tglobaladdr:$in)),
+ (ADDiu CPURegs:$a, tglobaladdr:$in)>;
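+// Together, the patterns above typically materialize a global address as:
+//   lui   $reg, %hi(symbol)
+//   addiu $reg, $reg, %lo(symbol)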
+
+// MIPS has no "not" instruction, so we expand it to a NOR with itself.
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, CPURegs:$in)>;
+
+// Extended loads and stores.
+def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+def : Pat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
+def : Pat<(truncstorei1 CPURegs:$src, addr:$addr),
+ (SB CPURegs:$src, addr:$addr)>;
+
+def : Pat<(brcond (setne CPURegs:$lhs, (add ZERO, 0)), bb:$dst),
+ (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+
+
+// Conditional branch patterns.
+// cond branches patterns, 2 register operands signed.
+def : Pat<(brcond (setlt CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BNE (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setgt CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BNE (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
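+// e.g. "br (setle $a, $b)" is lowered to "slt $tmp, $b, $a" followed by
+// "beq $tmp, $zero, dest", since MIPS has no direct branch-on-less-or-equal.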
+
+// cond branches patterns, 2 register operands unsigned.
+def : Pat<(brcond (setult CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BNE (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setugt CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BNE (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+
+// cond branches patterns, reg/imm operands signed.
+def : Pat<(brcond (setult CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BNE (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+// cond branches patterns, reg/imm operands unsigned.
+def : Pat<(brcond (setult CPURegs:$lhs, immZExt16:$rhs), bb:$dst),
+ (BNE (SLTiu CPURegs:$lhs, immZExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immZExt16:$rhs), bb:$dst),
+ (BEQ (SLTiu CPURegs:$lhs, immZExt16:$rhs), ZERO, bb:$dst)>;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 0000000..b362dc3
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,54 @@
+//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds, for each function, the stack offset
+ /// at which the Frame Pointer must be saved.
+ int FPStackOffset;
+
+ /// Holds, for each function, the stack offset
+ /// at which the Return Address must be saved.
+ int RAStackOffset;
+
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0)
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getTopSavedRegOffset() const {
+ return (RAStackOffset > FPStackOffset) ?
+ (RAStackOffset) : (FPStackOffset);
+ }
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 0000000..c7a87ca
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,422 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+//#include "MipsSubtarget.h"
+
+using namespace llvm;
+
+// TODO: add subtarget support
+MipsRegisterInfo::MipsRegisterInfo(const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ TII(tii) {}
+
+void MipsRegisterInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, int FI,
+ const TargetRegisterClass *RC) const
+{
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, TII.get(Mips::SW)).addReg(SrcReg, false, false, true)
+ .addImm(0).addFrameIndex(FI);
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
+
+void MipsRegisterInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const
+{
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, TII.get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+void MipsRegisterInfo::
+copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const
+{
+ if (RC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, TII.get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg);
+ else
+ assert (0 && "Can't copy this register");
+}
+
+void MipsRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const
+{
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+MachineInstr *MipsRegisterInfo::
+foldMemoryOperand(MachineInstr* MI, unsigned OpNum, int FI) const
+{
+ MachineInstr *NewMI = NULL;
+
+ switch (MI->getOpcode())
+ {
+ case Mips::ADDu:
+ if ((MI->getOperand(0).isRegister()) &&
+ (MI->getOperand(1).isRegister()) &&
+ (MI->getOperand(1).getReg() == Mips::ZERO) &&
+ (MI->getOperand(2).isRegister()))
+ {
+ if (OpNum == 0) // COPY -> STORE
+ NewMI = BuildMI(TII.get(Mips::SW)).addFrameIndex(FI)
+ .addImm(0).addReg(MI->getOperand(2).getReg());
+ else // COPY -> LOAD
+ NewMI = BuildMI(TII.get(Mips::LW), MI->getOperand(0)
+ .getReg()).addImm(0).addFrameIndex(FI);
+ }
+ break;
+ }
+
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+}
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs() const
+{
+ // Mips callee-saved register range is $16-$23 (s0-s7)
+ static const unsigned CalleeSavedRegs[] = {
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S4, Mips::S5, Mips::S6, Mips::S7, 0
+ };
+ return CalleeSavedRegs;
+}
+
+/// Mips Callee Saved Register Classes
+const TargetRegisterClass* const*
+MipsRegisterInfo::getCalleeSavedRegClasses() const
+{
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Mips::ZERO);
+ Reserved.set(Mips::AT);
+ Reserved.set(Mips::K0);
+ Reserved.set(Mips::K1);
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::SP);
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::RA);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// To meet the ABI, we construct the frame in the reverse
+// of its natural order.
+//
+// The LLVM frame will look like this:
+//
+// As the stack grows down, we start at 0, and the offsets
+// are decremented from there.
+//
+// 0 ----------
+// -4 Args to pass
+// . saved "Callee Saved" Registers
+// . Local Area
+// . saved FP
+// . saved RA
+// -StackSize -----------
+//
+// On the EliminateFrameIndex we just negate the address above
+// and we get the stack frame required by the ABI, which is:
+//
+// sp + stacksize -------------
+// saved $RA (only on non-leaf functions)
+// saved $FP (only with frame pointer)
+// saved "Callee Saved" Registers
+// Local Area
+// saved $GP (used in PIC - not supported yet)
+// Args to pass area
+// sp -------------
+//
+// The stack pointer is decremented/incremented by the stack size
+// in the Prologue/Epilogue.
+//
+// References to the previous stack (to obtain arguments) are done
+// with fixed location stack frames using positive stack offsets.
+//
+// Examples:
+// - reference to the current stack frame
+// for any local area var there is something like : FI >= 0, StackOffset: -4
+// sw REGX, 4(REGY)
+//
+// - reference to the previous stack frame
+// suppose there's a store to the 5th argument : FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// sw REGX, 16+StackSize (REGY)
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsRegisterInfo::
+hasFP(const MachineFunction &MF) const {
+ return (NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects());
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex operands represent objects inside an abstract stack.
+// We must replace each FrameIndex with a direct stack/frame
+// pointer reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ #ifndef NDEBUG
+ DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DOUT << "<--------->\n";
+ MI.print(DOUT);
+ DOUT << "FrameIndex : " << FrameIndex << "\n";
+ DOUT << "spOffset : " << spOffset << "\n";
+ DOUT << "stackSize : " << stackSize << "\n";
+ #endif
+
+ int Offset = ( (spOffset >= 0) ? (stackSize + spOffset) : (-spOffset));
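+ // e.g. with stackSize = 24, a local object at spOffset -4 becomes offset 4,
+ // while an incoming argument at spOffset 16 becomes offset 24+16 = 40.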
+
+ #ifndef NDEBUG
+ DOUT << "Offset : " << Offset << "\n";
+ DOUT << "<--------->\n";
+ #endif
+
+ MI.getOperand(i-1).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF),false);
+}
+
+void MipsRegisterInfo::
+emitPrologue(MachineFunction &MF) const
+{
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ #ifndef NDEBUG
+ DOUT << "\n<--- EMIT PROLOGUE --->";
+ DOUT << "Stack size :" << NumBytes << "\n";
+ #endif
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ int FPOffset = 0, RAOffset = 0;
+
+ // Always allocate space for saved RA and FP,
+ // even if FramePointer is not used. When not
+ // using FP, the last stack slot becomes empty
+ // and RA is saved before it.
+ if ((hasFP(MF)) && (MFI->hasCalls())) {
+ FPOffset = NumBytes;
+ RAOffset = (NumBytes+4);
+ } else if ((!hasFP(MF)) && (MFI->hasCalls())) {
+ FPOffset = 0;
+ RAOffset = NumBytes;
+ } else if ((hasFP(MF)) && (!MFI->hasCalls())) {
+ FPOffset = NumBytes;
+ RAOffset = 0;
+ }
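+ // e.g. if NumBytes is 8 and the function both makes a call and uses a frame
+ // pointer, the slots end up at FPOffset = 8 and RAOffset = 12.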
+
+ MFI->setObjectOffset(MFI->CreateStackObject(4,4), -FPOffset);
+ MFI->setObjectOffset(MFI->CreateStackObject(4,4), -RAOffset);
+ MipsFI->setFPStackOffset(FPOffset);
+ MipsFI->setRAStackOffset(RAOffset);
+
+ #ifndef NDEBUG
+ DOUT << "FPOffset :" << FPOffset << "\n";
+ DOUT << "RAOffset :" << RAOffset << "\n";
+ #endif
+
+ // Align stack.
+ NumBytes += 8;
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ NumBytes = ((NumBytes+Align-1)/Align*Align);
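+ // e.g. a 12-byte frame grows to 20 bytes after the FP/RA slots and is then
+ // rounded up to 24 bytes by the 8-byte stack alignment.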
+
+ #ifndef NDEBUG
+ DOUT << "New stack size :" << NumBytes << "\n\n";
+ #endif
+
+ // Update frame info
+ MFI->setStackSize(NumBytes);
+
+ // Adjust stack : addi sp, sp, (-imm)
+ BuildMI(MBB, MBBI, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-NumBytes);
+
+ // Save the return address only if the function isn't a leaf one.
+ // sw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, TII.get(Mips::SW))
+ .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // if framepointer enabled, save it and set it
+ // to point to the stack pointer
+ if (hasFP(MF)) {
+ // sw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, TII.get(Mips::SW))
+ .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+ // move $fp, $sp
+ BuildMI(MBB, MBBI, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+ }
+}
+
+void MipsRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // Get the number of bytes from FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
+ #ifndef NDEBUG
+ DOUT << "\n<--- EMIT EPILOGUE --->" << "\n";
+ DOUT << "Stack size :" << NumBytes << "\n";
+ DOUT << "FPOffset :" << FPOffset << "\n";
+ DOUT << "RAOffset :" << RAOffset << "\n\n";
+ #endif
+
+ // if framepointer enabled, restore it and restore the
+ // stack pointer
+ if (hasFP(MF)) {
+ // move $sp, $fp
+ BuildMI(MBB, MBBI, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+
+ // lw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, TII.get(Mips::LW))
+ .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+ }
+
+ // Restore the return address only if the function isn't a leaf one.
+ // lw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, TII.get(Mips::LW))
+ .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // adjust stack : insert addi sp, sp, (imm)
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(NumBytes);
+ }
+}
+
+void MipsRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+}
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+ return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#include "MipsGenRegisterInfo.inc"
+
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 0000000..d84194f
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,83 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "MipsGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ int FrameIndex) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+
+ const unsigned *getCalleeSavedRegs() const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 0000000..2b7d15f
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,80 @@
+//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// CPU GPR Registers
+def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<0>;
+def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<1>;
+def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<2>;
+def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<3>;
+def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<5>;
+def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<5>;
+def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<6>;
+def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<7>;
+def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<8>;
+def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<9>;
+def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<10>;
+def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<11>;
+def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<12>;
+def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<13>;
+def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<14>;
+def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<15>;
+def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<16>;
+def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<17>;
+def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<18>;
+def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<19>;
+def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<20>;
+def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<21>;
+def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<22>;
+def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<23>;
+def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<24>;
+def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<25>;
+def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<26>;
+def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<27>;
+def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<28>;
+def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<29>;
+def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<30>;
+def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<31>;
+
+// CPU Registers Class
+def CPURegs : RegisterClass<"Mips", [i32], 32,
+ // Return Values and Arguments
+ [V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CPURegsClass::iterator
+ CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // The last 8 registers on the list above are reserved
+ return end()-8;
+ }
+ }];
+}
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 0000000..a394f77
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,26 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsGenSubtarget.inc"
+using namespace llvm;
+
+MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS) : isR3000(false)
+{
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 0000000..7ec61ca
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,43 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class MipsSubtarget : public TargetSubtarget {
+protected:
+ bool isR3000;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified module.
+ ///
+ MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ bool IsR3000() const { return isR3000; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.cpp b/lib/Target/Mips/MipsTargetAsmInfo.cpp
new file mode 100644
index 0000000..08166f6
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MipsTargetAsmInfo.cpp - Mips asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetAsmInfo.h"
+
+using namespace llvm;
+
+MipsTargetAsmInfo::MipsTargetAsmInfo(const MipsTargetMachine &TM) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ CommentString = "#";
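+ // With these directives, a 32-bit constant such as 42 should be emitted
+ // roughly as "\t.word\t42", and assembly comments are prefixed with '#'.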
+}
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.h b/lib/Target/Mips/MipsTargetAsmInfo.h
new file mode 100644
index 0000000..908f036
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MipsTargetAsmInfo.h - Mips asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class MipsTargetMachine;
+
+ struct MipsTargetAsmInfo : public TargetAsmInfo {
+ MipsTargetAsmInfo(const MipsTargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 0000000..7fdba30
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,83 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about the Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+namespace {
+ // Register the target.
+ RegisterTarget<MipsTargetMachine> X("mips", " Mips");
+}
+
+const TargetAsmInfo *MipsTargetMachine::
+createTargetAsmInfo() const
+{
+ return new MipsTargetAsmInfo(*this);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+//
+// FrameInfo --> StackGrowsDown, 8-byte aligned,
+// local area offset (LOA): 0
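+//
+// A rough reading of the DataLayout string below ("E-p:32:32:32"), using the
+// standard LLVM target data syntax: "E" = big-endian, "p:32:32:32" = 32-bit
+// pointers with 32-bit ABI alignment and 32-bit preferred alignment.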
+MipsTargetMachine::
+MipsTargetMachine(const Module &M, const std::string &FS):
+ Subtarget(*this, M, FS), DataLayout("E-p:32:32:32"),
+ InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0),
+ TLInfo(*this) {}
+
+// Module match quality: strongly match "mips-*" triples; otherwise return 0,
+// so -march must be given explicitly to generate MIPS code.
+unsigned MipsTargetMachine::
+getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "mips-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-")
+ return 20;
+
+ return 0;
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen Mips code.
+bool MipsTargetMachine::
+addInstSelector(FunctionPassManager &PM, bool Fast)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+// TODO: Delay slot filling must be implemented here.
+bool MipsTargetMachine::
+addPreEmitPass(FunctionPassManager &PM, bool Fast)
+{
+ return false;
+}
+
+// Installs the AssemblyEmitter for the target. Returns false once the
+// printer pass has been added (returning true would mean assembly emission
+// is not supported).
+bool MipsTargetMachine::
+addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out)
+{
+ // Output assembly language.
+ PM.add(createMipsCodePrinterPass(Out, *this));
+ return false;
+}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 0000000..9e1ccc3
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,65 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bruno Cardoso Lopes and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+namespace llvm {
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ MipsTargetLowering TLInfo;
+
+ protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ public:
+ MipsTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const
+ { return &FrameInfo; }
+ virtual const TargetSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual MipsTargetLowering *getTargetLowering() const {
+ return const_cast<MipsTargetLowering*>(&TLInfo);
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
new file mode 100644
index 0000000..77288ed
--- /dev/null
+++ b/lib/Target/PowerPC/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/PowerPC/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMPowerPC
+TARGET = PPC
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
+ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
+ PPCGenInstrInfo.inc PPCGenDAGISel.inc \
+ PPCGenSubtarget.inc PPCGenCallingConv.inc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
new file mode 100644
index 0000000..9327f30
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,47 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include <iosfwd>
+
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPassManager;
+ class FunctionPass;
+ class MachineCodeEmitter;
+
+FunctionPass *createPPCBranchSelectionPass();
+FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+FunctionPass *createPPCAsmPrinterPass(std::ostream &OS,
+ PPCTargetMachine &TM);
+FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+} // end namespace llvm;
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#include "PPCGenRegisterNames.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#include "PPCGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
new file mode 100644
index 0000000..76f8ac4
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,114 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the PPCInstrInfo.h file.
+ let TSFlagsFields = ["PPC970_First",
+ "PPC970_Single",
+ "PPC970_Cracked",
+ "PPC970_Unit"];
+ let TSFlagsShifts = [0, 1, 2, 3];
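+ // With the shifts above, the target-specific flags word is laid out as:
+ // bit 0 = PPC970_First, bit 1 = PPC970_Single, bit 2 = PPC970_Cracked,
+ // bits 3 and up = PPC970_Unit.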
+
+ let isLittleEndianEncoding = 1;
+}
+
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 0000000..2880196
--- /dev/null
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,1101 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter {
+ std::set<std::string> FnStubs, GVStubs;
+ const PPCSubtarget &Subtarget;
+
+ PPCAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
+ : AsmPrinter(O, TM, T), Subtarget(TM.getSubtarget<PPCSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+ PPCTargetMachine &getTM() {
+ return static_cast<PPCTargetMachine&>(TM);
+ }
+
+ unsigned enumRegToMachineReg(unsigned enumReg) {
+ switch (enumReg) {
+ default: assert(0 && "Unhandled register!"); break;
+ case PPC::CR0: return 0;
+ case PPC::CR1: return 1;
+ case PPC::CR2: return 2;
+ case PPC::CR3: return 3;
+ case PPC::CR4: return 4;
+ case PPC::CR5: return 5;
+ case PPC::CR6: return 6;
+ case PPC::CR7: return 7;
+ }
+ abort();
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it; otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO);
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+ /// register name so that only the number is left. Used by the Linux asm printer.
+ const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero) {
+ unsigned RegNo = MO.getReg();
+ assert(MRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+
+ // If we should use 0 for R0.
+ if (R0AsZero && RegNo == PPC::R0) {
+ O << "0";
+ return;
+ }
+
+ const char *RegName = TM.getRegisterInfo()->get(RegNo).Name;
+ // The Linux assembler (others too?) does not accept register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isRegister()) {
+ printRegister(MO, false);
+ } else if (MO.isImmediate()) {
+ O << MO.getImmedValue();
+ } else {
+ printOp(MO);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+
+ void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ char value = MI->getOperand(OpNo).getImmedValue();
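+ // Sign-extend the low 5 bits: e.g. an encoded field of 0x1F prints as -1.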
+ value = (value << (32-5)) >> (32-5);
+ O << (int)value;
+ }
+ void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImmedValue();
+ assert(value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)value;
+ }
+ void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImmedValue();
+ assert(value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)value;
+ }
+ void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (short)MI->getOperand(OpNo).getImmedValue();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (unsigned short)MI->getOperand(OpNo).getImmedValue();
+ }
+ void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImmediate()) {
+ O << (short)(MI->getOperand(OpNo).getImmedValue()*4);
+ } else {
+ O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\")";
+ else
+ O << ')';
+ }
+ }
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImmediate()) {
+ O << "$+" << MI->getOperand(OpNo).getImmedValue()*4;
+ } else {
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (MO.getType() == MachineOperand::MO_GlobalAddress) {
+ GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage()))) {
+ // Dynamically-resolved functions need a stub for the function.
+ std::string Name = Mang->getValueName(GV);
+ FnStubs.insert(Name);
+ O << "L" << Name << "$stub";
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+ }
+ if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ FnStubs.insert(Name);
+ O << "L" << Name << "$stub";
+ return;
+ }
+ }
+
+ printOp(MI->getOperand(OpNo));
+ }
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (int)MI->getOperand(OpNo).getImmedValue()*4;
+ }
+ void printPICLabel(const MachineInstr *MI, unsigned OpNo) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+ }
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImmediate()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "ha16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@ha";
+ }
+ }
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImmediate()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@l";
+ }
+ }
+ void printcrbitm(const MachineInstr *MI, unsigned OpNo) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo = enumRegToMachineReg(CCReg);
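+ // Emit the single-bit mask that selects this CR field: CR0 -> 0x80 (128),
+ // CR7 -> 0x01 (1).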
+ O << (0x80 >> RegNo);
+ }
+ // The new addressing mode printers.
+ void printMemRegImm(const MachineInstr *MI, unsigned OpNo) {
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isRegister() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+ void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImmediate())
+ printS16X4ImmOperand(MI, OpNo);
+ else
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isRegister() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+
+ void printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the Darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
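+ // For example (illustrative operands), an indexed access whose base
+ // operand is R0 is printed as "lwzx r3, 0, r4" rather than "lwzx r3, r0, r4".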
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ printRegister(MO, true);
+ O << ", ";
+ printOperand(MI, OpNo+1);
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier);
+
+ virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+ virtual bool doFinalization(Module &M) = 0;
+
+ virtual void EmitExternalGlobal(const GlobalVariable *GV);
+ };
+
+ /// LinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ struct VISIBILITY_HIDDEN LinuxAsmPrinter : public PPCAsmPrinter {
+
+ DwarfWriter DW;
+
+ LinuxAsmPrinter(std::ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T)
+ : PPCAsmPrinter(O, TM, T), DW(O, this, T) {
+ }
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ /// getSectionForFunction - Return the section that we should emit the
+ /// specified function body into.
+ virtual std::string getSectionForFunction(const Function &F) const;
+ };
+
+ /// DarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac OS
+ /// X
+ struct VISIBILITY_HIDDEN DarwinAsmPrinter : public PPCAsmPrinter {
+
+ DwarfWriter DW;
+
+ DarwinAsmPrinter(std::ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T)
+ : PPCAsmPrinter(O, TM, T), DW(O, this, T) {
+ }
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ /// getSectionForFunction - Return the section that we should emit the
+ /// specified function body into.
+ virtual std::string getSectionForFunction(const Function &F) const;
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "PPCGenAsmWriter.inc"
+
+void PPCAsmPrinter::printOp(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getJumpTableIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getConstantPoolIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage()))) {
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ }
+ O << Name;
+
+ if (MO.getOffset() > 0)
+ O << "+" << MO.getOffset();
+ else if (MO.getOffset() < 0)
+ O << MO.getOffset();
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitExternalGlobal - External globals are referenced through their indirect
+/// ($non_lazy_ptr) symbol unless the relocation model is static.
+///
+void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ std::string Name = getGlobalLinkName(GV);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ O << Name;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // PPC never has a prefix.
+ printOperand(MI, OpNo);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isRegister() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isRegister())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ if (MI->getOperand(OpNo).isRegister())
+ printMemRegReg(MI, OpNo);
+ else
+ printMemRegImm(MI, OpNo);
+ return false;
+}
+
+void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!strcmp(Modifier, "cc")) {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+
+ } else {
+ assert(!strcmp(Modifier, "reg") &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1);
+ }
+}
+
+
+/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Check for slwi/srwi mnemonics.
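+ // For example (illustrative operands): "rlwinm r3, r4, 8, 0, 23" prints as
+ // "slwi r3, r4, 8"; "rlwinm r3, r4, 24, 8, 31" as "srwi r3, r4, 8";
+ // "or r3, r4, r4" as "mr r3, r4"; and "rldicr r3, r4, 16, 47" as
+ // "sldi r3, r4, 16".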
+ if (MI->getOpcode() == PPC::RLWINM) {
+ bool FoundMnemonic = false;
+ unsigned char SH = MI->getOperand(2).getImmedValue();
+ unsigned char MB = MI->getOperand(3).getImmedValue();
+ unsigned char ME = MI->getOperand(4).getImmedValue();
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "slwi "; FoundMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "srwi "; FoundMnemonic = true;
+ SH = 32-SH;
+ }
+ if (FoundMnemonic) {
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << "\n";
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) {
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "mr ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << "\n";
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImmedValue();
+ unsigned char ME = MI->getOperand(3).getImmedValue();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "sldi ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << "\n";
+ return;
+ }
+ }
+
+ if (printInstruction(MI))
+ return; // Printer was automatically generated
+
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ return;
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ DW.SetModuleInfo(&getAnalysis<MachineModuleInfo>());
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n'
+ << "\t.type\t" << CurrentFnName << ", @function\n";
+ break;
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n';
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ if (F->hasHiddenVisibility())
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << CurrentFnName << "\n";
+
+ EmitAlignment(2, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW.BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+ }
+
+ O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << "\n";
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW.EndFunction();
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool LinuxAsmPrinter::doInitialization(Module &M) {
+ AsmPrinter::doInitialization(M);
+
+ // GNU as handles section names wrapped in quotes
+ Mang->setUseQuotes(true);
+
+ SwitchToTextSection(TAI->getTextSection());
+
+ // Emit initial debug information.
+ DW.BeginModule(&M);
+ return false;
+}
+
+bool LinuxAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasInitializer()) continue; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ std::string name = Mang->getValueName(I);
+
+ if (I->hasHiddenVisibility())
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << name << "\n";
+
+ Constant *C = I->getInitializer();
+ unsigned Size = TD->getTypeSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+ I->hasLinkOnceLinkage() ||
+ (I->hasExternalLinkage() && !I->hasSection()))) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (I->hasExternalLinkage()) {
+ O << "\t.global " << name << '\n';
+ O << "\t.type " << name << ", @object\n";
+ //O << "\t.zerofill __DATA, __common, " << name << ", "
+ // << Size << ", " << Align;
+ } else if (I->hasInternalLinkage()) {
+ SwitchToDataSection("\t.data", I);
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ } else {
+ SwitchToDataSection("\t.data", I);
+ O << ".comm " << name << "," << Size;
+ }
+ O << "\t\t" << TAI->getCommentString() << " '" << I->getName() << "'\n";
+ } else {
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n"
+ << "\t.weak " << name << '\n';
+ SwitchToDataSection("\t.data", I);
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << "\n"
+ << "\t.type " << name << ", @object\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ if (I->isConstant()) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+ SwitchToDataSection(TAI->getCStringSection(), I);
+ break;
+ }
+ }
+
+ // FIXME: special handling for ".ctors" & ".dtors" sections
+ if (I->hasSection() &&
+ (I->getSection() == ".ctors" ||
+ I->getSection() == ".dtors")) {
+ std::string SectionName = ".section " + I->getSection()
+ + ",\"aw\",@progbits";
+ SwitchToDataSection(SectionName.c_str());
+ } else {
+ if (I->isConstant() && TAI->getReadOnlySection())
+ SwitchToDataSection(TAI->getReadOnlySection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ }
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, I);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " '"
+ << I->getName() << "'\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+ }
+ }
+
+ // TODO
+
+ // Emit final debug information.
+ DW.EndModule();
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
+
+std::string LinuxAsmPrinter::getSectionForFunction(const Function &F) const {
+ switch (F.getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::ExternalLinkage:
+ case Function::InternalLinkage: return TAI->getTextSection();
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ return ".text";
+ }
+}
+
+std::string DarwinAsmPrinter::getSectionForFunction(const Function &F) const {
+ switch (F.getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::ExternalLinkage:
+ case Function::InternalLinkage: return TAI->getTextSection();
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ return ".section __TEXT,__textcoal_nt,coalesced,pure_instructions";
+ }
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool DarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ DW.SetModuleInfo(&getAnalysis<MachineModuleInfo>());
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ break;
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ break;
+ }
+
+ if (F->hasHiddenVisibility())
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << CurrentFnName << "\n";
+
+ EmitAlignment(4, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW.BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW.EndFunction();
+
+ // We didn't modify anything.
+ return false;
+}
+
+
+bool DarwinAsmPrinter::doInitialization(Module &M) {
+ static const char *CPUDirectives[] = {
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+ O << "\t.machine " << CPUDirectives[Directive] << "\n";
+
+ AsmPrinter::doInitialization(M);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ Mang->setUseQuotes(true);
+
+ // Prime text sections so they are adjacent. This reduces the likelihood that
+ // a large data or debug section causes a branch to exceed the 16M limit.
+ SwitchToTextSection(".section __TEXT,__textcoal_nt,coalesced,"
+ "pure_instructions");
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SwitchToTextSection(".section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ SwitchToTextSection(".section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ }
+ SwitchToTextSection(TAI->getTextSection());
+
+ // Emit initial debug information.
+ DW.BeginModule(&M);
+ return false;
+}
+
+bool DarwinAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasInitializer()) continue; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I)) {
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (I->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (I->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ continue;
+ }
+
+ std::string name = Mang->getValueName(I);
+
+ if (I->hasHiddenVisibility())
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << name << "\n";
+
+ Constant *C = I->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+ I->hasLinkOnceLinkage() ||
+ (I->hasExternalLinkage() && !I->hasSection()))) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (I->hasExternalLinkage()) {
+ O << "\t.globl " << name << '\n';
+ O << "\t.zerofill __DATA, __common, " << name << ", "
+ << Size << ", " << Align;
+ } else if (I->hasInternalLinkage()) {
+ SwitchToDataSection("\t.data", I);
+ O << TAI->getLCOMMDirective() << name << "," << Size << "," << Align;
+ } else {
+ SwitchToDataSection("\t.data", I);
+ O << ".comm " << name << "," << Size;
+ }
+ O << "\t\t" << TAI->getCommentString() << " '" << I->getName() << "'\n";
+ } else {
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ O << "\t.globl " << name << '\n'
+ << "\t.weak_definition " << name << '\n';
+ SwitchToDataSection(".section __DATA,__datacoal_nt,coalesced", I);
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ if (I->isConstant()) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+ SwitchToDataSection(TAI->getCStringSection(), I);
+ break;
+ }
+ }
+
+ if (!I->isConstant())
+ SwitchToDataSection(TAI->getDataSection(), I);
+ else {
+ // Read-only data.
+ bool HasReloc = C->ContainsRelocations();
+ if (HasReloc &&
+ TM.getRelocationModel() != Reloc::Static)
+ SwitchToDataSection("\t.const_data\n");
+ else if (!HasReloc && Size == 4 &&
+ TAI->getFourByteConstantSection())
+ SwitchToDataSection(TAI->getFourByteConstantSection(), I);
+ else if (!HasReloc && Size == 8 &&
+ TAI->getEightByteConstantSection())
+ SwitchToDataSection(TAI->getEightByteConstantSection(), I);
+ else if (!HasReloc && Size == 16 &&
+ TAI->getSixteenByteConstantSection())
+ SwitchToDataSection(TAI->getSixteenByteConstantSection(), I);
+ else if (TAI->getReadOnlySection())
+ SwitchToDataSection(TAI->getReadOnlySection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ }
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, I);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " '"
+ << I->getName() << "'\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+ }
+ }
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection(".section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ EmitAlignment(4);
+ O << "L" << *i << "$stub:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\tmflr r0\n";
+ O << "\tbcl 20,31,L0$" << *i << "\n";
+ O << "L0$" << *i << ":\n";
+ O << "\tmflr r11\n";
+ O << "\taddis r11,r11,ha16(L" << *i << "$lazy_ptr-L0$" << *i << ")\n";
+ O << "\tmtlr r0\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(L" << *i << "$lazy_ptr-L0$" << *i << ")(r11)\n";
+ else
+ O << "\tlwzu r12,lo16(L" << *i << "$lazy_ptr-L0$" << *i << ")(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ O << "L" << *i << "$lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ } else {
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection(".section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ EmitAlignment(4);
+ O << "L" << *i << "$stub:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\tlis r11,ha16(L" << *i << "$lazy_ptr)\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(L" << *i << "$lazy_ptr)(r11)\n";
+ else
+ O << "\tlwzu r12,lo16(L" << *i << "$lazy_ptr)(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ O << "L" << *i << "$lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ }
+
+ O << "\n";
+
+ // Output stubs for external and common global variables.
+ if (GVStubs.begin() != GVStubs.end()) {
+ SwitchToDataSection(".non_lazy_symbol_pointer");
+ for (std::set<std::string>::iterator I = GVStubs.begin(),
+ E = GVStubs.end(); I != E; ++I) {
+ O << "L" << *I << "$non_lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *I << "\n";
+ if (isPPC64)
+ O << "\t.quad\t0\n";
+ else
+ O << "\t.long\t0\n";
+
+ }
+ }
+
+ // Emit final debug information.
+ DW.EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
+
+
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+FunctionPass *llvm::createPPCAsmPrinterPass(std::ostream &o,
+ PPCTargetMachine &tm) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin()) {
+ return new DarwinAsmPrinter(o, tm, tm.getTargetAsmInfo());
+ } else {
+ return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo());
+ }
+}
+
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 0000000..4286f01
--- /dev/null
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,199 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: a basic block position
+// calculation pass, and a branch pseudo-op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass((intptr_t)&ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+/// getNumBytesForInstruction - Return the maximum number of bytes of code that
+/// the specified instruction may occupy.
+///
+static unsigned getNumBytesForInstruction(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case PPC::IMPLICIT_DEF_GPRC: // no asm emitted
+ case PPC::IMPLICIT_DEF_G8RC: // no asm emitted
+ case PPC::IMPLICIT_DEF_F4: // no asm emitted
+ case PPC::IMPLICIT_DEF_F8: // no asm emitted
+ case PPC::IMPLICIT_DEF_VRRC: // no asm emitted
+ return 0;
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr);
+ }
+ case PPC::LABEL: {
+ return 0;
+ }
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
+
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += getNumBytesForInstruction(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
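+ // Note: (1 << 15) == 32768 bytes, the reach of the signed 16-bit byte
+ // displacement checked below (a 14-bit word displacement scaled by 4).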
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
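+ // For example (illustrative labels), an out-of-range "blt cr0, LBB_far"
+ // becomes:
+ // bge cr0, $+8
+ // b LBB_far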
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += getNumBytesForInstruction(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMachineBasicBlock();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt16(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // The new sequence is 8 bytes where the old branch was 4, so increase the
+ // size of the block by 4 and remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 0000000..9e31b5a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,65 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+
+ CCIfType<[f32, f64], CCAssignToReg<[F1]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+ // Common sub-targets pass FP values in F1 - F13.
+ CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
+ // The ELF32 sub-target passes FP values in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 0000000..5dceffd
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,237 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ ///
+ int getMachineOpValue(MachineInstr &MI, MachineOperand &MO);
+
+ public:
+ static char ID;
+ PPCCodeEmitter(TargetMachine &T, MachineCodeEmitter &M)
+ : MachineFunctionPass((intptr_t)&ID), TM(T), MCE(M) {}
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+
+ /// getValueBit - return the particular bit of Val
+ ///
+ unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ ///
+ unsigned getBinaryCodeForInstr(MachineInstr &MI);
+ };
+ char PPCCodeEmitter::ID = 0;
+}
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new PPCCodeEmitter(TM, MCE);
+}
+
+#ifdef __APPLE__
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(*I));
+ break;
+ case PPC::IMPLICIT_DEF_GPRC:
+ case PPC::IMPLICIT_DEF_G8RC:
+ case PPC::IMPLICIT_DEF_F8:
+ case PPC::IMPLICIT_DEF_F4:
+ case PPC::IMPLICIT_DEF_VRRC:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ }
+}
+
+int PPCCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) {
+
+ intptr_t rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+ if (MO.isRegister()) {
+ rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+
+ // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the
+ // register, not the register number directly.
+ if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) {
+ rv = 0x80 >> rv;
+ }
+ } else if (MO.isImmediate()) {
+ rv = MO.getImmedValue();
+ } else if (MO.isGlobalAddress() || MO.isExternalSymbol() ||
+ MO.isConstantPoolIndex() || MO.isJumpTableIndex()) {
+ unsigned Reloc = 0;
+ if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
+ MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF)
+ Reloc = PPC::reloc_pcrel_bx;
+ else {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ }
+ switch (MI.getOpcode()) {
+ default: MI.dump(); assert(0 && "Unknown instruction for relocation!");
+ case PPC::LIS:
+ case PPC::LIS8:
+ case PPC::ADDIS:
+ case PPC::ADDIS8:
+ Reloc = PPC::reloc_absolute_high; // Pointer to symbol
+ break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::LA:
+ // Loads.
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::LFS:
+ case PPC::LFD:
+
+ // Stores.
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::STFS:
+ case PPC::STFD:
+ Reloc = PPC::reloc_absolute_low;
+ break;
+
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ Reloc = PPC::reloc_absolute_low_ix;
+ break;
+ }
+ }
+
+ MachineRelocation R;
+ if (MO.isGlobalAddress()) {
+ R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ MO.getGlobal(), 0);
+ } else if (MO.isExternalSymbol()) {
+ R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(), 0);
+ } else if (MO.isConstantPoolIndex()) {
+ R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getConstantPoolIndex(), 0);
+ } else {
+ assert(MO.isJumpTableIndex());
+ R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, MO.getJumpTableIndex(), 0);
+ }
+
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
+ }
+ MCE.addRelocation(R);
+
+ } else if (MO.isMachineBasicBlock()) {
+ unsigned Reloc = 0;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::B || Opcode == PPC::BL_Macho ||
+ Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF ||
+ Opcode == PPC::BLA_ELF)
+ Reloc = PPC::reloc_pcrel_bx;
+ else // BCC instruction
+ Reloc = PPC::reloc_pcrel_bcx;
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc,
+ MO.getMachineBasicBlock()));
+ } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+#include "PPCGenCodeEmitter.inc"
+
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
new file mode 100644
index 0000000..81365e9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -0,0 +1,93 @@
+//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class PPCFrameInfo: public TargetFrameInfo {
+ const TargetMachine &TM;
+
+public:
+ PPCFrameInfo(const TargetMachine &tm, bool LP64)
+ : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) {
+ }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool LP64, bool isMacho) {
+ if (isMacho)
+ return LP64 ? 16 : 8;
+ // For ELF 32 ABI:
+ return 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) {
+ // For MachO ABI:
+ // Use the TOC save slot in the PowerPC linkage area for saving the frame
+ // pointer (if needed.) LLVM does not generate code that uses the TOC (R2
+ // is treated as a caller saved register.)
+ if (isMacho)
+ return LP64 ? 40 : 20;
+
+ // For ELF 32 ABI:
+ // Save it right before the link register
+ return -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool LP64, bool isMacho) {
+ if (isMacho)
+ return 6 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+ return 8;
+ }
+
+  /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+  /// argument area.
+ static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) {
+ // For Macho ABI:
+    // The prolog code of the callee may store up to 8 GPR argument registers
+    // to the stack, allowing va_start to index over them in memory if the
+    // callee is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isMacho)
+ return 8 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+    // There is no stack space allocated by default for the first 8 GPR
+    // arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool LP64, bool isMacho) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(LP64, isMacho) +
+ getMinCallArgumentsSize(LP64, isMacho);
+ }
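+
+  // For example, for the 32-bit MachO ABI this works out to
+  // 6*4 (linkage) + 8*4 (GPR argument home area) = 56 bytes; the 64-bit MachO
+  // figure is 6*8 + 8*8 = 112 bytes.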
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 0000000..26e1f47
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,303 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions.  At its peak, the PPC970 can dispatch four non-branch
+// instructions plus one branch instruction per cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and
+// some instructions fill an entire dispatch group.  Additionally, only
+// branches can issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
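+// For example (a hypothetical schedule): four ALU/load-store instructions plus
+// a trailing branch form one complete five-slot group; a PPC970_First op can
+// only start a group, and a PPC970_Single op occupies a group by itself.
+//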
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DOUT << "=== Start of dispatch group\n";
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ if (Opcode < ISD::BUILTIN_OP_END) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode -= ISD::BUILTIN_OP_END;
+
+ const TargetInstrDescriptor &TID = TII.get(Opcode);
+
+ isLoad = TID.Flags & M_LOAD_FLAG;
+ isStore = TID.Flags & M_STORE_FLAG;
+
+ unsigned TSFlags = TID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
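+/// For example, an 8-byte store to [r3+0] followed in the same dispatch group
+/// by a 4-byte load from [r3+4] overlaps ([0,8) vs. [4,8)) and is reported.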
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDOperand Ptr1, SDOperand Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+    // Okay, we don't have an exact match; if this is an indexed offset, see if
+    // we have overlap (which happens during fp->int conversion, for example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getValue();
+ int LoadOffs = LoadOffset->getValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group.  We return NoopHazard for any instruction
+/// that wouldn't itself terminate the dispatch group but would cause a
+/// pipeline flush.
+HazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SDNode *Node) {
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getOpcode()-ISD::BUILTIN_OP_END;
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: assert(0 && "Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+    // Only a branch may issue in the fifth (last) slot, so a non-branch
+    // instruction cannot issue once four instructions are already in the group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) {
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getOpcode()-ISD::BUILTIN_OP_END;
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU: case PPC::STWU8:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EmitNoop() {
+ AdvanceCycle();
+}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 0000000..cbff943
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automaton that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group,
+/// or storing then loading from the same address within a dispatch group).
+class PPCHazardRecognizer970 : public HazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+  // StorePtr1/StorePtr2 - Keep track of the address of any store.  If we see a
+  // load from the same address (or one that aliases it), the load may not
+  // issue in the same dispatch group.  We can have up to four stores in one
+  // dispatch group, hence we track up to 4.
+  //
+  // NumStores is 0 if we haven't seen a store yet.  We keep track of both
+  // operands of the store here, since we support [r+r] and [r+i] addressing.
+ SDOperand StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SDNode *Node);
+ virtual void EmitInstruction(SDNode *Node);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDOperand Ptr1, SDOperand Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 0000000..730bac6
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1122 @@
+//===-- PPCISelDAGToDAG.cpp - PPC pattern matching inst selector ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCISelLowering.h"
+#include "PPCHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN PPCDAGToDAGISel : public SelectionDAGISel {
+ PPCTargetMachine &TM;
+ PPCTargetLowering PPCLowering;
+ unsigned GlobalBaseReg;
+ public:
+ PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(PPCLowering), TM(tm),
+ PPCLowering(*TM.getTargetLowering()) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+
+ InsertVRSaveCode(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDOperand getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDOperand getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDOperand getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+    /// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+    /// isRotateAndMask - Returns true if the shift node N and Mask can be
+    /// folded into a single rotate-and-mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDOperand Op);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDOperand SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDOperand Op, SDOperand N, SDOperand &Disp,
+ SDOperand &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDOperand Op, SDOperand N, SDOperand &Out) const {
+ Out = N;
+ return true;
+ }
+
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which are preferred.
+ bool SelectAddrIdx(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDOperand Op, SDOperand N, SDOperand &Disp,
+ SDOperand &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
+ char ConstraintCode,
+ std::vector<SDOperand> &OutOps,
+ SelectionDAG &DAG) {
+ SDOperand Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddrIdx(Op, Op, Op0, Op1))
+ SelectAddrImm(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectAddrImm(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ AddToISelQueue(Op0); // r+0.
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ SDOperand BuildSDIVSequence(SDNode *N);
+ SDOperand BuildUDIVSequence(SDNode *N);
+
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+
+ void InsertVRSaveCode(Function &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual HazardRecognizer *CreateTargetHazardRecognizer() {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *II = PPCLowering.getTargetMachine().getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*II);
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDOperand Op);
+ };
+}
+
+/// InstructionSelectBasicBlock - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PPCDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(DAG);
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+  // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ MachineFunction &Fn = MachineFunction::get(&F);
+ SSARegMap *RegMap = Fn.getSSARegMap();
+ bool HasVectorVReg = false;
+ for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+ e = RegMap->getLastVirtReg()+1; i != e; ++i)
+ if (RegMap->getRegClass(i) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, TII.get(PPC::UPDATE_VRSAVE), UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && TII.isReturn(BB->back().getOpcode())) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && TII.isTerminatorInstr((--I2)->getOpcode()))
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ SSARegMap *RegMap = BB->getParent()->getSSARegMap();
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegMap->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, TII.get(PPC::MovePCtoLR), PPC::LR);
+ BuildMI(FirstMBB, MBBI, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegMap->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, TII.get(PPC::MovePCtoLR8), PPC::LR8);
+ BuildMI(FirstMBB, MBBI, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg, PPCLowering.getPointerTy()).Val;
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value.  If so, this returns true and sets Imm
+/// to the immediate value.
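+/// For example, 32767 and -32768 qualify, while 32768 and -32769 do not.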
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();
+}
+
+static bool isIntS16Immediate(SDOperand Op, short &Imm) {
+ return isIntS16Immediate(Op.Val, Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant.  If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDOperand N, unsigned &Imm) {
+ return isInt32Immediate(N.Val, Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc && isInt32Immediate(N->getOperand(1).Val, Imm);
+}
+
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool IsShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).Val, Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (IsShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (IsShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
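+/// For example, (or (and X, 0xFFFFFF00), (and Y, 0x000000FF)) can be selected
+/// as a single "rlwimi X, Y, 0, 24, 31", inserting the low byte of Y into X.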
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDOperand Op0 = N->getOperand(0);
+ SDOperand Op1 = N->getOperand(1);
+
+ uint64_t LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, 0xFFFFFFFFULL, LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, 0xFFFFFFFFULL, RKZ, RKO);
+
+ unsigned TargetMask = LKZ;
+ unsigned InsertMask = RKZ;
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDOperand Tmp1, Tmp2, Tmp3;
+ bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0;
+ AddToISelQueue(Tmp3);
+ AddToISelQueue(Op1);
+ SH &= 31;
+ SDOperand Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDOperand PPCDAGToDAGISel::SelectCC(SDOperand LHS, SDOperand RHS,
+ ISD::CondCode CC) {
+ // Always select the LHS.
+ AddToISelQueue(LHS);
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLWI, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16((int)Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPWI, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDOperand Xor(CurDAG->getTargetNode(PPC::XORIS, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLWI, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLWI, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPWI, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.Val, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLDI, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16(Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPDI, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt32(Imm)) {
+ SDOperand Xor(CurDAG->getTargetNode(PPC::XORIS8, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLDI, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.Val, Imm) && isUInt16(Imm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPLDI, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDOperand(CurDAG->getTargetNode(PPC::CMPDI, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ AddToISelQueue(RHS);
+ return SDOperand(CurDAG->getTargetNode(Opc, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOEQ: // FIXME: This is incorrect see PR642.
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETONE: // FIXME: This is incorrect see PR642.
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT: // FIXME: This is incorrect see PR642.
+ case ISD::SETULT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETOLE: // FIXME: This is incorrect see PR642.
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT: // FIXME: This is incorrect see PR642.
+ case ISD::SETUGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETOGE: // FIXME: This is incorrect see PR642.
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool& Inv) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOLT: // FIXME: This is incorrect see PR642.
+ case ISD::SETULT:
+ case ISD::SETLT: Inv = false; return 0;
+ case ISD::SETOGE: // FIXME: This is incorrect see PR642.
+ case ISD::SETUGE:
+ case ISD::SETGE: Inv = true; return 0;
+ case ISD::SETOGT: // FIXME: This is incorrect see PR642.
+ case ISD::SETUGT:
+ case ISD::SETGT: Inv = false; return 1;
+ case ISD::SETOLE: // FIXME: This is incorrect see PR642.
+ case ISD::SETULE:
+ case ISD::SETLE: Inv = true; return 1;
+ case ISD::SETOEQ: // FIXME: This is incorrect see PR642.
+ case ISD::SETUEQ:
+ case ISD::SETEQ: Inv = false; return 2;
+ case ISD::SETONE: // FIXME: This is incorrect see PR642.
+ case ISD::SETUNE:
+ case ISD::SETNE: Inv = true; return 2;
+ case ISD::SETO: Inv = true; return 3;
+ case ISD::SETUO: Inv = false; return 3;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDOperand Op) {
+ SDNode *N = Op.Val;
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDOperand Op = N->getOperand(0);
+ AddToISelQueue(Op);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDOperand(CurDAG->getTargetNode(PPC::CNTLZW, MVT::i32, Op), 0);
+ SDOperand Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
+ SDOperand AD =
+ SDOperand(CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDOperand Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDOperand T =
+ SDOperand(CurDAG->getTargetNode(PPC::NEG, MVT::i32, Op), 0);
+ T = SDOperand(CurDAG->getTargetNode(PPC::ANDC, MVT::i32, T, Op), 0);
+ SDOperand Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDOperand Op = N->getOperand(0);
+ AddToISelQueue(Op);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ Op = SDOperand(CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDOperand(CurDAG->getTargetNode(PPC::LI, MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ Op = SDOperand(CurDAG->getTargetNode(PPC::NOR, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDOperand(AD, 0),
+ Op, SDOperand(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDOperand AD = SDOperand(CurDAG->getTargetNode(PPC::ADDI, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDOperand AN = SDOperand(CurDAG->getTargetNode(PPC::AND, MVT::i32, AD,
+ Op), 0);
+ SDOperand Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDOperand Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDOperand(CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Ops, 4), 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+ SDOperand CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC);
+ SDOperand IntCR;
+
+ // Force the ccreg into CR7.
+ SDOperand CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDOperand InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (TLI.getTargetMachine().getSubtarget<PPCSubtarget>().isGigaProcessor())
+ IntCR = SDOperand(CurDAG->getTargetNode(PPC::MFOCRF, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDOperand(CurDAG->getTargetNode(PPC::MFCR, MVT::i32, CCReg), 0);
+
+ SDOperand Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (!Inv) {
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ } else {
+ SDOperand Tmp =
+ SDOperand(CurDAG->getTargetNode(PPC::RLWINM, MVT::i32, Ops, 4), 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDOperand Op) {
+ SDNode *N = Op.Val;
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
+ N->getOpcode() < PPCISD::FIRST_NUMBER)
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt32(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt32(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt16(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getTargetNode(PPC::LI8, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getTargetNode(OpC, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getTargetNode(PPC::ORI8, MVT::i64,
+ SDOperand(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getTargetNode(PPC::LIS8, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getTargetNode(PPC::RLDICR, MVT::i64,
+ SDOperand(Result, 0),
+ getI32Imm(Shift), getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORIS8, MVT::i64,
+ SDOperand(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORI8, MVT::i64,
+ SDOperand(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
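+    // The i64 path above would, for example, build the (hypothetical) constant
+    // 0x1234567800000001 as:
+    //   lis8 r, 0x1234 ; ori8 r, r, 0x5678 ; rldicr r, r, 32, 31 ; ori8 r, r, 1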
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(Op);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDOperand TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getTargetNode(Opc, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDOperand InFlag = N->getOperand(1);
+ AddToISelQueue(InFlag);
+ // Use MFOCRF if supported.
+ if (TLI.getTargetMachine().getSubtarget<PPCSubtarget>().isGigaProcessor())
+ return CurDAG->getTargetNode(PPC::MFOCRF, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getTargetNode(PPC::MFCR, MVT::i32, InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDOperand N0 = N->getOperand(0);
+ AddToISelQueue(N0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDOperand(Op, 0), SDOperand(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDOperand PT =
+ SDOperand(CurDAG->getTargetNode(PPC::ADDZE, MVT::i32,
+ SDOperand(Op, 0), SDOperand(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT::ValueType LoadedVT = LD->getLoadedVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDOperand Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDOperand Chain = LD->getChain();
+ SDOperand Base = LD->getBasePtr();
+ AddToISelQueue(Chain);
+ AddToISelQueue(Base);
+ AddToISelQueue(Offset);
+ SDOperand Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getTargetNode(Opcode, MVT::i32, MVT::i32,
+ MVT::Other, Ops, 3);
+ } else {
+ assert(0 && "R+R preindex loads not supported yet!");
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
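+ // For example (and (rotl X, 5), 0x00FFFF00) can become a single
+ // "rlwinm rD, rX, 5, 8, 23" (mask bits 8..23 in MSB-0 numbering);
+ // the values here are illustrative only.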
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).Val, Imm, false, SH, MB, ME)) {
+ SDOperand Val = N->getOperand(0).getOperand(0);
+ AddToISelQueue(Val);
+ SDOperand Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDOperand Val = N->getOperand(0);
+ AddToISelQueue(Val);
+ SDOperand Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ AddToISelQueue(N->getOperand(1));
+ ReplaceUses(SDOperand(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
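+ // (~(c1^c2) has a bit set exactly where c1 and c2 agree, so requiring it to
+ // be a run of ones means the OR and the mask only affect one contiguous
+ // bitfield -- exactly what rlwimi inserts.)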
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ AddToISelQueue(N->getOperand(0).getOperand(0));
+ AddToISelQueue(N->getOperand(0).getOperand(1));
+ SDOperand Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).Val, ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ AddToISelQueue(N->getOperand(0).getOperand(0));
+ SDOperand Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).Val, ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ AddToISelQueue(N->getOperand(0).getOperand(0));
+ SDOperand Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
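+ // The branchless sequence emitted below is roughly (illustrative regs):
+ //   addic r2, lhs, -1    ; CA is set iff lhs != 0
+ //   subfe r3, r2, lhs    ; r3 = ~r2 + lhs + CA = CA, i.e. 0 or 1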
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ AddToISelQueue(N->getOperand(0));
+ SDNode *Tmp =
+ CurDAG->getTargetNode(PPC::ADDIC, MVT::i32, MVT::Flag,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDOperand(Tmp, 0), N->getOperand(0),
+ SDOperand(Tmp, 1));
+ }
+
+ SDOperand CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ AddToISelQueue(N->getOperand(2));
+ AddToISelQueue(N->getOperand(3));
+ SDOperand Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ AddToISelQueue(N->getOperand(0)); // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ AddToISelQueue(N->getOperand(4)); // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDOperand Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getValue());
+ SDOperand Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ AddToISelQueue(N->getOperand(0));
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDOperand CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC);
+ SDOperand Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDOperand Chain = N->getOperand(0);
+ SDOperand Target = N->getOperand(1);
+ AddToISelQueue(Chain);
+ AddToISelQueue(Target);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ Chain = SDOperand(CurDAG->getTargetNode(Opc, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 0000000..6c2f383
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,3451 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCPerfectShuffle.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ // PowerPC does not have truncstore for i1.
+ setStoreXAction(MVT::i1, Promote);
+
+ // PowerPC has pre-inc loads and stores.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+
+ // PowerPC has no intrinsics for these particular operations
+ setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMSET, MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // If the subtarget doesn't have a hardware square root instruction, expand FSQRT.
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND, which requires a SetCC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ if (!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+ } else {
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ }
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with ELF 32 ABI
+ if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // 64-bit implementations also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64 bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ } else {
+ // 32 bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
+ setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setSetCCResultType(MVT::i32);
+ setShiftAmountType(MVT::i32);
+ setSetCCResultContents(ZeroOrOneSetCCResult);
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ computeRegisterProperties();
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
+ case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
+ case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDOperand Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+ else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
+ return Op.getOpcode() == ISD::UNDEF ||
+ cast<ConstantSDNode>(Op)->getValue() == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
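+/// For example, the non-unary form matches the byte shuffle mask
+/// <1,3,5,...,31>, i.e. the odd-numbered bytes of the two input vectors.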
+bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getOperand(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
+ !isConstantOrUndef(N->getOperand(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
+ !isConstantOrUndef(N->getOperand(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
+ !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getOperand(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
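+/// For example vmrglb corresponds to UnitSize=1, LHSStart=8, RHSStart=24 and
+/// matches the byte mask <8,24,9,25,...,15,31>.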
+///
+static bool isVMerge(SDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
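+/// For example, the non-unary byte mask <3,4,5,...,18> selects 16 consecutive
+/// bytes starting at byte 3 of the concatenated inputs: a vsldoi by 3.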
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
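+/// For example, with EltSize == 4 the mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
+/// splats word 1 of the first input (vspltw with immediate 1).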
+bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ N->getNumOperands() == 16 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = 0;
+ SDOperand Elt = N->getOperand(0);
+ if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
+ ElementBase = EltV->getValue();
+ else
+ return false; // FIXME: Handle UNDEF elements too!
+
+ if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
+ return false;
+
+ // Check that they are consecutive.
+ for (unsigned i = 1; i != EltSize; ++i) {
+ if (!isa<ConstantSDNode>(N->getOperand(i)) ||
+ cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
+ return false;
+ }
+
+ assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getOperand(i+j) != N->getOperand(j))
+ return false;
+ }
+
+ return true;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ assert(isSplatShuffleMask(N, EltSize));
+ return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDOperand OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDOperand UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across each chunk.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();
+
+ if (UniquedVals[i&(Multiple-1)].Val == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDOperand(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].Val == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].Val == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].Val == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDOperand();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.Val == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDOperand();
+ }
+
+ if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = 0;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getValue();
+ ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValue());
+ ValSizeInBytes = 4;
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only value whose replicated bits could fit into our
+ // immediate field would be zero, and we prefer to use vxor for that.
+ if (ValSizeInBytes < ByteSize) return SDOperand();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDOperand();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDOperand();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();
+}
+static bool isIntS16Immediate(SDOperand Op, short &Imm) {
+ return isIntS16Immediate(Op.Val, Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,
+ SDOperand &Index,
+ SelectionDAG &DAG) {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i can fold it if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ uint64_t LHSKnownZero, LHSKnownOne;
+ uint64_t RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero) {
+ DAG.ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if ((LHSKnownZero | RHSKnownZero) == ~0U) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,
+ SDOperand &Base, SelectionDAG &DAG){
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ uint64_t LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getValue() == (int)CN->getValue()) {
+ int Addr = (int)CN->getValue();
+
+ // Otherwise, break this down into an LIS + disp.
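+ // For example, for the (hypothetical) absolute address 0x12348000:
+ //   lis r2, 0x1235        ; (Addr - (short)Addr) >> 16, correcting for the
+ //   lwz r3, -0x8000(r2)   ; sign of the low half, (short)Addr = -0x8000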
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,
+ SDOperand &Index,
+ SelectionDAG &DAG) {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPC::R0, N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
+bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
+ SDOperand &Base,
+ SelectionDAG &DAG) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ uint64_t LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getValue() == (int)CN->getValue()) {
+ int Addr = (int)CN->getValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - Returns true, and sets the base pointer, the
+/// offset, and the addressing mode by reference, if the node's address can be
+/// legally represented as a pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDOperand Ptr;
+ MVT::ValueType VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getLoadedVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getStoredVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (MVT::isVector(VT))
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getLoadedVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
+ SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+ return Lo;
+}
+
+static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
+ SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+ return Lo;
+}
+
+static SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
+ assert(0 && "TLS not implemented for PPC.");
+}
+
+static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
+ SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to globals.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
+
+ if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
+ return Lo;
+
+ // If the global is weak or external, we have to go through the lazy
+ // resolution stub.
+ return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
+}
+
+static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
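+ // e.g. (i32 seteq X, 0) becomes (srl (ctlz X), 5): cntlzw yields 32 only
+ // when X is zero, and shifting right by log2(32) leaves exactly that bit.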
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ MVT::ValueType VT = Op.getOperand(0).getValueType();
+ SDOperand Zext = Op.getOperand(0);
+ if (VT < MVT::i32) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
+ SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
+ SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDOperand();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
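+ // e.g. (seteq a, b) becomes (seteq (xor a, b), 0), which the zero-compare
+ // path above can then turn into the cntlzw/srwi form.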
+ MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
+ if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDOperand();
+}
+
+static SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+
+ assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+}
+
+static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+
+ if (Subtarget.isMachoABI()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
+ SV->getOffset());
+ }
+
+ // For ELF 32 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
+
+ SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
+ SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
+
+
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDOperand StackOffset = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ SDOperand ConstFrameOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8,
+ PtrVT);
+ SDOperand ConstStackOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8 - 1,
+ PtrVT);
+ SDOperand ConstFPROffset = DAG.getConstant(1, PtrVT);
+
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+
+ // Store first byte : number of int regs
+ SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
+ Op.getOperand(1), SV->getValue(),
+ SV->getOffset());
+ SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDOperand secondStore = DAG.getStore(firstStore, ArgFPR, nextPtr,
+ SV->getValue(), SV->getOffset());
+ nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDOperand thirdStore = DAG.getStore(secondStore, StackOffset, nextPtr,
+ SV->getValue(), SV->getOffset());
+ nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, FR, nextPtr, SV->getValue(),
+ SV->getOffset());
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// depending on which subtarget is selected.
+static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
+ if (Subtarget.isMachoABI()) {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+ return FPR;
+ }
+
+
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ return FPR;
+}
+
+static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SSARegMap *RegMap = MF.getSSARegMap();
+ SmallVector<SDOperand, 8> ArgValues;
+ SDOperand Root = Op.getOperand(0);
+
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = sizeof(GPR_32)/sizeof(GPR_32[0]);
+ const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+ const unsigned Num_VR_Regs = sizeof( VR)/sizeof( VR[0]);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+ //
+ // In the ELF 32 ABI, GPRs and the stack are doubleword aligned: an argument
+ // represented with two words (long long or double) must be copied to an
+ // even GPR_idx value or to an even ArgOffset value.
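+ // For example, after a single int in r3, a long long does not start in r4;
+ // it is moved up to the r5/r6 pair (an even GPR_idx) and r4 is left unused.
+ // (This is an illustrative reading of the rule described above.)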
+
+ for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
+ SDOperand ArgVal;
+ bool needsLoad = false;
+ MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
+ unsigned ArgSize = ObjSize;
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(ArgNo+3))->getValue();
+ unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
+ // See if next argument requires stack alignment in ELF
+ bool Expand = (ObjectVT == MVT::f64) || ((ArgNo + 1 < e) &&
+ (cast<ConstantSDNode>(Op.getOperand(ArgNo+4))->getValue() & AlignFlag) &&
+ (!(Flags & AlignFlag)));
+
+ unsigned CurArgOffset = ArgOffset;
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ // Double word align in ELF
+ if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+ MF.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // Stack align in ELF
+ if (needsLoad && Expand && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+ break;
+
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
+ MF.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ }
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs && isMachoABI) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+ if (ObjectVT == MVT::f32)
+ VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
+ else
+ VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
+ MF.addLiveIn(FPR[FPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // Stack align in ELF
+ if (needsLoad && Expand && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All FP arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
+ MF.addLiveIn(VR[VR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
+ ++VR_idx;
+ } else {
+ // This should be simple, but requires getting 16-byte aligned stack
+ // values.
+ assert(0 && "Loading VR argument not implemented yet!");
+ needsLoad = true;
+ }
+ break;
+ }
+
+    // We need to load the argument into a virtual register if we determined
+    // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ // If the argument is actually used, emit a load from the right stack
+ // slot.
+ if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize));
+ SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+ } else {
+ // Don't emit a dead load.
+ ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
+ }
+ }
+
+ ArgValues.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, make a frame index
+  // for the start of the first vararg value... for expansion of llvm.va_start.
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (isVarArg) {
+
+ int depth;
+ if (isELF32_ABI) {
+ VarArgsNumGPR = GPR_idx;
+ VarArgsNumFPR = FPR_idx;
+
+ // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
+ // pointer.
+ depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 +
+ Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 +
+ MVT::getSizeInBits(PtrVT)/8);
+
+ VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+ ArgOffset);
+
+ }
+ else
+ depth = ArgOffset;
+
+ VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+ depth);
+ SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ SmallVector<SDOperand, 8> MemOps;
+
+ // In ELF 32 ABI, the fixed integer arguments of a variadic function are
+ // stored to the VarArgsFrameIndex on the stack.
+ if (isELF32_ABI) {
+ for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
+ SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
+ SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+    // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by dereferencing
+    // the result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
+ else
+ VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+
+ MF.addLiveIn(GPR[GPR_idx], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+      // Increment the address by the pointer size for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ if (isELF32_ABI) {
+ for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
+ SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
+ SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
+ unsigned VReg;
+ VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
+
+ MF.addLiveIn(FPR[FPR_idx], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+ Op.Val->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+}
+
+/// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
+}
+
+
+static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ SDOperand Chain = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ SDOperand Callee = Op.getOperand(4);
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
+ // SelectExpr to use to put the arguments in the appropriate registers.
+ std::vector<SDOperand> args_to_use;
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area and the parameter passing area. We start with 24/48 bytes, which is
+  // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
+ ArgSize = std::max(ArgSize, PtrByteSize);
+ NumBytes += ArgSize;
+ }
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, PtrVT));
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDOperand StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+  // Figure out which arguments are going to go in registers, and which in
+  // memory. Also, if this is a vararg function, floating point arguments must
+  // be stored to our stack, and loaded into integer regs as well, if any
+  // integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = sizeof(GPR_32)/sizeof(GPR_32[0]);
+ const unsigned NumFPRs = isMachoABI ? 13 : 8;
+  const unsigned NumVRs  = sizeof(VR)/sizeof(VR[0]);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ SmallVector<SDOperand, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ bool inMem = false;
+ SDOperand Arg = Op.getOperand(5+2*i);
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
+ unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
+    // See if the next argument requires stack alignment in ELF.
+ unsigned next = 5+2*(i+1)+1;
+ bool Expand = (Arg.getValueType() == MVT::f64) || ((i + 1 < NumOps) &&
+ (cast<ConstantSDNode>(Op.getOperand(next))->getValue() & AlignFlag) &&
+ (!(Flags & AlignFlag)));
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDOperand PtrOff;
+
+ // Stack align in ELF 32
+ if (isELF32_ABI && Expand)
+ PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
+ StackPtr.getValueType());
+ else
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
+ }
+
+ switch (Arg.getValueType()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ // Double word align in ELF
+ if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Expand)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+
+ ArgOffset += PtrByteSize;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (isVarArg) {
+ // Float varargs need to be promoted to double.
+ if (Arg.getValueType() == MVT::f32)
+ Arg = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Arg);
+ }
+
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
+ SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (isMachoABI) {
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ }
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Expand)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ }
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ assert(!isVarArg && "Don't support passing vectors to varargs yet!");
+ assert(VR_idx != NumVRs &&
+ "Don't support passing more than 12 vector args yet!");
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ break;
+ }
+ }
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
+ if (isVarArg && isELF32_ABI) {
+ SDOperand SetCR(DAG.getTargetNode(PPC::SETCR, MVT::i32), 0);
+ Chain = DAG.getCopyToReg(Chain, PPC::CR6, SetCR, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ SmallVector<SDOperand, 8> Ops;
+ unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+ else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDOperand(Dest, 0);
+ else {
+    // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+    // to do the call; we can't use PPCISD::CALL.
+ SDOperand MTCTROps[] = {Chain, Callee, InFlag};
+ Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
+ InFlag = Chain.getValue(1);
+
+ // Copy the callee address into R12 on darwin.
+ if (isMachoABI) {
+ Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Flag);
+ Ops.push_back(Chain);
+ CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
+ Callee.Val = 0;
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.Val) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ SDOperand ResultVals[3];
+ unsigned NumResults = 0;
+ NodeTys.clear();
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (Op.Val->getValueType(0)) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i32:
+ if (Op.Val->getValueType(1) == MVT::i32) {
+ Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals[1] = Chain.getValue(0);
+ NumResults = 2;
+ NodeTys.push_back(MVT::i32);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ }
+ NodeTys.push_back(MVT::i32);
+ break;
+ case MVT::i64:
+ Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ NodeTys.push_back(MVT::i64);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ NodeTys.push_back(Op.Val->getValueType(0));
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ NodeTys.push_back(Op.Val->getValueType(0));
+ break;
+ }
+
+ Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+ DAG.getConstant(NumBytes, PtrVT));
+ NodeTys.push_back(MVT::Other);
+
+ // If the function returns void, just return the chain.
+ if (NumResults == 0)
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals[NumResults++] = Chain;
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
+ ResultVals, NumResults);
+ return Res.getValue(Op.ResNo);
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.Val)
+ return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
+}
+
+static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+  // When we pop the dynamic allocation, we need to restore the SP link.
+
+  // Get the correct type for pointers.
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool IsPPC64 = Subtarget.isPPC64();
+ unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+ SDOperand StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
+}
+
+static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = Subtarget.isPPC64();
+ bool isMachoABI = Subtarget.isMachoABI();
+
+  // Get the current frame pointer save index. The users of this index will be
+  // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+  // If the frame pointer save index hasn't been defined yet, create it now.
+ if (!FPSI) {
+    // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
+
+    // Allocate the frame index for the frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Get the inputs.
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Size = Op.getOperand(1);
+
+  // Get the correct type for pointers.
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT);
+ // Build a DYNALLOC node.
+ SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
+}
+
+
+/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
+/// when possible.
+static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
+ // Not FP? Not a fsel.
+ if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
+ !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
+ return SDOperand();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();
+
+ MVT::ValueType ResVT = Op.getValueType();
+ MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
+ SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
+
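+  // The PPC fsel instruction computes (A >= 0.0) ? B : C, so the cases below
+  // arrange for the sign of a single FP value (LHS, -LHS, or LHS-RHS) to make
+  // the selection between TV and FV.
+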
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETULE:
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, ResVT,
+ DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
+ }
+
+ SDOperand Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+ case ISD::SETULE:
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
+ }
+ return SDOperand();
+}
+
+static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
+ SDOperand Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
+
+ SDOperand Tmp;
+ switch (Op.getValueType()) {
+ default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
+ if (Op.getValueType() == MVT::i32)
+ Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
+ return Bits;
+}
+
+static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+  // Since we only generate this in 64-bit mode, we can take advantage of
+  // 64-bit registers. In particular, sign extend the input value into the
+  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
+  // then lfd it and fcfid it.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
+ DAG.getEntryNode(), Ext64, FIdx,
+ DAG.getSrcValue(NULL));
+ // Load the value as a double.
+ SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0);
+
+ // FCFID it and return it.
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+}
+
+static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDOperand Lo = Op.getOperand(0);
+ SDOperand Hi = Op.getOperand(1);
+ SDOperand Amt = Op.getOperand(2);
+
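+  // OutLo = Lo << Amt
+  // OutHi = (Hi << Amt) | (Lo >> (32-Amt)) | (Lo << (Amt-32))
+  // PPC 32-bit shifts by amounts in [32,63] produce zero, so at most one of
+  // the two cross terms is nonzero for any Amt in [0,63].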
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
+ SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+ SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
+ SDOperand OutOps[] = { OutLo, OutHi };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ OutOps, 2);
+}
+
+static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
+
+  // Expand into a bunch of logical ops. Note that these ops
+  // depend on the PPC behavior for oversized shift amounts.
+ SDOperand Lo = Op.getOperand(0);
+ SDOperand Hi = Op.getOperand(1);
+ SDOperand Amt = Op.getOperand(2);
+
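+  // OutHi = Hi >> Amt
+  // OutLo = (Lo >> Amt) | (Hi << (32-Amt)) | (Hi >> (Amt-32))
+  // As in LowerSHL_PARTS, shifts by amounts in [32,63] produce zero, so the
+  // correct cross term is selected for any Amt in [0,63].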
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
+ SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
+ SDOperand OutOps[] = { OutLo, OutHi };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ OutOps, 2);
+}
+
+static SDOperand LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
+
+  // Expand into a bunch of logical ops, followed by a select_cc.
+ SDOperand Lo = Op.getOperand(0);
+ SDOperand Hi = Op.getOperand(1);
+ SDOperand Amt = Op.getOperand(2);
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
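+  // Tmp5 is Amt-32: when it is <= 0 the low word is (Lo >> Amt)|(Hi << (32-Amt));
+  // otherwise it is the arithmetic shift Hi >> (Amt-32). Pick with a select.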
+ SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDOperand OutOps[] = { OutLo, OutHi };
+ return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
+ OutOps, 2);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+// If this is a vector of constants or undefs, get the bits. A bit in
+// UndefBits is set if the corresponding element of the vector is an
+// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
+// zero. Return true if this is not an array of constants, false if it is.
+//
+static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
+ uint64_t UndefBits[2]) {
+ // Start with zero'd results.
+ VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
+
+ unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDOperand OpVal = BV->getOperand(i);
+
+    unsigned PartNo = i >= e/2; // In the upper 64 bits?
+ unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
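+    // Elements are packed most-significant-first: element 0 lands in the high
+    // slot of VectorBits[0], matching the big-endian AltiVec element order.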
+
+ uint64_t EltBits = 0;
+ if (OpVal.getOpcode() == ISD::UNDEF) {
+ uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
+ UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
+ continue;
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 &&
+ "Only one legal FP vector type!");
+ EltBits = FloatToBits(CN->getValue());
+ } else {
+ // Nonconstant element.
+ return true;
+ }
+
+ VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
+ }
+
+ //printf("%llx %llx %llx %llx\n",
+ // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
+ return false;
+}
+
+// If this is a splat (repetition) of a value across the whole vector, return
+// the smallest size that splats it. For example, "0x01010101010101..." is a
+// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+// SplatSize = 1 byte.
+static bool isConstantSplat(const uint64_t Bits128[2],
+ const uint64_t Undef128[2],
+ unsigned &SplatBits, unsigned &SplatUndef,
+ unsigned &SplatSize) {
+
+  // Don't let undefs prevent splats from matching. See if the top 64 bits are
+  // the same as the lower 64 bits, ignoring undefs.
+ if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint64_t Bits64 = Bits128[0] | Bits128[1];
+ uint64_t Undef64 = Undef128[0] & Undef128[1];
+
+  // Check that the top 32 bits are the same as the lower 32 bits, ignoring
+  // undefs.
+ if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
+ uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
+
+  // If the top 16 bits are different from the lower 16 bits, ignoring
+  // undefs, we have an i32 splat.
+ if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
+ SplatBits = Bits32;
+ SplatUndef = Undef32;
+ SplatSize = 4;
+ return true;
+ }
+
+ uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
+ uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
+
+  // If the top 8 bits are different from the lower 8 bits, ignoring
+  // undefs, we have an i16 splat.
+ if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
+ SplatBits = Bits16;
+ SplatUndef = Undef16;
+ SplatSize = 2;
+ return true;
+ }
+
+ // Otherwise, we have an 8-bit splat.
+ SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
+ SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
+ SplatSize = 1;
+ return true;
+}
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
+ SelectionDAG &DAG) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ MVT::ValueType CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));
+ SmallVector<SDOperand, 8> Ops;
+ Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);
+ SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
+ SelectionDAG &DAG,
+ MVT::ValueType DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
+ SDOperand Op2, SelectionDAG &DAG,
+ MVT::ValueType DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
+ MVT::ValueType VT, SelectionDAG &DAG) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
+
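+  // vsldoi takes 16 consecutive bytes of the 32-byte concatenation LHS:RHS
+  // starting at byte Amt, which the shuffle mask entries i+Amt express directly.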
+ SDOperand Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = DAG.getConstant(i+Amt, MVT::i32);
+ SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+ // If this is a vector of constants or undefs, get the bits. A bit in
+ // UndefBits is set if the corresponding element of the vector is an
+ // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
+ // zero.
+ uint64_t VectorBits[2];
+ uint64_t UndefBits[2];
+ if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
+ return SDOperand(); // Not a constant vector.
+
+ // If this is a splat (repetition) of a value across the whole vector, return
+ // the smallest size that splats it. For example, "0x01010101010101..." is a
+ // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+ // SplatSize = 1 byte.
+ unsigned SplatBits, SplatUndef, SplatSize;
+ if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
+ bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDOperand Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
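+    // Sign extend the splat value from its natural width (8*SplatSize bits).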
+ int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
+ return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
+ }
+
+    // If this is 0x8000_0000 x 4, turn it into vspltisw + vslw. If it is
+    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+    // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+      // Make an all-ones vector with vspltisw -1:
+ SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+    // Check to see if this is one of the wide variety of 'vsplti*; binop self' cases.
+ unsigned SplatBitSize = SplatSize*8;
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
+      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+      // cases which are ambiguous (e.g. formation of 0x8000_0000).
+      int i = SplatCsts[idx];
+
+      // Figure out what shift amount AltiVec will actually use if we shift by
+      // i at this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
+ SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
+ LHS = DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
+ }
+ // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
+ SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
+ LHS = DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
+ }
+ }
+
+ return SDOperand();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
+ SDOperand RHS, SelectionDAG &DAG) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
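+  // Each table entry packs a cost in bits 31-30, the operation in bits 29-26,
+  // and the table indices of its two operand shuffles in bits 25-13 and 12-0.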
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
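+  // IDs encode four result elements in base 9: digits 0-3 select LHS elements,
+  // 4-7 select RHS elements, and 8 means undef. <0,1,2,3> is the LHS identity
+  // and <4,5,6,7> is the RHS identity, so OP_COPY just returns an input.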
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDOperand OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
+
+ unsigned ShufIdxs[16];
+ switch (OpNum) {
+ default: assert(0 && "Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
+ }
+ SDOperand Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32);
+
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand V2 = Op.getOperand(1);
+ SDOperand PermMask = Op.getOperand(2);
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
+ PPC::isSplatShuffleMask(PermMask.Val, 2) ||
+ PPC::isSplatShuffleMask(PermMask.Val, 4) ||
+ PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
+ PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
+ PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
+ PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
+ PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource =
+ cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+  // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
+  // that it is in input element units, not in bytes. Convert now.
+ MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
+ unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
+
+ SmallVector<SDOperand, 16> ResultMask;
+ for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
+ unsigned SrcElt;
+ if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
+ SrcElt = 0;
+ else
+ SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i8));
+ }
+
+ SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison. If it is, return true and fill in CompareOpc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDOperand(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDOperand Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDOperand Store = DAG.getStore(DAG.getEntryNode(),
+ Op.getOperand(0), FIdx, NULL, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0);
+}
+
+static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+ if (Op.getValueType() == MVT::v4i32) {
+ SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
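+    // Per 32-bit lane, with x = (xh:xl) and y = (yh:yl) as 16-bit halves:
+    //   x*y mod 2^32 = xl*yl + ((xh*yl + xl*yh) << 16)
+    // vmulouh produces xl*yl; vmsumuhm against a halfword-swapped RHS produces
+    // xh*yl + xl*yh, which is then shifted up 16 bits and added in.
+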
+ SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG);
+ SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.
+
+ SDOperand RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, MVT::v4i32);
+
+ SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
+ return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);
+
+ // Merge the results together.
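+ // The shuffle mask picks the low byte of each 16-bit product (byte 2*i+1
+ // within each halfword), alternating between the even-element products
+ // (indices 0-15) and the odd-element products (indices 16-31).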
+ SDOperand Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8);
+ Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
+ }
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
+ } else {
+ assert(0 && "Unknown mul to lower!");
+ abort();
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
+ VarArgsStackOffset, VarArgsNumGPR,
+ VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget);
+ case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address. RETURNADDR is currently unimplemented.
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getValue() == 0) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getValue() == 0) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getValue() == 0 || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64) {
+ SDOperand Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+ Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
+ DCI.AddToWorklist(Val.Val);
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
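+ // FCTIWZ leaves the converted 32-bit integer in an FPR and STFIWX stores
+ // that word directly, so the value never has to pass through the stack
+ // temporary used by the default FP_TO_SINT lowering.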
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32) {
+ SDOperand Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+
+ Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.Val);
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).Val->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDOperand BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);
+
+ return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getValueType(N->getOperand(1).getValueType()));
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDOperand Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Other);
+ SDOperand SV = DAG.getSrcValue(LD->getSrcValue(), LD->getSrcValueOffset());
+ SDOperand Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ SV, // SrcValue
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);
+
+ // If this is an i16 load, insert the truncate.
+ SDOperand ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDOperand(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).Val;
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if ((*UI)->getOpcode() == PPCISD::VCMPo &&
+ (*UI)->getOperand(1) == N->getOperand(1) &&
+ (*UI)->getOperand(2) == N->getOperand(2) &&
+ (*UI)->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if its flag result has no uses, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDOperand(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
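+ // The predicate intrinsic returns 1 when the condition it tests holds, so
+ // we branch on the condition being true exactly when the comparison is
+ // "== 1" or "!= 0".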
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<MVT::ValueType> VTs;
+ SDOperand Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = 0;
+ KnownOne = 0;
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+// isOperandValidForConstraint
+SDOperand PPCTargetLowering::
+isOperandValidForConstraint(SDOperand Op, char Letter, SelectionDAG &DAG) {
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return SDOperand(0, 0); // Must be an immediate to match.
+ unsigned Value = CST->getValue();
+ switch (Letter) {
+ default: assert(0 && "Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ return DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ // Handle standard constraint letters.
+ return TargetLowering::isOperandValidForConstraint(Op, Letter, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG)
+{
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
+ return SDOperand();
+
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
+ && MFI->getStackSize();
+
+ if (isPPC64)
+ return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,
+ MVT::i64);
+ else
+ return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1,
+ MVT::i32);
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 0000000..0581865
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,263 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+PPC::INSTRUCTION_LIST_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// then a SRCVALUE for the address.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32,
+
+ /// CALL - A direct function call.
+ CALL_Macho, CALL_ELF,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Macho, BCTRL_ELF,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+ bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+ bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(SDNode *N, unsigned EltSize);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDOperand get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int VarArgsStackOffset; // StackOffset for start of stack
+ // arguments.
+ unsigned VarArgsNumGPR; // Index of the first unused integer
+ // register for parameter passing.
+ unsigned VarArgsNumFPR; // Index of the first unused double
+ // register for parameter passing.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ const PPCSubtarget &PPCSubTarget;
+ public:
+ PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getPreIndexedAddressParts - Returns true by value, and returns the base
+ /// pointer, offset pointer and addressing mode by reference, if the node's
+ /// address can be legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG);
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDOperand N, SDOperand &Base, SDOperand &Index,
+ SelectionDAG &DAG);
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDOperand N, SDOperand &Disp, SDOperand &Base,
+ SelectionDAG &DAG);
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDOperand N, SDOperand &Base, SDOperand &Index,
+ SelectionDAG &DAG);
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDOperand N, SDOperand &Disp, SDOperand &Base,
+ SelectionDAG &DAG);
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+ virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ SDOperand isOperandValidForConstraint(SDOperand Op, char ConstraintLetter,
+ SelectionDAG &DAG);
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ SDOperand LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 0000000..a7e25cf
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,590 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm, or 0 when imm is 0 (SH must be < 64).
+ return N->getValue() ? getI32Imm(64 - N->getValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions.
+//
+
+def IMPLICIT_DEF_G8RC : Pseudo<(ops G8RC:$rD), "; IMPLICIT_DEF_G8RC $rD",
+ [(set G8RC:$rD, (undef))]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(ops piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+// Macho ABI Calls.
+let isCall = 1, noResults = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ def BL8_Macho : IForm<18, 0, 1,
+ (ops calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ def BLA8_Macho : IForm<18, 1, 1,
+ (ops aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>;
+}
+
+// ELF 64 ABI Calls = Macho ABI Calls
+// Used to define BL8_ELF and BLA8_ELF
+let isCall = 1, noResults = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ def BL8_ELF : IForm<18, 0, 1,
+ (ops calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ def BLA8_ELF : IForm<18, 1, 1,
+ (ops aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>;
+}
+
+
+// Calls
+def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)),
+ (BL8_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)),
+ (BL8_Macho texternalsym:$dst)>;
+
+def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (ops G8RC:$rT), "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+let Pattern = [(PPCmtctr G8RC:$rS)] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (ops G8RC:$rS), "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+def DYNALLOC8 : Pseudo<(ops G8RC:$result, G8RC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC8 $result, $negsize, $fpsi",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>,
+ Imp<[X1],[X1]>;
+
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (ops G8RC:$rS), "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (ops G8RC:$rT), "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (ops G8RC:$rA, GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (ops GPRC:$rA, G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (ops G8RC:$rD, symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (ops G8RC:$rD, symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (ops G8RC:$dst, G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+def ADDC8 : XOForm_1<31, 10, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE8 : XOForm_1<31, 138, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+
+def ADDI8 : DForm_2<14, (ops G8RC:$rD, G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (ops G8RC:$rD, G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+def SUBFIC8: DForm_2< 8, (ops G8RC:$rD, G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBF8 : XOForm_1<31, 40, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+
+def SUBFC8 : XOForm_1<31, 8, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+
+def SUBFE8 : XOForm_1<31, 136, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (ops G8RC:$rT, G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (ops G8RC:$rT, G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def NEG8 : XOForm_3<31, 104, 0, (ops G8RC:$rT, G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (ops G8RC:$rT, G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (ops G8RC:$rT, G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+
+
+
+def MULHD : XOForm_1<31, 73, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (ops CRRC:$crD, G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (ops CRRC:$crD, G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (ops CRRC:$crD, G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (ops CRRC:$dst, G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (ops G8RC:$rA, G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (shl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (ops G8RC:$rA, G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (srl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRAD : XForm_6<31, 794, (ops G8RC:$rA, G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+
+def EXTSB8 : XForm_11<31, 954, (ops G8RC:$rA, G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (ops G8RC:$rA, G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (ops G8RC:$rA, G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (ops GPRC:$rA, GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (ops G8RC:$rA, GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+def SRADI : XSForm_1<31, 413, (ops G8RC:$rA, G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+def CNTLZD : XForm_11<31, 58, (ops G8RC:$rA, G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (ops G8RC:$rA, G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDICL : MDForm_1<30, 0,
+ (ops G8RC:$rA, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (ops G8RC:$rA, G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let isLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (ops G8RC:$rD, memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (ops G8RC:$rD, memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (ops G8RC:$rD, memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (ops G8RC:$rD, memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+def LHAU8 : DForm_1<43, (ops G8RC:$rD, ptr_rc:$ea_result, symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+}
+
+// Zero extending loads.
+let isLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (ops G8RC:$rD, memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (ops G8RC:$rD, memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (ops G8RC:$rD, memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (ops G8RC:$rD, memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (ops G8RC:$rD, memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (ops G8RC:$rD, memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+def LBZU8 : DForm_1<35, (ops G8RC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (ops G8RC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (ops G8RC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+
+
+// Full 8-byte loads.
+let isLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (ops G8RC:$rD, memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDX : XForm_1<31, 21, (ops G8RC:$rD, memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+def LDU : DSForm_1<58, 1, (ops G8RC:$rD, ptr_rc:$ea_result, memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+let isStore = 1, noResults = 1, PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (ops G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (ops G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (ops G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (ops G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (ops G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (ops G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (ops G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (ops G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let isStore = 1, PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1<38, (ops ptr_rc:$ea_res, G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (ops ptr_rc:$ea_res, G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (ops ptr_rc:$ea_res, G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+
+def STDU : DSForm_1<62, 1, (ops ptr_rc:$ea_res, G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+}
+
+let isStore = 1, noResults = 1, PPC970_Unit = 2 in {
+
+def STDUX : XForm_8<31, 181, (ops G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (ops GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (ops GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3 in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (ops F8RC:$frD, F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (ops F8RC:$frD, F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
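+// OR4To8/OR8To4 are plain register copies ("or rA, rS, rS") across register
+// classes; zext additionally clears the upper 32 bits with rldicl ..., 0, 32.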
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// SHL/SRL
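+// shl by n is a rotate-left by n with the low n bits cleared (rldicr with
+// ME = 63-n); srl by n is a rotate-left by 64-n (i.e. a rotate-right by n)
+// with the high n bits cleared (rldicl with MB = n).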
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 0000000..8a2f255
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,622 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid
+/// shuffle mask for the VPKUHUM or VPKUWUM instructions.
+def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVPKUHUMShuffleMask(N, false);
+}]>;
+def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVPKUWUMShuffleMask(N, false);
+}]>;
+
+def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVPKUHUMShuffleMask(N, true);
+}]>;
+def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVPKUWUMShuffleMask(N, true);
+}]>;
+
+
+def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 1, false);
+}]>;
+def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 2, false);
+}]>;
+def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 4, false);
+}]>;
+def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 1, false);
+}]>;
+def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 2, false);
+}]>;
+def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 4, false);
+}]>;
+
+def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 1, true);
+}]>;
+def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 2, true);
+}]>;
+def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGLShuffleMask(N, 4, true);
+}]>;
+def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 1, true);
+}]>;
+def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 2, true);
+}]>;
+def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVMRGHShuffleMask(N, 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<build_vector, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<build_vector, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<build_vector, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isSplatShuffleMask(N, 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<build_vector, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isSplatShuffleMask(N, 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<build_vector, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{
+ return PPC::isSplatShuffleMask(N, 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).Val != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).Val != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).Val != 0;
+}], VSPLTISW_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (ops VRRC:$vD, VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def IMPLICIT_DEF_VRRC : Pseudo<(ops VRRC:$rD), "; IMPLICIT_DEF_VRRC $rD",
+ [(set VRRC:$rD, (v4i32 (undef)))]>;
+
+let noResults = 1 in {
+def DSS : DSS_Form<822, (ops u5imm:$A, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM, $A", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (ops u5imm:$T, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM, $T", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (ops u5imm:$T, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM, $T", LdStGeneral /*FIXME*/, []>;
+}
+
+def MFVSCR : VXForm_4<1540, (ops VRRC:$vD),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (ops VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let isLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (ops VRRC:$vD, memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (ops VRRC:$vD, memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (ops VRRC:$vD, memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (ops VRRC:$vD, memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (ops VRRC:$vD, memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (ops VRRC:$vD, memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (ops VRRC:$vD, memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let isStore = 1, noResults = 1, PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (ops VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (ops VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (ops VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (ops VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (ops VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB,
+ VSLDOI_shuffle_mask:$SH))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGHB_shuffle_mask))]>;
+def VMRGHH : VXForm_1< 76, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGHH_shuffle_mask))]>;
+def VMRGHW : VXForm_1<140, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGHW_shuffle_mask))]>;
+def VMRGLB : VXForm_1<268, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGLB_shuffle_mask))]>;
+def VMRGLH : VXForm_1<332, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGLH_shuffle_mask))]>;
+def VMRGLW : VXForm_1<396, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VMRGLW_shuffle_mask))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
+ VSPLTB_shuffle_mask:$UIMM))]>;
+def VSPLTH : VXForm_1<588, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
+ VSPLTH_shuffle_mask:$UIMM))]>;
+def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
+ VSPLTW_shuffle_mask:$UIMM))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (ops VRRC:$vD, s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (ops VRRC:$vD, s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (ops VRRC:$vD, s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VPKUHUM_shuffle_mask))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
+ VRRC:$vB, VPKUWUM_shuffle_mask))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
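+// The dotted (record) forms also update CR field 6, so the VCMPo class marks
+// CR6 as defined and sets the record bit.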
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB), asmstr, VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB), asmstr, VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+def V_SET0 : VXForm_setzero<1220, (ops VRRC:$vD),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics.
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+def : Pat<(int_ppc_altivec_dssall), (DSS 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// Undef.
+def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VRRC)>;
+def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VRRC)>;
+def : Pat<(v4f32 (undef)), (IMPLICIT_DEF_VRRC)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
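+// All of these vector types live in VRRC, so the reinterpretations below are
+// register-class no-ops and select to the source value unchanged.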
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in),
+ (VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in),
+ (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in),
+ (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in),
+ (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in),
+ (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in),
+ (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in),
+ (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in),
+ (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in),
+ (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
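+// AltiVec has no plain vector floating-point multiply, so fmul is matched as
+// a multiply-add with an all-zeros addend (V_SET0).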
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply-add and multiply-subtract for packed float. These are
+// represented separately from the real instructions above, for operations
+// that must have the additional precision, such as Newton-Raphson (used by
+// divide and sqrt).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 0000000..3861918
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,55 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the frame index as the base register until it is resolved,
+/// and allows an optional constant offset to be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
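+
+// Illustrative usage (a sketch; assumes the BuildMI overload that takes an
+// instruction descriptor and a destination register, e.g. for PPC::LWZ):
+//   addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LWZ), DestReg), FI);
+// This appends the offset and then the frame index, giving the
+// "dest, displacement(base)" operand order that PPC D-form loads expect.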
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the constant-pool index as the base register until machine
+/// code emission or assembly output, and allows an optional offset to be
+/// added as well.
+///
+inline const MachineInstrBuilder&
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ int Offset = 0) {
+ return MIB.addImm(Offset).addConstantPoolIndex(CPI);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 0000000..6a4a59b
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,800 @@
+//===- PPCInstrFormats.td - PowerPC Instruction Formats ------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Name = "";
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OperandList = OL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OL, string asmstr>
+ : I<opcode, OL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_2<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_base<opcode, OL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_1<opcode, OL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+ let C = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : DForm_5<opcode, OL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : DForm_5<opcode, OL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : DForm_6<opcode, OL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OL, asmstr, itin, pattern> {
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
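+/// VXForm_setzero - VX form with all three register fields tied to $VD; used
+/// for the "vxor vD, vD, vD" register-zeroing idiom (V_SET0).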
+class VXForm_setzero<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OL, string asmstr, list<dag> pattern>
+ : I<0, OL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 0000000..d7ee5ed
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,303 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "PPCGenInstrInfo.inc"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : TargetInstrInfo(PPCInsts, sizeof(PPCInsts)/sizeof(PPCInsts[0])), TM(tm),
+ RI(*TM.getSubtargetImpl(), *this) {}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *PPCInstrInfo::getPointerRegClass() const {
+ if (TM.getSubtargetImpl()->isPPC64())
+ return &PPC::G8RCRegClass;
+ else
+ return &PPC::GPRCRegClass;
+}
+
+
+bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const {
+ MachineOpCode oc = MI.getOpcode();
+ if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
+ oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ MI.getOperand(2).isRegister() &&
+ "invalid PPC OR instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ADDI) { // addi r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(2).isImmediate() &&
+ "invalid PPC ADDI instruction!");
+ if (MI.getOperand(1).isRegister() && MI.getOperand(2).getImmedValue()==0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ORI) { // ori r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ MI.getOperand(2).isImmediate() &&
+ "invalid PPC ORI instruction!");
+ if (MI.getOperand(2).getImmedValue()==0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::FMRS || oc == PPC::FMRD ||
+ oc == PPC::FMRSD) { // fmr r1, r2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ "invalid PPC FMR instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ "invalid PPC MCRF instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImmediate() && !MI->getOperand(1).getImmedValue() &&
+ MI->getOperand(2).isFrameIndex()) {
+ FrameIndex = MI->getOperand(2).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImmediate() && !MI->getOperand(1).getImmedValue() &&
+ MI->getOperand(2).isFrameIndex()) {
+ FrameIndex = MI->getOperand(2).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *PPCInstrInfo::commuteInstruction(MachineInstr *MI) const {
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfo::commuteInstruction(MI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImmedValue() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
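+ //
+ // For example (illustrative): with MB=8, ME=23, mask(8,23) covers bits
+ // 8..23; the swapped mask(24,7) wraps around and covers bits 24..31 and
+ // 0..7, i.e. the complement of the original mask.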
+
+ // Swap op1/op2
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ if (Reg1IsKill)
+ MI->getOperand(2).setIsKill();
+ else
+ MI->getOperand(2).unsetIsKill();
+ if (Reg2IsKill)
+ MI->getOperand(1).setIsKill();
+ else
+ MI->getOperand(1).unsetIsKill();
+
+ // Swap the mask around.
+ unsigned MB = MI->getOperand(4).getImmedValue();
+ unsigned ME = MI->getOperand(5).getImmedValue();
+ MI->getOperand(4).setImmedValue((ME+1) & 31);
+ MI->getOperand(5).setImmedValue((MB-1) & 31);
+ return MI;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ BuildMI(MBB, MI, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMachineBasicBlock();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC::BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ TBB = SecondLastInst->getOperand(2).getMachineBasicBlock();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+
+ // If the block ends with two PPC::Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ I = LastInst;
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+bool PPCInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case PPC::BLR: // Return.
+ case PPC::B: // Uncond branch.
+ case PPC::BCTR: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 0000000..498a8e5
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,112 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+}
+
+
+class PPCInstrInfo : public TargetInstrInfo {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+public:
+ PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ // Return true if the instruction is a register to register move and
+ // leave the source and dest operands in the passed parameters.
+ //
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const;
+
+ unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+ virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+ virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
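
To make the flag packing above concrete, here is a minimal sketch of how the dispatch-group code can unpack these bits from an instruction's TSFlags word. The helper names are assumptions for illustration, not declarations from this header.

    #include "PPCInstrInfo.h"

    // The unit lives in the bits selected by PPC970_Mask; the PPC970_Unit enum
    // values are already shifted, so a mask is enough to recover them.
    static llvm::PPCII::PPC970_Unit getPPC970Unit(unsigned TSFlags) {
      return (llvm::PPCII::PPC970_Unit)(TSFlags & llvm::PPCII::PPC970_Mask);
    }

    // First/Single are simple bit flags in the low bits of the same word.
    static bool startsNewDispatchGroup(unsigned TSFlags) {
      return (TSFlags & (llvm::PPCII::PPC970_First | llvm::PPCII::PPC970_Single)) != 0;
    }
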
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 0000000..fe18978
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1164 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
+ SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
+]>;
+def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 4, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDT_PPCShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDT_PPCShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDT_PPCShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getValue() ? getI32Imm(32 - N->getValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: return the high 16 bits, adjusted so that adding
+ // the sign-extended low 16 bits back reconstructs the original value.
+ signed int Val = N->getValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb, me;
+ (void)isRunOfOnes((unsigned)N->getValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me;
+ (void)isRunOfOnes((unsigned)N->getValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getValue(), mb, me);
+ else
+ return false;
+}]>;
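
The MB/ME transforms and the maskimm32 predicate above all lean on the notion of a "run of ones". A simplified stand-alone sketch (it ignores the wrap-around masks the in-tree isRunOfOnes also accepts) shows the arithmetic:

    // Sketch only.  For 0x00FFFF00 the set bits are value bits 8..23, which in
    // PowerPC numbering (bit 0 = MSB) gives MB = 8 and ME = 23 -- exactly what
    // the MB/ME transforms feed to rlwinm/rlwnm.
    static bool isContiguousMask(unsigned V, unsigned &MB, unsigned &ME) {
      if (V == 0) return false;
      unsigned Shift = 0, Len = 0;
      while (!(V & 1)) { V >>= 1; ++Shift; }   // skip trailing zeros
      while (V & 1)    { V >>= 1; ++Len; }     // measure the run of ones
      if (V != 0) return false;                // more ones later: not one run
      ME = 31 - Shift;                         // last set bit, IBM numbering
      MB = ME - (Len - 1);                     // first set bit, IBM numbering
      return true;
    }
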
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getValue() == (short)N->getValue();
+ else
+ return (int64_t)N->getValue() == (short)N->getValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getValue() == (unsigned short)N->getValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sext right.
+ return N->getValue() == (uint64_t)(int)N->getValue();
+}], HI16>;
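
Since the HA16 adjustment is easy to get wrong, a worked example (illustration only) of the identity that the ADDIS/ADDI pattern near the end of this file relies on:

    // For Val = 0x12348000:
    //   LO16(Val) = 0x8000, which addi sign-extends to -0x8000
    //   HA16(Val) = (0x12348000 - (-0x8000)) >> 16 = 0x12350000 >> 16 = 0x1235
    // so "addi rD, rA, -0x8000" followed by "addis rD, rD, 0x1235" adds exactly
    // 0x12348000; plain HI16 (0x1234) would come up 0x10000 short whenever
    // bit 15 of the constant is set.
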
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def piclabel: Operand<iPTR> {
+ let PrintMethod = "printPICLabel";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+
+// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), CR0)> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
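
For orientation, a sketch (not part of the patch) of the assembly form each complex pattern above ultimately selects, using a word load as the example:

    // iaddr  -> lwz  rD, d(rA)     ; D-form, 16-bit signed displacement
    // xaddr  -> lwzx rD, rA, rB    ; X-form, register + register
    // ixaddr -> ld   rD, ds(rA)    ; DS-form (ld/std), displacement a multiple of 4
    // xoaddr matches any address and is used by instructions that only exist in
    // indexed form (lwbrx, dcbz, ...); iaddroff is the displacement piece alone,
    // used by the pre-increment definitions further down.
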
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+def ADJCALLSTACKDOWN : Pseudo<(ops u16imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start imm:$amt)]>, Imp<[R1],[R1]>;
+def ADJCALLSTACKUP : Pseudo<(ops u16imm:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end imm:$amt)]>, Imp<[R1],[R1]>;
+
+def UPDATE_VRSAVE : Pseudo<(ops GPRC:$rD, GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+def DYNALLOC : Pseudo<(ops GPRC:$result, GPRC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC $result, $negsize, $fpsi",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>,
+ Imp<[R1],[R1]>;
+
+def IMPLICIT_DEF_GPRC: Pseudo<(ops GPRC:$rD), "${:comment} IMPLICIT_DEF_GPRC $rD",
+ [(set GPRC:$rD, (undef))]>;
+def IMPLICIT_DEF_F8 : Pseudo<(ops F8RC:$rD), "${:comment} IMPLICIT_DEF_F8 $rD",
+ [(set F8RC:$rD, (undef))]>;
+def IMPLICIT_DEF_F4 : Pseudo<(ops F4RC:$rD), "${:comment} IMPLICIT_DEF_F4 $rD",
+ [(set F4RC:$rD, (undef))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1, // Expanded by the scheduler.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(ops GPRC:$dst, CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(ops G8RC:$dst, CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(ops F4RC:$dst, CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(ops F8RC:$dst, CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(ops VRRC:$dst, CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+}
+
+let isTerminator = 1, isBarrier = 1, noResults = 1, PPC970_Unit = 7 in {
+ let isReturn = 1 in
+ def BLR : XLForm_2_br<19, 16, 0, (ops pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (ops), "bctr", BrB, []>;
+}
+
+
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(ops piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ noResults = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (ops target:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (ops pred:$cond, target:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Macho ABI Calls.
+let isCall = 1, noResults = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ def BL_Macho : IForm<18, 0, 1,
+ (ops calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Macho : IForm<18, 1, 1,
+ (ops aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>;
+ def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1,
+ (ops variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Macho)]>;
+}
+
+// ELF ABI Calls.
+let isCall = 1, noResults = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ def BL_ELF : IForm<18, 0, 1,
+ (ops calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_ELF : IForm<18, 1, 1,
+ (ops aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_ELF (i32 imm:$func))]>;
+ def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (ops variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_ELF)]>;
+}
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (ops memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (ops memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (ops memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (ops memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (ops memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (ops memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (ops memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (ops memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let isLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (ops GPRC:$rD, memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (ops GPRC:$rD, memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (ops GPRC:$rD, memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (ops GPRC:$rD, memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (ops F4RC:$rD, memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (ops F8RC:$rD, memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+def LBZU : DForm_1<35, (ops GPRC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (ops GPRC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (ops GPRC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (ops GPRC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (ops F4RC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lfsu $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (ops F8RC:$rD, ptr_rc:$ea_result, memri:$addr),
+ "lfdu $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
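
The tie expressed by RegConstraint/NoEncode above is easier to see against the ISA semantics of an update-form load. A rough model, not compiler code:

    // lwzu rD, d(rA):
    //   EA = rA + d        ; compute the effective address
    //   rD = MEM[EA, 4]    ; load the word
    //   rA = EA            ; write the updated address back into the base
    // The base register is both read and redefined, which is why each pattern
    // constrains $ea_result to be the same register as $addr.reg.
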
+
+// Indexed (r+r) Loads.
+//
+let isLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (ops GPRC:$rD, memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (ops GPRC:$rD, memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (ops GPRC:$rD, memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (ops GPRC:$rD, memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (ops GPRC:$rD, memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>;
+def LWBRX : XForm_1<31, 534, (ops GPRC:$rD, memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>;
+
+def LFSX : XForm_25<31, 535, (ops F4RC:$frD, memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (ops F8RC:$frD, memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let isStore = 1, noResults = 1, PPC970_Unit = 2 in {
+def STB : DForm_1<38, (ops GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (ops GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (ops GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (ops F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (ops F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let isStore = 1, PPC970_Unit = 2 in {
+def STBU : DForm_1<39, (ops ptr_rc:$ea_res, GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (ops ptr_rc:$ea_res, GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (ops ptr_rc:$ea_res, GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (ops ptr_rc:$ea_res, F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (ops ptr_rc:$ea_res, F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let isStore = 1, noResults = 1, PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (ops GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (ops GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (ops GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (ops GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+def STHBRX: XForm_8<31, 918, (ops GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (ops GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (ops F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+def STFSX : XForm_28<31, 663, (ops F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (ops F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+def ADDIC : DForm_2<12, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+def ADDIS : DForm_2<15, (ops GPRC:$rD, GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (ops GPRC:$rD, GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+def SUBFIC : DForm_2< 8, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+def LI : DForm_2_r0<14, (ops GPRC:$rD, symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+def LIS : DForm_2_r0<15, (ops GPRC:$rD, symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (ops), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (ops CRRC:$crD, GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (ops CRRC:$dst, GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+def SRAW : XForm_6<31, 792, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def SRAWI : XForm_10<31, 824, (ops GPRC:$rA, GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+def CNTLZW : XForm_11<31, 26, (ops GPRC:$rA, GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (ops GPRC:$rA, GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (ops GPRC:$rA, GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (ops CRRC:$crD, GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (ops CRRC:$crD, GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (ops CRRC:$crD, FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (ops CRRC:$crD, F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (ops CRRC:$crD, F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+def FCTIWZ : XForm_26<63, 15, (ops F8RC:$frD, F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+def FRSP : XForm_26<63, 12, (ops F4RC:$frD, F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+def FSQRT : XForm_26<63, 22, (ops F8RC:$frD, F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+def FSQRTS : XForm_26<59, 22, (ops F4RC:$frD, F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+}
+
+/// FMR is split into 3 versions: one for 4-byte FP, one for 8-byte FP, and one
+/// for extending from 4-byte to 8-byte FP.
+///
+/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+def FMRS : XForm_26<63, 72, (ops F4RC:$frD, F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRD : XForm_26<63, 72, (ops F8RC:$frD, F8RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F8RC:$frD, F8RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRSD : XForm_26<63, 72, (ops F8RC:$frD, F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fextend F4RC:$frB))]>,
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (ops F4RC:$frD, F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (ops F8RC:$frD, F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (ops F4RC:$frD, F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (ops F8RC:$frD, F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (ops F4RC:$frD, F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (ops F8RC:$frD, F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. Condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (ops CRRC:$BF, CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (ops CRRC:$CRD, CRRC:$CRA, CRRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def SETCR : XLForm_1_ext<19, 289, (ops CRRC:$dst),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+def MFCTR : XFXForm_1_ext<31, 339, 9, (ops GPRC:$rT), "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+let Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+def MFLR : XFXForm_1_ext<31, 339, 8, (ops GPRC:$rT), "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (ops GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (ops GPRC:$rT),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (ops crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+def MFCR : XFXForm_3<31, 19, (ops GPRC:$rT), "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+def MFOCRF: XFXForm_5a<31, 19, (ops GPRC:$rT, crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+//
+def ADD4 : XOForm_1<31, 266, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+def ADDC : XOForm_1<31, 10, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE : XOForm_1<31, 138, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def DIVW : XOForm_1<31, 491, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+def SUBFC : XOForm_1<31, 8, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+def SUBFE : XOForm_1<31, 136, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def ADDME : XOForm_3<31, 234, 0, (ops GPRC:$rT, GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
+def ADDZE : XOForm_3<31, 202, 0, (ops GPRC:$rT, GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def NEG : XOForm_3<31, 104, 0, (ops GPRC:$rT, GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (ops GPRC:$rT, GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (ops GPRC:$rT, GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+def FMADD : AForm_1<63, 29,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+def FMADDS : AForm_1<59, 29,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+def FMSUB : AForm_1<63, 28,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+def FMSUBS : AForm_1<59, 28,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+def FNMADD : AForm_1<63, 31,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+def FNMADDS : AForm_1<59, 31,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+def FNMSUB : AForm_1<63, 30,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+def FNMSUBS : AForm_1<59, 30,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
+// having 4 of these, force the comparison to always be an 8-byte double (code
+// should use an FMRSD if the input comparison value really wants to be a float)
+// and provide 4/8-byte forms only for the result and operand types.
+def FSELD : AForm_1<63, 23,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (ops F4RC:$FRT, F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+def FADD : AForm_2<63, 21,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+def FADDS : AForm_2<59, 21,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+def FDIV : AForm_2<63, 18,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+def FDIVS : AForm_2<59, 18,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+def FMUL : AForm_3<63, 25,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+def FMULS : AForm_3<59, 25,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+def FSUB : AForm_2<63, 20,
+ (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+def FSUBS : AForm_2<59, 20,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (ops GPRC:$rA, GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (ops GPRC:$rA, GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (ops GPRC:$rA, GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (ops GPRC:$rA, GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : Pseudo<(ops i32imm:$line, i32imm:$col, i32imm:$file),
+ "${:comment} .loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
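
A concrete instance of the pattern above (illustration only):

    // Materializing 0x12345678:
    //   lis rD, 0x1234        ; rD = 0x12340000   (HI16)
    //   ori rD, rD, 0x5678    ; rD = 0x12345678   (LO16)
    // ORI is safe here because LIS leaves the low 16 bits zero; the ADD pattern
    // just below pairs ADDIS with HA16 instead, because addi sign-extends its
    // immediate.
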
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)),
+ (BL_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)),
+ (BL_Macho texternalsym:$dst)>;
+def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)),
+ (BL_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)),
+ (BL_ELF texternalsym:$dst)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(extloadf32 iaddr:$src),
+ (FMRSD (LFS iaddr:$src))>;
+def : Pat<(extloadf32 xaddr:$src),
+ (FMRSD (LFSX xaddr:$src))>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 0000000..acaed0b
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,429 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
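
The in-range test above compresses a fair amount of ISA knowledge; a small stand-alone restatement, as a sketch with a hypothetical helper name:

    #include <cstdint>

    // An I-form b/bl stores a signed 24-bit offset measured in instructions
    // (words), so a direct branch reaches +/- (1 << 23) instructions, i.e.
    // +/- 32 MiB of code; anything further away needs the lis/ori(/sldi/oris)
    // + mtctr + bctr sequence emitted by the else branches above.
    static bool fitsInDirectBranch(intptr_t From, intptr_t To) {
      intptr_t WordOffset = (To - From) >> 2;   // byte offset -> instruction count
      return WordOffset >= -(1 << 23) && WordOffset < (1 << 23);
    }
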
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !defined(__ppc64__)
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prologue/epilogue inserted by GCC won't work for us. Instead,
+// we write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13].
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC64 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux/PPC support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prologue/epilogue inserted by GCC won't work for us. Instead,
+// we write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8].
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ assert(0 && "This is not a power pc, you can't execute this!");
+ abort();
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13].
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr r0\n"
+ "std r0, 16(r1)\n"
+ "stdu r1, -280(r1)\n"
+ // Save all int arg registers
+ "std r10, 272(r1)\n" "std r9, 264(r1)\n"
+ "std r8, 256(r1)\n" "std r7, 248(r1)\n"
+ "std r6, 240(r1)\n" "std r5, 232(r1)\n"
+ "std r4, 224(r1)\n" "std r3, 216(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
+ "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
+ "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
+ "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
+ "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
+ "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
+ "stfd f1, 112(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr r3, r0\n"
+ "ld r2, 280(r1)\n" // stub's frame
+ "ld r4, 16(r2)\n" // stub's lr
+ "li r5, 1\n" // 1 == 64 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
+ "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
+ "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
+ "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
+ "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
+ "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
+ "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
+ "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
+ "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
+ "lfd f1, 112(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld r1, 280(r1)\n"
+ "ld r2, 16(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+  assert(0 && "This is not a PowerPC, you can't execute this!");
+ abort();
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
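+    // The I-form branch (b/bl) holds a 24-bit signed displacement measured in
+    // instruction words, so the rewrite is only possible when the target lies
+    // within +/-32MB of the call site.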
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
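+  // The constants below reflect emitFunctionStub's layout: a 3-instruction
+  // prologue followed by whatever EmitBranchToAt emitted, which is either a
+  // single direct 'bl' (3 words into the stub) or an indirect sequence ending
+  // in 'bctrl' (6 words in for 32-bit, 9 for 64-bit).  Back up to the start
+  // of the stub so it can be overwritten below.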
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+
+  // Return the address of the target function; the assembly callback wrapper
+  // moves it into CTR (mtctr 3) and branches to it (bctr) after restoring all
+  // of the argument registers.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+void *PPCJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ MCE.startFunctionStub(7*4);
+ intptr_t Addr = (intptr_t)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
+ return MCE.finishFunctionStub(0);
+ }
+
+ MCE.startFunctionStub(10*4);
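+  // Compilation-callback stubs get a 3-instruction prologue that spills LR
+  // (the address of the original call site plus 4) into the caller's LR save
+  // slot; the callback code above chases the stack back chain to recover it
+  // as its second argument.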
+ if (is64Bit) {
+ MCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ MCE.emitWordBE(0x7d6802a6); // mflr r11
+ MCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isMachoABI()){
+ MCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ MCE.emitWordBE(0x7d6802a6); // mflr r11
+ MCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ MCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ MCE.emitWordBE(0x7d6802a6); // mflr r11
+ MCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t Addr = (intptr_t)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ MCE.emitWordBE(0);
+ EmitBranchToAt(Addr, (intptr_t)Fn, true, is64Bit);
+ return MCE.finishFunctionStub(0);
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
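+        // For example, for a target of 0x10018000 the low half 0x8000 is
+        // sign-extended to -0x8000 by the matching low-part instruction, so
+        // the high half must become 0x1002 rather than 0x1001.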
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+      // Do the addition then mask, so the addition does not overflow the
+      // immediate field of the instruction (the low 14 bits, with the bottom
+      // two bits of the offset implicitly zero).
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 0000000..66ee0ee
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,46 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual void *emitFunctionStub(void *Fn, MachineCodeEmitter &MCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
new file mode 100644
index 0000000..5e2dc9e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
@@ -0,0 +1,150 @@
+//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachOWriterInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachORelocation.h"
+#include "llvm/Support/OutputBuffer.h"
+using namespace llvm;
+
+PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM)
+ : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ?
+ HDR_CPU_TYPE_POWERPC64 :
+ HDR_CPU_TYPE_POWERPC,
+ HDR_CPU_SUBTYPE_POWERPC_ALL) {}
+PPCMachOWriterInfo::~PPCMachOWriterInfo() {}
+
+/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or
+/// more PowerPC MachORelocation(s), add the new relocations to the
+/// MachOSection, and rewrite the instruction at the section offset if required
+/// by that relocation type.
+unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool isExtern) const {
+ unsigned NumRelocs = 0;
+ uint64_t Addr = 0;
+
+ // Get the address of whatever it is we're relocating, if possible.
+ if (!isExtern)
+ Addr = (uintptr_t)MR.getResultPointer() + ToAddr;
+
+ switch ((PPC::RelocationType)MR.getRelocationType()) {
+ default: assert(0 && "Unknown PPC relocation type!");
+ case PPC::reloc_absolute_low_ix:
+ assert(0 && "Unhandled PPC relocation type!");
+ break;
+ case PPC::reloc_vanilla:
+ {
+ // FIXME: need to handle 64 bit vanilla relocs
+ MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx,
+ false, 2, isExtern,
+ PPC_RELOC_VANILLA,
+ Scattered, (intptr_t)MR.getResultPointer());
+ ++NumRelocs;
+
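+      // Mach-O scattered relocation entries pack the address in with the
+      // flags in their first word, while plain entries put the address first
+      // and the packed fields second, hence the two orderings below.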
+ if (Scattered) {
+ RelocOut.outword(VANILLA.getPackedFields());
+ RelocOut.outword(VANILLA.getAddress());
+ } else {
+ RelocOut.outword(VANILLA.getAddress());
+ RelocOut.outword(VANILLA.getPackedFields());
+ }
+
+ intptr_t SymbolOffset;
+
+ if (Scattered)
+ SymbolOffset = Addr + MR.getConstantVal();
+ else
+ SymbolOffset = Addr;
+
+ printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx,
+ unsigned(MR.getMachineCodeOffset()),
+ unsigned(SymbolOffset));
+ SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset());
+ }
+ break;
+ case PPC::reloc_pcrel_bx:
+ {
+ // FIXME: Presumably someday we will need to branch to other, non-extern
+ // functions too. Need to figure out some way to distinguish between
+ // target is BB and target is function.
+ if (isExtern) {
+ MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2,
+ isExtern, PPC_RELOC_BR24, Scattered,
+ (intptr_t)MR.getMachineCodeOffset());
+ RelocOut.outword(BR24.getAddress());
+ RelocOut.outword(BR24.getPackedFields());
+ ++NumRelocs;
+ }
+
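+      // Splice the new 24-bit word displacement into the LI field of the
+      // branch, preserving the opcode byte and the AA/LK bits already present
+      // in the section data.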
+ Addr -= MR.getMachineCodeOffset();
+ Addr >>= 2;
+ Addr &= 0xFFFFFF;
+ Addr <<= 2;
+ Addr |= (SecOut[MR.getMachineCodeOffset()] << 24);
+ Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3);
+ SecOut.fixword(Addr, MR.getMachineCodeOffset());
+ break;
+ }
+ case PPC::reloc_pcrel_bcx:
+ {
+ Addr -= MR.getMachineCodeOffset();
+ Addr &= 0xFFFC;
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_high:
+ {
+ MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_HA16);
+ MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(HA16.getRawAddress());
+ RelocOut.outword(HA16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
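+      // Bias by 0x8000 so that, together with the sign-extended low half
+      // applied via the PAIR entry, the reassembled address comes out right
+      // (standard ha16 rounding).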
+ Addr += 0x8000;
+
+ SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_low:
+ {
+ MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_LO16);
+ MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(LO16.getRawAddress());
+ RelocOut.outword(LO16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ }
+
+ return NumRelocs;
+}
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h
new file mode 100644
index 0000000..69ed9f7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.h
@@ -0,0 +1,55 @@
+//===-- PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHO_WRITER_INFO_H
+#define PPC_MACHO_WRITER_INFO_H
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+
+ // Forward declarations
+ class MachineRelocation;
+ class OutputBuffer;
+ class PPCTargetMachine;
+
+ class PPCMachOWriterInfo : public TargetMachOWriterInfo {
+ public:
+ PPCMachOWriterInfo(const PPCTargetMachine &TM);
+ virtual ~PPCMachOWriterInfo();
+
+ virtual unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered, bool Extern) const;
+
+ // Constants for the relocation r_type field.
+ // See <mach-o/ppc/reloc.h>
+ enum {
+ PPC_RELOC_VANILLA, // generic relocation
+ PPC_RELOC_PAIR, // the second relocation entry of a pair
+ PPC_RELOC_BR14, // 14 bit branch displacement to word address
+ PPC_RELOC_BR24, // 24 bit branch displacement to word address
+ PPC_RELOC_HI16, // a PAIR follows with the low 16 bits
+ PPC_RELOC_LO16, // a PAIR follows with the high 16 bits
+ PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b
+ PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero
+ };
+ };
+
+} // end llvm namespace
+
+#endif // PPC_MACHO_WRITER_INFO_H
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 0000000..e227456
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,50 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PowerPC target-specific information for each
+/// MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+public:
+  PPCFunctionInfo(MachineFunction& MF)
+    : FramePointerSaveIndex(0),
+      UsesLR(false) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ void setUsesLR(bool U) { UsesLR = U; }
+  bool usesLR() const { return UsesLR; }
+
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 0000000..d0f833e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
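+// The table is indexed by the four elements of a shuffle mask, each of which
+// selects one of the eight input elements or is undef (9 choices per element,
+// hence the 6561 entries noted above); each entry encodes the cost and the
+// operations used to build that shuffle, as summarized in the per-entry
+// comments.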
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp
new file mode 100644
index 0000000..ccda5c0
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.cpp
@@ -0,0 +1,30 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/lib/Target/PowerPC/PPCPredicates.h b/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 0000000..ba1bb74
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
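+
+ // For example, PRED_EQ is (2 << 5) | 12: BI = 2 selects the EQ bit of the
+ // condition-register field being tested, and BO = 12 means "branch if that
+ // bit is set"; PRED_NE keeps BI = 2 but uses BO = 4 ("branch if the bit is
+ // clear"). Given a Predicate P, BI is (P >> 5) and BO is (P & 31).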
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 0000000..19780a8
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1153 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case R0 : case X0 : case F0 : case V0 : case CR0: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: return 7;
+ case R8 : case X8 : case F8 : case V8 : return 8;
+ case R9 : case X9 : case F9 : case V9 : return 9;
+ case R10: case X10: case F10: case V10: return 10;
+ case R11: case X11: case F11: case V11: return 11;
+ case R12: case X12: case F12: case V12: return 12;
+ case R13: case X13: case F13: case V13: return 13;
+ case R14: case X14: case F14: case V14: return 14;
+ case R15: case X15: case F15: case V15: return 15;
+ case R16: case X16: case F16: case V16: return 16;
+ case R17: case X17: case F17: case V17: return 17;
+ case R18: case X18: case F18: case V18: return 18;
+ case R19: case X19: case F19: case V19: return 19;
+ case R20: case X20: case F20: case V20: return 20;
+ case R21: case X21: case F21: case V21: return 21;
+ case R22: case X22: case F22: case V22: return 22;
+ case R23: case X23: case F23: case V23: return 23;
+ case R24: case X24: case F24: case V24: return 24;
+ case R25: case X25: case F25: case V25: return 25;
+ case R26: case X26: case F26: case V26: return 26;
+ case R27: case X27: case F27: case V27: return 27;
+ case R28: case X28: case F28: case V28: return 28;
+ case R29: case X29: case F29: case V29: return 29;
+ case R30: case X30: case F30: case V30: return 30;
+ case R31: case X31: case F31: case V31: return 31;
+ default:
+ cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n";
+ abort();
+ }
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
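+
+ // ImmToIdxMap maps each immediate-form (D-form) opcode above to its indexed
+ // (register+register) counterpart; eliminateFrameIndex falls back to the
+ // indexed form when a frame offset does not fit in the 16-bit displacement
+ // field, e.g. rewriting "lwz rD, imm(rA)" as "lwzx rD, rA, r0" with the
+ // large offset materialized in r0.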
+}
+
+void
+PPCRegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ if (RC == PPC::GPRCRegisterClass) {
+ if (SrcReg != PPC::LR) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STW))
+ .addReg(SrcReg, false, false, true), FrameIdx);
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ BuildMI(MBB, MI, TII.get(PPC::MFLR), PPC::R11);
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STW))
+ .addReg(PPC::R11, false, false, true), FrameIdx);
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (SrcReg != PPC::LR8) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STD))
+ .addReg(SrcReg, false, false, true), FrameIdx);
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ BuildMI(MBB, MI, TII.get(PPC::MFLR8), PPC::X11);
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STD))
+ .addReg(PPC::X11, false, false, true), FrameIdx);
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STFD))
+ .addReg(SrcReg, false, false, true), FrameIdx);
+ } else if (RC == PPC::F4RCRegisterClass) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STFS))
+ .addReg(SrcReg, false, false, true), FrameIdx);
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We use R0 here, because it isn't available for RA.
+ // We need the CR field to end up in CR0's slot of the saved word. First,
+ // issue a MFCR to save all of the CRBits.
+ BuildMI(MBB, MI, TII.get(PPC::MFCR), PPC::R0);
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
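+ // For example, spilling CR3 gives ShiftBits = 3*4 = 12, so the rlwinm below
+ // rotates the MFCR result left by 12 bits, moving CR3's four bits
+ // (LT, GT, EQ, SO) into CR0's slot.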
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ // rlwinm r0, r0, ShiftBits, 0, 31.
+ BuildMI(MBB, MI, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31);
+ }
+
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::STW))
+ .addReg(PPC::R0, false, false, true), FrameIdx);
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // Vector loads and stores only have indexed (register+register) forms, so
+ // materialize the frame address into a GPR first. Emit:
+ // R0 = ADDI FI#
+ // STVX SrcReg, R0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0);
+ BuildMI(MBB, MI, TII.get(PPC::STVX))
+ .addReg(SrcReg, false, false, true).addReg(PPC::R0).addReg(PPC::R0);
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+}
+
+void
+PPCRegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ if (RC == PPC::GPRCRegisterClass) {
+ if (DestReg != PPC::LR) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LWZ), DestReg), FrameIdx);
+ } else {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LWZ), PPC::R11),FrameIdx);
+ BuildMI(MBB, MI, TII.get(PPC::MTLR)).addReg(PPC::R11);
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (DestReg != PPC::LR8) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LD), DestReg), FrameIdx);
+ } else {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LD), PPC::R11), FrameIdx);
+ BuildMI(MBB, MI, TII.get(PPC::MTLR8)).addReg(PPC::R11);
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LFD), DestReg), FrameIdx);
+ } else if (RC == PPC::F4RCRegisterClass) {
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LFS), DestReg), FrameIdx);
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We use R0 here, because it isn't available for RA.
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::LWZ), PPC::R0), FrameIdx);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ // rlwinm r0, r0, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, MI, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31);
+ }
+
+ BuildMI(MBB, MI, TII.get(PPC::MTCRF), DestReg).addReg(PPC::R0);
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // Vector loads only have indexed (register+register) forms, so materialize
+ // the frame address into a GPR first. Emit:
+ // R0 = ADDI FI#
+ // DestReg = LVX R0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ addFrameReference(BuildMI(MBB, MI, TII.get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0);
+ BuildMI(MBB, MI, TII.get(PPC::LVX),DestReg).addReg(PPC::R0).addReg(PPC::R0);
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+}
+
+void PPCRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ if (RC == PPC::GPRCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (RC == PPC::G8RCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (RC == PPC::F4RCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::FMRS), DestReg).addReg(SrcReg);
+ } else if (RC == PPC::F8RCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::FMRD), DestReg).addReg(SrcReg);
+ } else if (RC == PPC::CRRCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::MCRF), DestReg).addReg(SrcReg);
+ } else if (RC == PPC::VRRCRegisterClass) {
+ BuildMI(MBB, MI, TII.get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else {
+ cerr << "Attempt to copy register that is not GPR or FPR";
+ abort();
+ }
+}
+
+void PPCRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+const unsigned* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Macho32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::LR, 0
+ };
+
+ static const unsigned ELF32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F9,
+ PPC::F10, PPC::F11, PPC::F12, PPC::F13,
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::LR, 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Macho64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::LR8, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs :
+ Macho32_CalleeSavedRegs;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ // 32-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ // 64-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = {
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::G8RCRegClass, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses :
+ Macho32_CalleeSavedRegClasses;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegClasses;
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ // In Linux, r2 is reserved for the OS.
+ if (!Subtarget.isDarwin())
+ Reserved.set(PPC::R2);
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overconservative, as it also prevents allocation of
+ // R31 when the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+ }
+ if (needsFP(MF))
+ Reserved.set(PPC::R31);
+ return Reserved;
+}
+
+/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *PPCRegisterInfo::foldMemoryOperand(MachineInstr *MI,
+ unsigned OpNum,
+ int FrameIndex) const {
+ // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+ // it takes more than one instruction to store it.
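+ // For example, if the destination operand of the copy "R3 = OR R4, R4" is
+ // being spilled to a stack slot, the copy folds into "STW R4, <fi>"; folding
+ // a source operand instead yields "LWZ R3, <fi>".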
+ unsigned Opc = MI->getOpcode();
+
+ MachineInstr *NewMI = NULL;
+ if ((Opc == PPC::OR &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::STW)).addReg(InReg),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::LWZ), OutReg),
+ FrameIndex);
+ }
+ } else if ((Opc == PPC::OR8 &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::STD)).addReg(InReg),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::LD), OutReg), FrameIndex);
+ }
+ } else if (Opc == PPC::FMRD) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::STFD)).addReg(InReg),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::LFD), OutReg), FrameIndex);
+ }
+ } else if (Opc == PPC::FMRS) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::STFS)).addReg(InReg),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ NewMI = addFrameReference(BuildMI(TII.get(PPC::LFS), OutReg), FrameIndex);
+ }
+ }
+
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getStackSize() && needsFP(MF);
+}
+
+/// usesLR - Return true if the link register (LR) has been used in the function.
+///
+bool PPCRegisterInfo::usesLR(MachineFunction &MF) const {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ return FI->usesLR();
+}
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+/// addi R0, SP, #frameSize ; get the address of the previous frame
+/// stwux R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, #maxCallFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(MaxAlign <= TargetAlign &&
+ "Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
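+ // (The three-instruction alternative would be roughly "lis r0, FrameSize>>16;
+ // ori r0, r0, FrameSize&0xffff; add r0, r1, r0".)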
+ if (MaxAlign < TargetAlign && isInt16(FrameSize)) {
+ BuildMI(MBB, II, TII.get(PPC::ADDI), PPC::R0)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ BuildMI(MBB, II, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then
+ // determine the address of new allocated space.
+ if (LP64) {
+ BuildMI(MBB, II, TII.get(PPC::STDUX))
+ .addReg(PPC::X0)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ BuildMI(MBB, II, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ } else {
+ BuildMI(MBB, II, TII.get(PPC::STWUX))
+ .addReg(PPC::R0)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+ BuildMI(MBB, II, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Find out which operand is the frame index.
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ // Take into account whether it's an add or mem instruction
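+ // For a memory form such as "lwz rD, imm(rA)" the frame index is operand 2
+ // and the displacement is operand 1; for "addi rD, rA, imm" the frame index
+ // is operand 1 and the displacement is operand 2.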
+ unsigned OffIdx = (i == 2) ? 1 : 2;
+ if (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ OffIdx = i-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II);
+ return;
+ }
+
+ // Replace the FrameIndex operand with the base register, GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(i).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1, false);
+
+ // Figure out whether the offset encoded in the instruction is the byte
+ // offset shifted right by two bits. This is true for DS-form instructions
+ // like "STD", whose displacement implicitly has two low zero bits appended.
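+ // For example, an STD with a byte offset of 40 from the frame base encodes
+ // its displacement field as 10.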
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffIdx).getImmedValue();
+ else
+ Offset += MI.getOperand(OffIdx).getImmedValue() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ Offset += MFI->getStackSize();
+
+ if (!isInt16(Offset)) {
+ // Insert a set of r0 with the full offset value before the ld, st, or add
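+ // For example, Offset = 0x12345 becomes "lis r0, 1" followed by
+ // "ori r0, r0, 0x2345" (only the low halfword of the immediate survives in
+ // the ORI encoding).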
+ BuildMI(MBB, II, TII.get(PPC::LIS), PPC::R0).addImm(Offset >> 16);
+ BuildMI(MBB, II, TII.get(PPC::ORI), PPC::R0).addReg(PPC::R0).addImm(Offset);
+
+ // Convert into the indexed form of the instruction:
+ // sth 0:rA, 1:imm, 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA, 1:rB, 2:imm ==> add 0:rA, 1:rB, 2:r0
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setInstrDescriptor(TII.get(NewOpcode));
+ MI.getOperand(1).ChangeToRegister(MI.getOperand(i).getReg(), false);
+ MI.getOperand(2).ChangeToRegister(PPC::R0, false);
+ } else {
+ if (isIXAddr) {
+ assert((Offset & 3) == 0 && "Invalid frame offset!");
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ }
+ MI.getOperand(OffIdx).ChangeToImmediate(Offset);
+ }
+}
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // A block ending in a return is an epilog block; look for the MTVRSAVE in it.
+ if (!I->empty() && TII.isReturn(I->back().getOpcode())) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI/ORIS instruction(s).
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineFunction::livein_iterator I =
+ MF->livein_begin(), E = MF->livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineFunction::liveout_iterator I =
+ MF->liveout_begin(), E = MF->liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ // If no additional registers need to be marked, the VRSAVE update is dead.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ } else if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ BuildMI(*MI->getParent(), MI, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg).addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ BuildMI(*MI->getParent(), MI, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg).addImm(UsedRegMask >> 16);
+ } else {
+ BuildMI(*MI->getParent(), MI, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg).addImm(UsedRegMask >> 16);
+ BuildMI(*MI->getParent(), MI, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg).addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
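For reference, the ORI/ORIS choice above can be exercised in isolation. This short sketch (not part of the original patch) runs a few arbitrary masks through the same three cases: a mask confined to the low 16 bits needs only ORI, one confined to the high 16 bits needs only ORIS, and anything else takes the ORIS+ORI pair.

#include <cstdint>
#include <cstdio>

int main() {
  // Arbitrary example masks; bit (31 - N) corresponds to vector register VN.
  const uint32_t Masks[] = {0x0000F00Fu, 0xF00F0000u, 0x80000001u};
  for (uint32_t M : Masks) {
    if ((M & 0xFFFF) == M)
      printf("mask 0x%08x: ori  dst, src, 0x%04x\n", M, M);
    else if ((M & 0xFFFF0000u) == M)
      printf("mask 0x%08x: oris dst, src, 0x%04x\n", M, M >> 16);
    else
      printf("mask 0x%08x: oris dst, src, 0x%04x ; ori dst, dst, 0x%04x\n",
             M, M >> 16, M & 0xFFFF);
  }
  return 0;
}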
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // If we are a leaf function that uses at most 224 bytes of stack space and
+ // has no frame pointer, calls, or dynamic allocas, then we do not need to
+ // adjust the stack pointer (we fit in the Red Zone).
+ if (FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->hasCalls() && // No calls.
+ MaxAlign <= TargetAlign) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize =
+ PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isMachoABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
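A quick numeric illustration (not part of the original patch) of the rounding above, with made-up sizes: the maximum call-frame size is folded into the frame and the total is rounded up to the target stack alignment with the usual mask trick. A small leaf frame with no calls, allocas, or special alignment would instead take the 224-byte Red Zone early-out and keep a stack size of 0.

#include <cstdio>

int main() {
  unsigned TargetAlign  = 16;                // target stack alignment
  unsigned AlignMask    = TargetAlign - 1;
  unsigned FrameSize    = 300;               // locals/spills (example value)
  unsigned MaxCallFrame = 72;                // largest outgoing call frame

  FrameSize += MaxCallFrame;                          // include call frames
  FrameSize = (FrameSize + AlignMask) & ~AlignMask;   // round up to 16 bytes

  printf("rounded frame size: %u\n", FrameSize);      // prints 384
  return 0;
}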
+
+void PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS)
+ const {
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = getRARegister();
+ FI->setUsesLR(MF.isPhysRegUsed(LR));
+ MF.setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool IsPPC64 = Subtarget.isPPC64();
+ bool IsELF32_ABI = Subtarget.isELF32_ABI();
+ bool IsMachoABI = Subtarget.isMachoABI();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet, allocate it now.
+ if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects())
+ && IsELF32_ABI) {
+ // Find out what the fixed offset of the frame pointer save area is.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ IsMachoABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+}
+
+void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) has been used.
+ bool UsesLR = MFI->hasCalls() || usesLR(MF);
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ if (IsPPC64) {
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, TII.get(PPC::STD))
+ .addReg(PPC::X31).addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::STD))
+ .addReg(PPC::X0).addImm(LROffset/4).addReg(PPC::X1);
+ } else {
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, TII.get(PPC::STW))
+ .addReg(PPC::R31).addImm(FPOffset).addReg(PPC::R1);
+
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::STW))
+ .addReg(PPC::R0).addImm(LROffset).addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ if (MMI && MMI->needsFrameInfo()) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(PPC::LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!IsPPC64) {
+ // PPC32.
+ if (MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ BuildMI(MBB, MBBI, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1).addImm(0).addImm(32-Log2_32(MaxAlign)).addImm(31);
+ BuildMI(MBB, MBBI, TII.get(PPC::SUBFIC) ,PPC::R0).addReg(PPC::R0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, TII.get(PPC::STWUX))
+ .addReg(PPC::R1).addReg(PPC::R1).addReg(PPC::R0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, TII.get(PPC::STWU),
+ PPC::R1).addReg(PPC::R1).addImm(NegFrameSize).addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(PPC::LIS), PPC::R0).addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, TII.get(PPC::ORI), PPC::R0).addReg(PPC::R0)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(PPC::STWUX)).addReg(PPC::R1).addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ BuildMI(MBB, MBBI, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1).addImm(0).addImm(64-Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, TII.get(PPC::SUBFIC8), PPC::X0).addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, TII.get(PPC::STDUX))
+ .addReg(PPC::X1).addReg(PPC::X1).addReg(PPC::X0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1).addImm(NegFrameSize/4).addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(PPC::LIS8), PPC::X0).addImm(NegFrameSize >>16);
+ BuildMI(MBB, MBBI, TII.get(PPC::ORI8), PPC::X0).addReg(PPC::X0)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, TII.get(PPC::STDUX)).addReg(PPC::X1).addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ if (MMI && MMI->needsFrameInfo()) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ if (NegFrameSize) {
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(PPC::LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
+ (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!IsPPC64) {
+ BuildMI(MBB, MBBI, TII.get(PPC::OR), PPC::R31).addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(PPC::OR8), PPC::X31).addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+ }
+}
+
+void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert(MBBI->getOpcode() == PPC::BLR &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) has been used.
+ bool UsesLR = MFI->hasCalls() || usesLR(MF);
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!Subtarget.isPPC64()) {
+ if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(PPC::LWZ),PPC::R1).addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(PPC::LD), PPC::X1).addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (IsPPC64) {
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (UsesLR)
+ BuildMI(MBB, MBBI, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+}
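The restore of r1 above comes in two flavors; this sketch (not part of the original patch) shows the decision with illustrative values. The cheap addi form is only used when the frame size fits a signed 16-bit immediate and the prologue performed a plain stwu/stdu (no over-alignment and no variable-sized objects); otherwise the saved back chain at 0(r1) is reloaded.

#include <cstdio>

static bool fitsInSImm16(int V) { return V >= -32768 && V <= 32767; }

int main() {
  int  FrameSize          = 4096;   // example frame size
  bool TargetAlignCovers  = true;   // TargetAlign >= MaxAlign
  bool HasVarSizedObjects = false;  // dynamic alloca present?

  if (fitsInSImm16(FrameSize) && TargetAlignCovers && !HasVarSizedObjects)
    printf("addi r1, r1, %d\n", FrameSize);
  else
    printf("lwz r1, 0(r1)    ; reload the back chain\n");
  return 0;
}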
+
+unsigned PPCRegisterInfo::getRARegister() const {
+ return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (!Subtarget.isPPC64())
+ return hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
+
+#include "PPCGenRegisterInfo.inc"
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 0000000..4112034
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,107 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include "PPCGenRegisterInfo.h.inc"
+#include <map>
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// PPC::F14, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Code Generation virtual methods...
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ /// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+ /// copy instructions, turning them into load/store instructions.
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ int FrameIndex) const;
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// usesLR - Returns true if the link register (LR) has been used in the
+ /// function.
+ bool usesLR(MachineFunction &MF) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 0000000..0b3b4ca
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,333 @@
+//===- PowerPCRegisterInfo.td - The PowerPC Register File --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg> : PPCReg<SubReg.Name> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n> : PPCReg<n> {
+ field bits<3> Num = num;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<0>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<1>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<2>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<3>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<4>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<5>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<6>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<7>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<8>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<9>;
+def R10 : GPR<10, "r10">, DwarfRegNum<10>;
+def R11 : GPR<11, "r11">, DwarfRegNum<11>;
+def R12 : GPR<12, "r12">, DwarfRegNum<12>;
+def R13 : GPR<13, "r13">, DwarfRegNum<13>;
+def R14 : GPR<14, "r14">, DwarfRegNum<14>;
+def R15 : GPR<15, "r15">, DwarfRegNum<15>;
+def R16 : GPR<16, "r16">, DwarfRegNum<16>;
+def R17 : GPR<17, "r17">, DwarfRegNum<17>;
+def R18 : GPR<18, "r18">, DwarfRegNum<18>;
+def R19 : GPR<19, "r19">, DwarfRegNum<19>;
+def R20 : GPR<20, "r20">, DwarfRegNum<20>;
+def R21 : GPR<21, "r21">, DwarfRegNum<21>;
+def R22 : GPR<22, "r22">, DwarfRegNum<22>;
+def R23 : GPR<23, "r23">, DwarfRegNum<23>;
+def R24 : GPR<24, "r24">, DwarfRegNum<24>;
+def R25 : GPR<25, "r25">, DwarfRegNum<25>;
+def R26 : GPR<26, "r26">, DwarfRegNum<26>;
+def R27 : GPR<27, "r27">, DwarfRegNum<27>;
+def R28 : GPR<28, "r28">, DwarfRegNum<28>;
+def R29 : GPR<29, "r29">, DwarfRegNum<29>;
+def R30 : GPR<30, "r30">, DwarfRegNum<30>;
+def R31 : GPR<31, "r31">, DwarfRegNum<31>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0>, DwarfRegNum<0>;
+def X1 : GP8< R1>, DwarfRegNum<1>;
+def X2 : GP8< R2>, DwarfRegNum<2>;
+def X3 : GP8< R3>, DwarfRegNum<3>;
+def X4 : GP8< R4>, DwarfRegNum<4>;
+def X5 : GP8< R5>, DwarfRegNum<5>;
+def X6 : GP8< R6>, DwarfRegNum<6>;
+def X7 : GP8< R7>, DwarfRegNum<7>;
+def X8 : GP8< R8>, DwarfRegNum<8>;
+def X9 : GP8< R9>, DwarfRegNum<9>;
+def X10 : GP8<R10>, DwarfRegNum<10>;
+def X11 : GP8<R11>, DwarfRegNum<11>;
+def X12 : GP8<R12>, DwarfRegNum<12>;
+def X13 : GP8<R13>, DwarfRegNum<13>;
+def X14 : GP8<R14>, DwarfRegNum<14>;
+def X15 : GP8<R15>, DwarfRegNum<15>;
+def X16 : GP8<R16>, DwarfRegNum<16>;
+def X17 : GP8<R17>, DwarfRegNum<17>;
+def X18 : GP8<R18>, DwarfRegNum<18>;
+def X19 : GP8<R19>, DwarfRegNum<19>;
+def X20 : GP8<R20>, DwarfRegNum<20>;
+def X21 : GP8<R21>, DwarfRegNum<21>;
+def X22 : GP8<R22>, DwarfRegNum<22>;
+def X23 : GP8<R23>, DwarfRegNum<23>;
+def X24 : GP8<R24>, DwarfRegNum<24>;
+def X25 : GP8<R25>, DwarfRegNum<25>;
+def X26 : GP8<R26>, DwarfRegNum<26>;
+def X27 : GP8<R27>, DwarfRegNum<27>;
+def X28 : GP8<R28>, DwarfRegNum<28>;
+def X29 : GP8<R29>, DwarfRegNum<29>;
+def X30 : GP8<R30>, DwarfRegNum<30>;
+def X31 : GP8<R31>, DwarfRegNum<31>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<32>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<33>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<34>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<35>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<36>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<37>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<38>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<39>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<40>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<41>;
+def F10 : FPR<10, "f10">, DwarfRegNum<42>;
+def F11 : FPR<11, "f11">, DwarfRegNum<43>;
+def F12 : FPR<12, "f12">, DwarfRegNum<44>;
+def F13 : FPR<13, "f13">, DwarfRegNum<45>;
+def F14 : FPR<14, "f14">, DwarfRegNum<46>;
+def F15 : FPR<15, "f15">, DwarfRegNum<47>;
+def F16 : FPR<16, "f16">, DwarfRegNum<48>;
+def F17 : FPR<17, "f17">, DwarfRegNum<49>;
+def F18 : FPR<18, "f18">, DwarfRegNum<50>;
+def F19 : FPR<19, "f19">, DwarfRegNum<51>;
+def F20 : FPR<20, "f20">, DwarfRegNum<52>;
+def F21 : FPR<21, "f21">, DwarfRegNum<53>;
+def F22 : FPR<22, "f22">, DwarfRegNum<54>;
+def F23 : FPR<23, "f23">, DwarfRegNum<55>;
+def F24 : FPR<24, "f24">, DwarfRegNum<56>;
+def F25 : FPR<25, "f25">, DwarfRegNum<57>;
+def F26 : FPR<26, "f26">, DwarfRegNum<58>;
+def F27 : FPR<27, "f27">, DwarfRegNum<59>;
+def F28 : FPR<28, "f28">, DwarfRegNum<60>;
+def F29 : FPR<29, "f29">, DwarfRegNum<61>;
+def F30 : FPR<30, "f30">, DwarfRegNum<62>;
+def F31 : FPR<31, "f31">, DwarfRegNum<63>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<77>;
+def V1 : VR< 1, "v1">, DwarfRegNum<78>;
+def V2 : VR< 2, "v2">, DwarfRegNum<79>;
+def V3 : VR< 3, "v3">, DwarfRegNum<80>;
+def V4 : VR< 4, "v4">, DwarfRegNum<81>;
+def V5 : VR< 5, "v5">, DwarfRegNum<82>;
+def V6 : VR< 6, "v6">, DwarfRegNum<83>;
+def V7 : VR< 7, "v7">, DwarfRegNum<84>;
+def V8 : VR< 8, "v8">, DwarfRegNum<85>;
+def V9 : VR< 9, "v9">, DwarfRegNum<86>;
+def V10 : VR<10, "v10">, DwarfRegNum<87>;
+def V11 : VR<11, "v11">, DwarfRegNum<88>;
+def V12 : VR<12, "v12">, DwarfRegNum<89>;
+def V13 : VR<13, "v13">, DwarfRegNum<90>;
+def V14 : VR<14, "v14">, DwarfRegNum<91>;
+def V15 : VR<15, "v15">, DwarfRegNum<92>;
+def V16 : VR<16, "v16">, DwarfRegNum<93>;
+def V17 : VR<17, "v17">, DwarfRegNum<94>;
+def V18 : VR<18, "v18">, DwarfRegNum<95>;
+def V19 : VR<19, "v19">, DwarfRegNum<96>;
+def V20 : VR<20, "v20">, DwarfRegNum<97>;
+def V21 : VR<21, "v21">, DwarfRegNum<98>;
+def V22 : VR<22, "v22">, DwarfRegNum<99>;
+def V23 : VR<23, "v23">, DwarfRegNum<100>;
+def V24 : VR<24, "v24">, DwarfRegNum<101>;
+def V25 : VR<25, "v25">, DwarfRegNum<102>;
+def V26 : VR<26, "v26">, DwarfRegNum<103>;
+def V27 : VR<27, "v27">, DwarfRegNum<104>;
+def V28 : VR<28, "v28">, DwarfRegNum<105>;
+def V29 : VR<29, "v29">, DwarfRegNum<106>;
+def V30 : VR<30, "v30">, DwarfRegNum<107>;
+def V31 : VR<31, "v31">, DwarfRegNum<108>;
+
+// Condition registers
+def CR0 : CR<0, "cr0">, DwarfRegNum<68>;
+def CR1 : CR<1, "cr1">, DwarfRegNum<69>;
+def CR2 : CR<2, "cr2">, DwarfRegNum<70>;
+def CR3 : CR<3, "cr3">, DwarfRegNum<71>;
+def CR4 : CR<4, "cr4">, DwarfRegNum<72>;
+def CR5 : CR<5, "cr5">, DwarfRegNum<73>;
+def CR6 : CR<6, "cr6">, DwarfRegNum<74>;
+def CR7 : CR<7, "cr7">, DwarfRegNum<75>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">, DwarfRegNum<0>;
+def CR0GT : CRBIT< 1, "1">, DwarfRegNum<0>;
+def CR0EQ : CRBIT< 2, "2">, DwarfRegNum<0>;
+def CR0UN : CRBIT< 3, "3">, DwarfRegNum<0>;
+def CR1LT : CRBIT< 4, "4">, DwarfRegNum<0>;
+def CR1GT : CRBIT< 5, "5">, DwarfRegNum<0>;
+def CR1EQ : CRBIT< 6, "6">, DwarfRegNum<0>;
+def CR1UN : CRBIT< 7, "7">, DwarfRegNum<0>;
+def CR2LT : CRBIT< 8, "8">, DwarfRegNum<0>;
+def CR2GT : CRBIT< 9, "9">, DwarfRegNum<0>;
+def CR2EQ : CRBIT<10, "10">, DwarfRegNum<0>;
+def CR2UN : CRBIT<11, "11">, DwarfRegNum<0>;
+def CR3LT : CRBIT<12, "12">, DwarfRegNum<0>;
+def CR3GT : CRBIT<13, "13">, DwarfRegNum<0>;
+def CR3EQ : CRBIT<14, "14">, DwarfRegNum<0>;
+def CR3UN : CRBIT<15, "15">, DwarfRegNum<0>;
+def CR4LT : CRBIT<16, "16">, DwarfRegNum<0>;
+def CR4GT : CRBIT<17, "17">, DwarfRegNum<0>;
+def CR4EQ : CRBIT<18, "18">, DwarfRegNum<0>;
+def CR4UN : CRBIT<19, "19">, DwarfRegNum<0>;
+def CR5LT : CRBIT<20, "20">, DwarfRegNum<0>;
+def CR5GT : CRBIT<21, "21">, DwarfRegNum<0>;
+def CR5EQ : CRBIT<22, "22">, DwarfRegNum<0>;
+def CR5UN : CRBIT<23, "23">, DwarfRegNum<0>;
+def CR6LT : CRBIT<24, "24">, DwarfRegNum<0>;
+def CR6GT : CRBIT<25, "25">, DwarfRegNum<0>;
+def CR6EQ : CRBIT<26, "26">, DwarfRegNum<0>;
+def CR6UN : CRBIT<27, "27">, DwarfRegNum<0>;
+def CR7LT : CRBIT<28, "28">, DwarfRegNum<0>;
+def CR7GT : CRBIT<29, "29">, DwarfRegNum<0>;
+def CR7EQ : CRBIT<30, "30">, DwarfRegNum<0>;
+def CR7UN : CRBIT<31, "31">, DwarfRegNum<0>;
+
+def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
+def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
+def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
+def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<65>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<65>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<66>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<66>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<107>;
+
+/// Register classes
+// Allocate volatiles first, then nonvolatiles in reverse order since
+// stmw/lmw save from rN to r31.
+def GPRC : RegisterClass<"PPC", [i32], 32,
+ [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12,
+ R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17,
+ R16, R15, R14, R13, R31, R0, R1, LR]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
+ // In Linux, r2 is reserved for the OS.
+ if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
+ return begin()+1;
+
+ return begin();
+ }
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overconservative, as it also prevents allocation of
+ // R31 when the FP is not needed.
+ if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64())
+ return end()-5; // don't allocate R13, R31, R0, R1, LR
+
+ if (needsFP(MF))
+ return end()-4; // don't allocate R31, R0, R1, LR
+ else
+ return end()-3; // don't allocate R0, R1, LR
+ }
+ }];
+}
+def G8RC : RegisterClass<"PPC", [i64], 64,
+ [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12,
+ X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17,
+ X16, X15, X14, X31, X13, X0, X1, LR8]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ G8RCClass::iterator
+ G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ G8RCClass::iterator
+ G8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ if (needsFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
+
+
+def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ [V2, V3, V4, V5, V0, V1,
+ V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
+ V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>;
+
+def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
+ CR3, CR4]>;
+
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 0000000..261622f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instructions, which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
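As an aside (not part of the original patch), the addis/la split that reloc_absolute_high and reloc_absolute_low describe looks like this for a concrete address. Because la (addi) sign-extends its 16-bit immediate, a resolver conventionally bumps the high half when bit 15 of the address is set; the actual fixup lives in the JIT relocation code, which is not part of this hunk, so treat the sketch purely as an illustration.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Addr = 0x1234ABCDu;                  // example absolute address
  uint16_t Lo = Addr & 0xFFFF;                  // immediate for la/addi
  uint16_t Hi = (Addr + ((Addr & 0x8000) ? 0x10000u : 0u)) >> 16;  // for addis
  // Reconstruction: (Hi << 16) plus the sign-extended Lo yields Addr again.
  uint32_t Rebuilt = (uint32_t(Hi) << 16) + uint32_t(int32_t(int16_t(Lo)));
  printf("addis imm 0x%04x, la imm 0x%04x, rebuilt 0x%08x\n", Hi, Lo, Rebuilt);
  return 0;
}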
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 0000000..0e0fd82
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,508 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chip sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // special register unit
+def IU1 : FuncUnit; // integer unit 1 (simple)
+def IU2 : FuncUnit; // integer unit 2 (complex)
+def IU3 : FuncUnit; // integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+def FPU1 : FuncUnit; // floating point unit 1
+def FPU2 : FuncUnit; // floating point unit 2
+def VPU : FuncUnit; // vector permutation unit
+def VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def VFPU : FuncUnit; // vector floating point unit
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When adding new opcodes to the
+// supported set, refer to the following table to determine which itinerary
+// class the opcode belongs to.
+//
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 0000000..fbb9f6f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,63 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def G3Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 0000000..d0e4456
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,73 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 0000000..b40a8a5
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,76 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4PlusItineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 0000000..ff4be2c
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,83 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 0000000..4419d20
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,141 @@
+//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "PPCGenSubtarget.inc"
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCCPU - Returns the name of the current CPU.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
+ const std::string &FS, bool is64Bit)
+ : TM(tm)
+ , StackAlignment(16)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , IsDarwin(false)
+ , HasLazyResolverStubs(false) {
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+#if defined(__APPLE__)
+ CPU = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
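+  // For example, FS might look like "+64bit,+altivec" and CPU like "7450";
+  // the tblgen-generated parser below sets the feature booleans from them.
+  // (The exact feature spellings here are only illustrative.)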
+ ParseSubtargetFeatures(FS, CPU);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ if (!has64BitSupport()) {
+ cerr << "PPC: Generation of 64-bit code for a 32-bit processor "
+ << "requested. Ignoring 32-bit processor feature.\n";
+ Has64BitSupport = true;
+ }
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, warn and ignore.
+ if (use64BitRegs() && !has64BitSupport()) {
+ cerr << "PPC: 64-bit registers requested on CPU without support. "
+ << "Disabling 64-bit register use.\n";
+ Use64BitRegs = false;
+ }
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ IsDarwin = TT.find("-darwin") != std::string::npos;
+ } else if (TT.empty()) {
+#if defined(__APPLE__)
+ IsDarwin = true;
+#endif
+ }
+
+ // Set up darwin-specific properties.
+ if (IsDarwin) {
+ HasLazyResolverStubs = true;
+ AsmFlavor = NewMnemonic;
+ } else {
+ AsmFlavor = OldMnemonic;
+ }
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+  // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ (GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode());
+}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 0000000..d1e135c
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,146 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ OldMnemonic, NewMnemonic, Unset
+ };
+protected:
+ const TargetMachine &TM;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// AsmFlavor - Which PPC asm dialect to use.
+ AsmWriterFlavorTy AsmFlavor;
+
+  /// Used by the ISel to turn on optimizations for POWER4-derived architectures
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool IsDarwin;
+ bool HasLazyResolverStubs;
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ PPCSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
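+  /// (Reading the layout string: "E" = big-endian, "p:64:64" = pointer size
+  /// and ABI alignment in bits, and "f64:32:64"/"i64:32:64" = 32-bit ABI /
+  /// 64-bit preferred alignment for those types.)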
+ const char *getTargetDataString() const {
+ return isPPC64() ? "E-p:64:64-f64:32:64-i64:32:64"
+ : "E-p:32:32-f64:32:64-i64:32:64";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+  /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ bool isDarwin() const { return IsDarwin; }
+
+ bool isMachoABI() const { return IsDarwin || IsPPC64; }
+ bool isELF32_ABI() const { return !IsDarwin && !IsPPC64; }
+
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
new file mode 100644
index 0000000..01c78b7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -0,0 +1,96 @@
+//===-- PPCTargetAsmInfo.cpp - PPC asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PPC, Darwin, and Linux TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+PPCTargetAsmInfo::PPCTargetAsmInfo(const PPCTargetMachine &TM) {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
+ ZeroDirective = "\t.space\t";
+ SetDirective = "\t.set";
+ Data64bitsDirective = isPPC64 ? "\t.quad\t" : 0;
+ AlignmentIsInBytes = false;
+ LCOMMDirective = "\t.lcomm\t";
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ AssemblerDialect = TM.getSubtargetImpl()->getAsmFlavor();
+
+ NeedsSet = true;
+ AddressSize = isPPC64 ? 8 : 4;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ DwarfEHFrameSection =
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+}
+
+DarwinTargetAsmInfo::DarwinTargetAsmInfo(const PPCTargetMachine &TM)
+: PPCTargetAsmInfo(TM)
+{
+ PCSymbol = ".";
+ CommentString = ";";
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ ConstantPoolSection = "\t.const\t";
+ JumpTableDataSection = ".const";
+ GlobalDirective = "\t.globl\t";
+ CStringSection = "\t.cstring";
+ FourByteConstantSection = "\t.literal4\n";
+ EightByteConstantSection = "\t.literal8\n";
+ ReadOnlySection = "\t.const\n";
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakRefDirective = "\t.weak_reference\t";
+ HiddenDirective = "\t.private_extern\t";
+ SupportsExceptionHandling = true;
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+}
+
+LinuxTargetAsmInfo::LinuxTargetAsmInfo(const PPCTargetMachine &TM)
+: PPCTargetAsmInfo(TM)
+{
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = "";
+ ConstantPoolSection = "\t.section .rodata.cst4\t";
+ JumpTableDataSection = ".section .rodata.cst4";
+ CStringSection = "\t.section\t.rodata";
+ StaticCtorsSection = ".section\t.ctors,\"aw\",@progbits";
+ StaticDtorsSection = ".section\t.dtors,\"aw\",@progbits";
+ UsedDirective = "\t# .no_dead_strip\t";
+ WeakRefDirective = "\t.weak\t";
+}
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.h b/lib/Target/PowerPC/PPCTargetAsmInfo.h
new file mode 100644
index 0000000..6a680e2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.h
@@ -0,0 +1,38 @@
+//=====-- PPCTargetAsmInfo.h - PPC asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PPC, Darwin, and Linux TargetAsmInfo classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class PPCTargetMachine;
+
+ struct PPCTargetAsmInfo : public TargetAsmInfo {
+ PPCTargetAsmInfo(const PPCTargetMachine &TM);
+ };
+
+ struct DarwinTargetAsmInfo : public PPCTargetAsmInfo {
+ DarwinTargetAsmInfo(const PPCTargetMachine &TM);
+ };
+
+ struct LinuxTargetAsmInfo : public PPCTargetAsmInfo {
+ LinuxTargetAsmInfo(const PPCTargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 0000000..57c8437
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,166 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+namespace {
+ // Register the targets
+ RegisterTarget<PPC32TargetMachine>
+ X("ppc32", " PowerPC 32");
+ RegisterTarget<PPC64TargetMachine>
+ Y("ppc64", " PowerPC 64");
+}
+
+const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const {
+ if (Subtarget.isDarwin())
+ return new DarwinTargetAsmInfo(*this);
+ else
+ return new LinuxTargetAsmInfo(*this);
+}
+
+unsigned PPC32TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 4)
+ return 10;
+#endif
+ return 0;
+}
+unsigned PPC64TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 8)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-")
+ return 20;
+
+ // If the target triple is something non-powerpc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-")
+ return 20;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+
+PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(*this, M, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) {
+
+ if (getRelocationModel() == Reloc::Default)
+ if (Subtarget.isDarwin())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+const bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ PM.add(createPPCAsmPrinterPass(Out, *this));
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ return false;
+}
+
+bool PPCTargetMachine::addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ return false;
+}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 0000000..10c5b7b
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,101 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCMachOWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameInfo FrameInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ PPCMachOWriterInfo MachOWriterInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual TargetJITInfo *getJITInfo() { return &JITInfo; }
+ virtual PPCTargetLowering *getTargetLowering() const {
+ return const_cast<PPCTargetLowering*>(&TLInfo);
+ }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const PPCMachOWriterInfo *getMachOWriterInfo() const {
+ return &MachOWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+ virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+ virtual const bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
new file mode 100644
index 0000000..69e60fc
--- /dev/null
+++ b/lib/Target/PowerPC/README.txt
@@ -0,0 +1,664 @@
+//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
+
+TODO:
+* gpr0 allocation
+* implement do-loop -> bdnz transform
+* __builtin_return_address not supported on PPC
+
+===-------------------------------------------------------------------------===
+
+Support 'update' load/store instructions. These are cracked on the G5, but are
+still a codesize win.
+
+With preinc enabled, this:
+
+long *%test4(long *%X, long *%dest) {
+ %Y = getelementptr long* %X, int 4
+ %A = load long* %Y
+ store long %A, long* %dest
+ ret long* %Y
+}
+
+compiles to:
+
+_test4:
+ mr r2, r3
+ lwzu r5, 32(r2)
+ lwz r3, 36(r3)
+ stw r5, 0(r4)
+ stw r3, 4(r4)
+ mr r3, r2
+ blr
+
+with -sched=list-burr, I get:
+
+_test4:
+ lwz r2, 36(r3)
+ lwzu r5, 32(r3)
+ stw r2, 4(r4)
+ stw r5, 0(r4)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile the hottest inner loop of viterbi to:
+
+ li r6, 0
+ b LBB1_84 ;bb432.i
+LBB1_83: ;bb420.i
+ lbzx r8, r5, r7
+ addi r6, r7, 1
+ stbx r8, r4, r7
+LBB1_84: ;bb432.i
+ mr r7, r6
+ cmplwi cr0, r7, 143
+ bne cr0, LBB1_83 ;bb420.i
+
+The CBE manages to produce:
+
+ li r0, 143
+ mtctr r0
+loop:
+ lbzx r2, r2, r11
+ stbx r0, r2, r9
+ addi r2, r2, 1
+ bdz later
+ b loop
+
+This could be much better (bdnz instead of bdz) but it still beats us. If we
+produced this with bdnz, the loop would be a single dispatch group.
+
+===-------------------------------------------------------------------------===
+
+Compile:
+
+void foo(int *P) {
+ if (P) *P = 0;
+}
+
+into:
+
+_foo:
+ cmpwi cr0,r3,0
+ beqlr cr0
+ li r0,0
+ stw r0,0(r3)
+ blr
+
+This is effectively a simple form of predication.
+
+===-------------------------------------------------------------------------===
+
+Lump the constant pool for each function into ONE pic object, and reference
+pieces of it as offsets from the start. For functions like this (contrived
+to have lots of constants obviously):
+
+double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
+
+We generate:
+
+_X:
+ lis r2, ha16(.CPI_X_0)
+ lfd f0, lo16(.CPI_X_0)(r2)
+ lis r2, ha16(.CPI_X_1)
+ lfd f2, lo16(.CPI_X_1)(r2)
+ fmadd f0, f1, f0, f2
+ lis r2, ha16(.CPI_X_2)
+ lfd f1, lo16(.CPI_X_2)(r2)
+ lis r2, ha16(.CPI_X_3)
+ lfd f2, lo16(.CPI_X_3)(r2)
+ fmadd f1, f0, f1, f2
+ blr
+
+It would be better to materialize .CPI_X into a register, then use immediates
+off of the register to avoid the lis's. This is even more important in PIC
+mode.
+
+Note that this (and the static variable version) is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+===-------------------------------------------------------------------------===
+
+PIC Code Gen IPO optimization:
+
+Squish small scalar globals together into a single global struct, allowing the
+address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
+of the GOT on targets with one).
+
+Note that this is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
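+
+As a rough C-level sketch of the idea (hypothetical globals, not taken from
+any particular testcase), the transformation would turn:
+
+static int a, b, c;                  /* three separate PIC/GOT accesses */
+
+into:
+
+static struct { int a, b, c; } g;    /* one base address that can be CSE'd;
+                                        fields reached by constant offsets */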
+
+===-------------------------------------------------------------------------===
+
+Implement the Newton-Raphson method for improving estimate instructions to the
+correct accuracy, and implement divide as multiply by reciprocal when it has
+more than one use. Itanium will want this too.
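+
+For reference, one Newton-Raphson step for a reciprocal estimate e of 1/d is
+e' = e * (2 - d * e).  A hedged C sketch of the refinement (the initial
+estimate would come from an instruction like fres):
+
+float refine_recip(float d, float e /* coarse estimate of 1/d */) {
+  e = e * (2.0f - d * e);   /* each step roughly doubles the correct bits */
+  e = e * (2.0f - d * e);
+  return e;                 /* then a / d becomes a * refine_recip(d, e) */
+}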
+
+===-------------------------------------------------------------------------===
+
+Compile this:
+
+int %f1(int %a, int %b) {
+ %tmp.1 = and int %a, 15 ; <int> [#uses=1]
+ %tmp.3 = and int %b, 240 ; <int> [#uses=1]
+ %tmp.4 = or int %tmp.3, %tmp.1 ; <int> [#uses=1]
+ ret int %tmp.4
+}
+
+without a copy. We make this currently:
+
+_f1:
+ rlwinm r2, r4, 0, 24, 27
+ rlwimi r2, r3, 0, 28, 31
+ or r3, r2, r2
+ blr
+
+The two-addr pass or RA needs to learn when it is profitable to commute an
+instruction to avoid a copy AFTER the 2-addr instruction. The 2-addr pass
+currently only commutes to avoid inserting a copy BEFORE the two addr instr.
+
+===-------------------------------------------------------------------------===
+
+Compile offsets from allocas:
+
+int *%test() {
+ %X = alloca { int, int }
+ %Y = getelementptr {int,int}* %X, int 0, uint 1
+ ret int* %Y
+}
+
+into a single add, not two:
+
+_test:
+ addi r2, r1, -8
+ addi r3, r2, 4
+ blr
+
+--> important for C++.
+
+===-------------------------------------------------------------------------===
+
+No loads or stores of the constants should be needed:
+
+struct foo { double X, Y; };
+void xxx(struct foo F);
+void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
+
+===-------------------------------------------------------------------------===
+
+Darwin Stub LICM optimization:
+
+Loops like this:
+
+ for (...) bar();
+
+Have to go through an indirect stub if bar is external or linkonce. It would
+be better to compile it as:
+
+ fp = &bar;
+ for (...) fp();
+
+which only computes the address of bar once (instead of each time through the
+stub). This is Darwin specific and would have to be done in the code generator.
+Probably not a win on x86.
+
+===-------------------------------------------------------------------------===
+
+Simple IPO for argument passing, change:
+ void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y)
+
+the Darwin ABI specifies that any integer arguments in the first 32 bytes worth
+of arguments get assigned to r3 through r10. That is, if you have a function
+foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the
+argument bytes for r4 and r5. The trick then would be to shuffle the argument
+order for functions we can internalize so that the maximum number of
+integers/pointers get passed in regs before you see any of the fp arguments.
+
+Instead of implementing this, it would actually probably be easier to just
+implement a PPC fastcc, where we could do whatever we wanted to the CC,
+including having this work sanely.
+
+===-------------------------------------------------------------------------===
+
+Fix Darwin FP-In-Integer Registers ABI
+
+Darwin passes doubles in structures in integer registers, which is very very
+bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation
+that percolates these things out of functions.
+
+Check out how horrible this is:
+http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html
+
+This is an extension of "interprocedural CC unmunging" that can't be done with
+just fastcc.
+
+===-------------------------------------------------------------------------===
+
+Compile this:
+
+int foo(int a) {
+ int b = (a < 8);
+ if (b) {
+ return b * 3; // ignore the fact that this is always 3.
+ } else {
+ return 2;
+ }
+}
+
+into something not this:
+
+_foo:
+1) cmpwi cr7, r3, 8
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+1) cmpwi cr0, r3, 7
+ bgt cr0, LBB1_2 ; UnifiedReturnBlock
+LBB1_1: ; then
+ rlwinm r2, r2, 0, 31, 31
+ mulli r3, r2, 3
+ blr
+LBB1_2: ; UnifiedReturnBlock
+ li r3, 2
+ blr
+
+In particular, the two compares (marked 1) could be shared by reversing one.
+This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the
+same operands (but backwards) exists. In this case, this wouldn't save us
+anything though, because the compares still wouldn't be shared.
+
+===-------------------------------------------------------------------------===
+
+We should custom expand setcc instead of pretending that we have it. That
+would allow us to expose the access of the crbit after the mfcr, allowing
+that access to be trivially folded into other ops. A simple example:
+
+int foo(int a, int b) { return (a < b) << 4; }
+
+compiles into:
+
+_foo:
+ cmpw cr7, r3, r4
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+ slwi r3, r2, 4
+ blr
+
+===-------------------------------------------------------------------------===
+
+Fold add and sub with constant into non-extern, non-weak addresses so this:
+
+static int a;
+void bar(int b) { a = b; }
+void foo(unsigned char *c) {
+ *c = a;
+}
+
+So that
+
+_foo:
+ lis r2, ha16(_a)
+ la r2, lo16(_a)(r2)
+ lbz r2, 3(r2)
+ stb r2, 0(r3)
+ blr
+
+Becomes
+
+_foo:
+ lis r2, ha16(_a+3)
+ lbz r2, lo16(_a+3)(r2)
+ stb r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We generate really bad code for this:
+
+int f(signed char *a, _Bool b, _Bool c) {
+ signed char t = 0;
+ if (b) t = *a;
+ if (c) *a = t;
+}
+
+===-------------------------------------------------------------------------===
+
+This:
+int test(unsigned *P) { return *P >> 24; }
+
+Should compile to:
+
+_test:
+ lbz r3,0(r3)
+ blr
+
+not:
+
+_test:
+ lwz r2, 0(r3)
+ srwi r3, r2, 24
+ blr
+
+===-------------------------------------------------------------------------===
+
+On the G5, logical CR operations are more expensive in their three
+address form: ops that read/write the same register are half as expensive as
+those that read from two registers that are different from their destination.
+
+We should model this with two separate instructions. The isel should generate
+the "two address" form of the instructions. When the register allocator
+detects that it needs to insert a copy due to the two-addressness of the CR
+logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point
+we can convert to the "three address" instruction, to save code space.
+
+This only matters when we start generating cr logical ops.
+
+===-------------------------------------------------------------------------===
+
+We should compile these two functions to the same thing:
+
+#include <stdlib.h>
+void f(int a, int b, int *P) {
+ *P = (a-b)>=0?(a-b):(b-a);
+}
+void g(int a, int b, int *P) {
+ *P = abs(a-b);
+}
+
+Further, they should compile to something better than:
+
+_g:
+ subf r2, r4, r3
+ subfic r3, r2, 0
+ cmpwi cr0, r2, -1
+ bgt cr0, LBB2_2 ; entry
+LBB2_1: ; entry
+ mr r2, r3
+LBB2_2: ; entry
+ stw r2, 0(r5)
+ blr
+
+GCC produces:
+
+_g:
+ subf r4,r4,r3
+ srawi r2,r4,31
+ xor r0,r2,r4
+ subf r0,r2,r0
+ stw r0,0(r5)
+ blr
+
+... which is much nicer.
+
+This theoretically may help improve twolf slightly (used in dimbox.c:142?).
+
+===-------------------------------------------------------------------------===
+
+int foo(int N, int ***W, int **TK, int X) {
+ int t, i;
+
+ for (t = 0; t < N; ++t)
+ for (i = 0; i < 4; ++i)
+ W[t / X][i][t % X] = TK[i][t];
+
+ return 5;
+}
+
+We generate relatively atrocious code for this loop compared to gcc.
+
+We could also strength reduce the rem and the div:
+http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
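+
+A hedged C sketch of the strength reduction (assuming X > 0): keep the
+quotient and remainder in induction variables instead of recomputing t / X
+and t % X on every iteration:
+
+  int q = 0, r = 0;
+  for (t = 0; t < N; ++t) {
+    for (i = 0; i < 4; ++i)
+      W[q][i][r] = TK[i][t];
+    if (++r == X) { r = 0; ++q; }
+  }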
+
+===-------------------------------------------------------------------------===
+
+float foo(float X) { return (int)(X); }
+
+Currently produces:
+
+_foo:
+ fctiwz f0, f1
+ stfd f0, -8(r1)
+ lwz r2, -4(r1)
+ extsw r2, r2
+ std r2, -16(r1)
+ lfd f0, -16(r1)
+ fcfid f0, f0
+ frsp f1, f0
+ blr
+
+We could use a target dag combine to turn the lwz/extsw into an lwa when the
+lwz has a single use. Since LWA is cracked anyway, this would be a codesize
+win only.
+
+===-------------------------------------------------------------------------===
+
+We generate ugly code for this:
+
+void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
+ unsigned code = 0;
+ if(dx < -dw) code |= 1;
+ if(dx > dw) code |= 2;
+ if(dy < -dw) code |= 4;
+ if(dy > dw) code |= 8;
+ if(dz < -dw) code |= 16;
+ if(dz > dw) code |= 32;
+ *ret = code;
+}
+
+===-------------------------------------------------------------------------===
+
+Complete the signed i32 to FP conversion code using 64-bit registers
+transformation, good for PI. See PPCISelLowering.cpp, this comment:
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+Also, if the registers are spilled to the stack, we have to ensure that all
+64-bits of them are save/restored, otherwise we will miscompile the code. It
+sounds like we need to get the 64-bit register classes going.
+
+===-------------------------------------------------------------------------===
+
+%struct.B = type { i8, [3 x i8] }
+
+define void @bar(%struct.B* %b) {
+entry:
+ %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp = load i32* %tmp ; <uint> [#uses=1]
+ %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
+ %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
+ %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
+ %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
+ %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
+ %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
+ %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
+ %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
+ %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
+ store i32 %tmp13, i32* %tmp8
+ ret void
+}
+
+We emit:
+
+_foo:
+ lwz r2, 0(r3)
+ slwi r4, r2, 1
+ or r4, r4, r2
+ rlwimi r2, r4, 0, 0, 0
+ stw r2, 0(r3)
+ blr
+
+We could collapse a bunch of those ORs and ANDs and generate the following
+equivalent code:
+
+_foo:
+ lwz r2, 0(r3)
+ rlwinm r4, r2, 1, 0, 0
+ or r2, r2, r4
+ stw r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile:
+
+unsigned test6(unsigned x) {
+ return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16);
+}
+
+into:
+
+_test6:
+ lis r2, 255
+ rlwinm r3, r3, 16, 0, 31
+ ori r2, r2, 255
+ and r3, r3, r2
+ blr
+
+GCC gets it down to:
+
+_test6:
+ rlwinm r0,r3,16,8,15
+ rlwinm r3,r3,16,24,31
+ or r3,r3,r0
+ blr
+
+
+===-------------------------------------------------------------------------===
+
+Consider a function like this:
+
+float foo(float X) { return X + 1234.4123f; }
+
+The FP constant ends up in the constant pool, so we need to get the LR register.
+ This ends up producing code like this:
+
+_foo:
+.LBB_foo_0: ; entry
+ mflr r11
+*** stw r11, 8(r1)
+ bl "L00000$pb"
+"L00000$pb":
+ mflr r2
+ addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
+ lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
+ fadds f1, f1, f0
+*** lwz r11, 8(r1)
+ mtlr r11
+ blr
+
+This is functional, but there is no reason to spill the LR register all the way
+to the stack (the two marked instrs): spilling it to a GPR is quite enough.
+
+Implementing this will require some codegen improvements. Nate writes:
+
+"So basically what we need to support the "no stack frame save and restore" is a
+generalization of the LR optimization to "callee-save regs".
+
+Currently, we have LR marked as a callee-save reg. The register allocator sees
+that it's callee save, and spills it directly to the stack.
+
+Ideally, something like this would happen:
+
+LR would be in a separate register class from the GPRs. The class of LR would be
+marked "unspillable". When the register allocator came across an unspillable
+reg, it would ask "what is the best class to copy this into that I *can* spill"
+If it gets a class back, which it will in this case (the gprs), it grabs a free
+register of that class. If it is then later necessary to spill that reg, so be
+it.
+
+===-------------------------------------------------------------------------===
+
+We compile this:
+int test(_Bool X) {
+ return X ? 524288 : 0;
+}
+
+to:
+_test:
+ cmplwi cr0, r3, 0
+ lis r2, 8
+ li r3, 0
+ beq cr0, LBB1_2 ;entry
+LBB1_1: ;entry
+ mr r3, r2
+LBB1_2: ;entry
+ blr
+
+instead of:
+_test:
+ addic r2,r3,-1
+ subfe r0,r2,r3
+ slwi r3,r0,19
+ blr
+
+This sort of thing occurs a lot due to globalopt.
+
+===-------------------------------------------------------------------------===
+
+We currently compile 32-bit bswap:
+
+declare i32 @llvm.bswap.i32(i32 %A)
+define i32 @test(i32 %A) {
+ %B = call i32 @llvm.bswap.i32(i32 %A)
+ ret i32 %B
+}
+
+to:
+
+_test:
+ rlwinm r2, r3, 24, 16, 23
+ slwi r4, r3, 24
+ rlwimi r2, r3, 8, 24, 31
+ rlwimi r4, r3, 8, 8, 15
+ rlwimi r4, r2, 0, 16, 31
+ mr r3, r4
+ blr
+
+it would be more efficient to produce:
+
+_foo: mr r0,r3
+ rlwinm r3,r3,8,0xffffffff
+ rlwimi r3,r0,24,0,7
+ rlwimi r3,r0,24,16,23
+ blr
+
+===-------------------------------------------------------------------------===
+
+test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to:
+
+__ZNK4llvm5APInt17countLeadingZerosEv:
+ ld r2, 0(r3)
+ cntlzd r2, r2
+ or r2, r2, r2 <<-- silly.
+ addi r3, r2, -64
+ blr
+
+The dead or is a 'truncate' from 64- to 32-bits.
+
+===-------------------------------------------------------------------------===
+
+We generate horrible ppc code for this:
+
+#define N 2000000
+double a[N],c[N];
+void simpleloop() {
+ int j;
+ for (j=0; j<N; j++)
+ c[j] = a[j];
+}
+
+LBB1_1: ;bb
+ lfdx f0, r3, r4
+ addi r5, r5, 1 ;; Extra IV for the exit value compare.
+ stfdx f0, r2, r4
+ addi r4, r4, 8
+
+ xoris r6, r5, 30 ;; This is due to a large immediate.
+ cmplwi cr0, r6, 33920
+ bne cr0, LBB1_1
+
+===-------------------------------------------------------------------------===
+
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
new file mode 100644
index 0000000..143804d
--- /dev/null
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -0,0 +1,179 @@
+//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
+
+Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
+registers, to generate better spill code.
+
+//===----------------------------------------------------------------------===//
+
+The first should be a single lvx from the constant pool, the second should be
+a xor/stvx:
+
+void foo(void) {
+ int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
+ bar (x);
+}
+
+#include <string.h>
+void foo(void) {
+ int x[8] __attribute__((aligned(128)));
+ memset (x, 0, sizeof (x));
+ bar (x);
+}
+
+//===----------------------------------------------------------------------===//
+
+Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
+
+When -ffast-math is on, we can use 0.0.
+
+//===----------------------------------------------------------------------===//
+
+ Consider this:
+ v4f32 Vector;
+ v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
+
+Since we know that "Vector" is 16-byte aligned and we know the element offset
+of ".X", we should change the load into a lve*x instruction, instead of doing
+a load/store/lve*x sequence.
+
+//===----------------------------------------------------------------------===//
+
+For functions that use altivec AND have calls, we are VRSAVE'ing all call
+clobbered regs.
+
+//===----------------------------------------------------------------------===//
+
+Implement passing vectors by value into calls and receiving them as arguments.
+
+//===----------------------------------------------------------------------===//
+
+GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
+of C1/C2/C3, then a load and vperm of Variable.
+
+//===----------------------------------------------------------------------===//
+
+We need a way to teach tblgen that some operands of an intrinsic are required to
+be constants. The verifier should enforce this constraint.
+
+//===----------------------------------------------------------------------===//
+
+We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
+aligned stack slot, followed by a load/vperm. We should probably just store it
+to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
+in memory this is a big win.
+
+//===----------------------------------------------------------------------===//
+
+extract_vector_elt of an arbitrary constant vector can be done with the
+following instructions:
+
+vTemp = vec_splat(v0,2); // 2 is the element the src is in.
+vec_ste(&destloc,0,vTemp);
+
+We can do an arbitrary non-constant value by using lvsr/perm/ste.
+
+//===----------------------------------------------------------------------===//
+
+If we want to tie instruction selection into the scheduler, we can do some
+constant formation with different instructions. For example, we can generate
+"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
+"vsplti 0" or "vxor", each of which use different execution units, thus could
+help scheduling.
+
+This is probably only reasonable for a post-pass scheduler.
+
+//===----------------------------------------------------------------------===//
+
+For this function:
+
+void test(vector float *A, vector float *B) {
+ vector float C = (vector float)vec_cmpeq(*A, *B);
+ if (!vec_any_eq(*A, *B))
+ *B = (vector float){0,0,0,0};
+ *A = C;
+}
+
+we get the following basic block:
+
+ ...
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp v4, v3, v2
+ vcmpeqfp. v2, v3, v2
+ bne cr6, LBB1_2 ; cond_next
+
+The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
+vcmpeqfp. result is used by a branch. This can be improved.
+
+//===----------------------------------------------------------------------===//
+
+The code generated for this is truly awful:
+
+vector float test(float a, float b) {
+ return (vector float){ 0.0, a, 0.0, 0.0};
+}
+
+LCPI1_0: ; float
+ .space 4
+ .text
+ .globl _test
+ .align 4
+_test:
+ mfspr r2, 256
+ oris r3, r2, 4096
+ mtspr 256, r3
+ lis r3, ha16(LCPI1_0)
+ addi r4, r1, -32
+ stfs f1, -16(r1)
+ addi r5, r1, -16
+ lfs f0, lo16(LCPI1_0)(r3)
+ stfs f0, -32(r1)
+ lvx v2, 0, r4
+ lvx v3, 0, r5
+ vmrghw v3, v3, v2
+ vspltw v2, v2, 0
+ vmrghw v2, v2, v3
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+int foo(vector float *x, vector float *y) {
+ if (vec_all_eq(*x,*y)) return 3245;
+ else return 12;
+}
+
+A predicate compare being used in a select_cc should have the same peephole
+applied to it as a predicate compare used by a br_cc. There should be no
+mfcr here:
+
+_foo:
+ mfspr r2, 256
+ oris r5, r2, 12288
+ mtspr 256, r5
+ li r5, 12
+ li r6, 3245
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp. v2, v3, v2
+ mfcr r3, 2
+ rlwinm r3, r3, 25, 31, 31
+ cmpwi cr0, r3, 0
+ bne cr0, LBB1_2 ; entry
+LBB1_1: ; entry
+ mr r6, r5
+LBB1_2: ; entry
+ mr r3, r6
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+CodeGen/PowerPC/vec_constants.ll has an and operation that should be
+codegen'd to andc. The issue is that the 'all ones' build vector is
+SelectNodeTo'd to a VSPLTISB instruction node before the and/xor is selected
+which prevents the vnot pattern from matching.
+
+
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
new file mode 100644
index 0000000..37b671f
--- /dev/null
+++ b/lib/Target/README.txt
@@ -0,0 +1,451 @@
+Target Independent Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+With the recent changes to make the implicit def/use set explicit in
+machineinstrs, we should change the target descriptions for 'call' instructions
+so that the .td files don't list all the call-clobbered registers as implicit
+defs. Instead, these should be added by the code generator (e.g. on the dag).
+
+This has a number of uses:
+
+1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
+ for their different impdef sets.
+2. Targets with multiple calling convs (e.g. x86) which have different clobber
+ sets don't need copies of call instructions.
+3. 'Interprocedural register allocation' can be done to reduce the clobber sets
+ of calls.
+
+//===---------------------------------------------------------------------===//
+
+Make the PPC branch selector target independent
+
+//===---------------------------------------------------------------------===//
+
+Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
+precision don't matter (ffastmath). Misc/mandel will like this. :)
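+
+In source terms the intended expansion is roughly (only legal under
+-ffast-math, since x*x + y*y can overflow/underflow where hypot would not):
+
+#include <math.h>
+double hypot_fast(double x, double y) {
+  return sqrt(x * x + y * y);   /* the sqrt lowers to llvm.sqrt */
+}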
+
+//===---------------------------------------------------------------------===//
+
+Solve this DAG isel folding deficiency:
+
+int X, Y;
+
+void fn1(void)
+{
+ X = X | (Y << 3);
+}
+
+compiles to
+
+fn1:
+ movl Y, %eax
+ shll $3, %eax
+ orl X, %eax
+ movl %eax, X
+ ret
+
+The problem is the store's chain operand is not the load X but rather
+a TokenFactor of the load X and load Y, which prevents the folding.
+
+There are two ways to fix this:
+
+1. The dag combiner can start using alias analysis to realize that y/x
+ don't alias, making the store to X not dependent on the load from Y.
+2. The generated isel could be made smarter in the case it can't
+ disambiguate the pointers.
+
+Number 1 is the preferred solution.
+
+This has been "fixed" by a TableGen hack. But that is a short term workaround
+which will be removed once the proper fix is made.
+
+//===---------------------------------------------------------------------===//
+
+On targets with expensive 64-bit multiply, we could LSR this:
+
+for (i = ...; ++i) {
+ x = 1ULL << i;
+
+into:
+ long long tmp = 1;
+ for (i = ...; ++i, tmp+=tmp)
+ x = tmp;
+
+This would be a win on ppc32, but not x86 or ppc64.
+
+//===---------------------------------------------------------------------===//
+
+Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)
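+
+The point is that only the byte holding the sign bit needs to be loaded; a
+hedged C sketch for a big-endian target such as PPC (a little-endian target
+would read byte 3 instead):
+
+int is_negative(const int *P) {
+  return *(const signed char *)P < 0;   /* one-byte load instead of four */
+}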
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn: X*X*X*X -> t=(X*X) (t*t) to eliminate a multiply.
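+
+A small worked example of the intent:
+
+int pow4(int x) {
+  int t = x * x;   /* one multiply */
+  return t * t;    /* two multiplies total instead of three */
+}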
+
+//===---------------------------------------------------------------------===//
+
+Interesting? testcase for add/shift/mul reassoc:
+
+int bar(int x, int y) {
+ return x*x*x+y+x*x*x*x*x*y*y*y*y;
+}
+int foo(int z, int n) {
+ return bar(z, n) + bar(2*z, 2*n);
+}
+
+Reassociate should handle the example in GCC PR16157.
+
+//===---------------------------------------------------------------------===//
+
+These two functions should generate the same code on big-endian systems:
+
+int g(int *j,int *l) { return memcmp(j,l,4); }
+int h(int *j, int *l) { return *j - *l; }
+
+this could be done in SelectionDAGISel.cpp, along with other special cases,
+for 1,2,4,8 bytes.
+
+//===---------------------------------------------------------------------===//
+
+It would be nice to revert this patch:
+http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
+
+And teach the dag combiner enough to simplify the code expanded before
+legalize. It seems plausible that this knowledge would let it simplify other
+stuff too.
+
+//===---------------------------------------------------------------------===//
+
+For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+to the type size. It works but can be overly conservative as the alignment of
+specific vector types is target dependent.
+
+//===---------------------------------------------------------------------===//
+
+We should add 'unaligned load/store' nodes, and produce them from code like
+this:
+
+v4sf example(float *P) {
+ return (v4sf){P[0], P[1], P[2], P[3] };
+}
+
+//===---------------------------------------------------------------------===//
+
+We should constant fold vector type casts at the LLVM level, regardless of the
+cast. Currently we cannot fold some casts because we don't have TargetData
+information in the constant folder, so we don't know the endianness of the
+target!
+
+//===---------------------------------------------------------------------===//
+
+Add support for conditional increments, and other related patterns. Instead
+of:
+
+ movl 136(%esp), %eax
+ cmpl $0, %eax
+ je LBB16_2 #cond_next
+LBB16_1: #cond_true
+ incl _foo
+LBB16_2: #cond_next
+
+emit:
+ movl _foo, %eax
+ cmpl $1, %edi
+ sbbl $-1, %eax
+ movl %eax, _foo
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+
+Expand these to calls of sin/cos and stores:
+ void sincos(double x, double *sin, double *cos);
+ void sincosf(float x, float *sin, float *cos);
+ void sincosl(long double x, long double *sin, long double *cos);
+
+Doing so could allow SROA of the destination pointers. See also:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687
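+
+As a rough sketch (purely illustrative, not the planned implementation), the
+expansion of a sincos call is just two libm calls plus stores through the
+destination pointers, which is what makes those destinations visible to SROA:
+
+#include <math.h>
+
+static void expand_sincos(double x, double *sinp, double *cosp) {
+  *sinp = sin(x);   // the two stores are what SROA can later eliminate
+  *cosp = cos(x);
+}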
+
+//===---------------------------------------------------------------------===//
+
+Scalar Repl cannot currently promote this testcase to 'ret long cst':
+
+ %struct.X = type { i32, i32 }
+ %struct.Y = type { %struct.X }
+
+define i64 @bar() {
+ %retval = alloca %struct.Y, align 8
+ %tmp12 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 0
+ store i32 0, i32* %tmp12
+ %tmp15 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 1
+ store i32 1, i32* %tmp15
+ %retval.upgrd.1 = bitcast %struct.Y* %retval to i64*
+ %retval.upgrd.2 = load i64* %retval.upgrd.1
+ ret i64 %retval.upgrd.2
+}
+
+it should be extended to do so.
+
+//===---------------------------------------------------------------------===//
+
+-scalarrepl should promote this to be a vector scalar.
+
+ %struct..0anon = type { <4 x float> }
+
+define void @test1(<4 x float> %V, float* %P) {
+ %u = alloca %struct..0anon, align 16
+ %tmp = getelementptr %struct..0anon* %u, i32 0, i32 0
+ store <4 x float> %V, <4 x float>* %tmp
+ %tmp1 = bitcast %struct..0anon* %u to [4 x float]*
+ %tmp.upgrd.1 = getelementptr [4 x float]* %tmp1, i32 0, i32 1
+ %tmp.upgrd.2 = load float* %tmp.upgrd.1
+ %tmp3 = mul float %tmp.upgrd.2, 2.000000e+00
+ store float %tmp3, float* %P
+ ret void
+}
+
+//===---------------------------------------------------------------------===//
+
+Turn this into a single byte store with no load (the constants below are
+0xC5000000 and 0xC5FFFFFF, so this just stores 0xC5 to the top byte; the other
+3 bytes are unmodified):
+
+void %test(uint* %P) {
+ %tmp = load uint* %P
+ %tmp14 = or uint %tmp, 3305111552
+ %tmp15 = and uint %tmp14, 3321888767
+ store uint %tmp15, uint* %P
+ ret void
+}
+
+//===---------------------------------------------------------------------===//
+
+dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x.
+
+Compile:
+
+int bar(int x)
+{
+ int t = __builtin_clz(x);
+ return -(t>>5);
+}
+
+to:
+
+_bar: addic r3,r3,-1
+ subfe r3,r3,r3
+ blr
+
+//===---------------------------------------------------------------------===//
+
+Legalize should lower ctlz/cttz via popcnt, on targets that have popcnt but
+not ctlz/cttz (itanium, what else?):
+  cttz(x) = popcnt((x-1) & ~x)
+  ctlz(x) = popcnt(~x), after first smearing the leading one bit down into x
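+
+A sketch of both identities in plain C++ (popcnt32 here is a stand-in for the
+target's population-count instruction; this only illustrates the lowering):
+
+static unsigned popcnt32(unsigned x) {
+  unsigned n = 0;
+  for (; x; x &= x - 1) ++n;        // clear the lowest set bit each time
+  return n;
+}
+static unsigned cttz32(unsigned x) { // x != 0
+  return popcnt32((x - 1) & ~x);    // the trailing zeros become ones
+}
+static unsigned ctlz32(unsigned x) { // x != 0
+  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;   // smear the leading one bit down
+  x |= x >> 8;  x |= x >> 16;
+  return popcnt32(~x);              // what's left clear is the leading zeros
+}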
+
+//===---------------------------------------------------------------------===//
+
+quantum_sigma_x in 462.libquantum contains the following loop:
+
+ for(i=0; i<reg->size; i++)
+ {
+ /* Flip the target bit of each basis state */
+ reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target);
+ }
+
+Where MAX_UNSIGNED/state is a 64-bit int. On a 32-bit platform it would be just
+so cool to turn it into something like:
+
+ long long Res = ((MAX_UNSIGNED) 1 << target);
+ if (target < 32) {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFFULL;
+ } else {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL;
+ }
+
+... which would only do one 32-bit XOR per loop iteration instead of two.
+
+It would also be nice to recognize that reg->size doesn't alias reg->node[i],
+but alas...
+
+//===---------------------------------------------------------------------===//
+
+This isn't recognized as bswap by instcombine:
+
+unsigned int swap_32(unsigned int v) {
+ v = ((v & 0x00ff00ffU) << 8) | ((v & 0xff00ff00U) >> 8);
+ v = ((v & 0x0000ffffU) << 16) | ((v & 0xffff0000U) >> 16);
+ return v;
+}
+
+Nor is this (yes, it really is bswap):
+
+unsigned long reverse(unsigned v) {
+ unsigned t;
+ t = v ^ ((v << 16) | (v >> 16));
+ t &= ~0xff0000;
+ v = (v << 24) | (v >> 8);
+ return v ^ (t >> 8);
+}
+
+//===---------------------------------------------------------------------===//
+
+These should turn into single 16-bit (unaligned?) loads on little/big endian
+processors.
+
+unsigned short read_16_le(const unsigned char *adr) {
+ return adr[0] | (adr[1] << 8);
+}
+unsigned short read_16_be(const unsigned char *adr) {
+ return (adr[0] << 8) | adr[1];
+}
+
+//===---------------------------------------------------------------------===//
+
+-instcombine should handle this transform:
+ icmp pred (sdiv X, C1), C2
+when X, C1, and C2 are unsigned. Similarly for udiv and signed operands.
+
+Currently InstCombine avoids this transform but will do it when the signs of
+the operands and the sign of the divide match. See the FIXME in
+InstructionCombining.cpp in the visitSetCondInst method after the switch case
+for Instruction::UDiv (around line 4447) for more details.
+
+The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of
+this construct.
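+
+A worked instance of the fold (a made-up example, not taken from those tests):
+when everything involved is unsigned, comparing a quotient against a constant
+collapses into a range check on the dividend:
+
+int quot_cmp(unsigned x) {
+  return x / 10 == 5;      // quotient compare ...
+}
+int range_cmp(unsigned x) {
+  return x - 50 < 10;      // ... is equivalent to x being in [50, 60)
+}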
+
+//===---------------------------------------------------------------------===//
+
+Instcombine misses several of these cases (see the testcase in the patch):
+http://gcc.gnu.org/ml/gcc-patches/2006-10/msg01519.html
+
+//===---------------------------------------------------------------------===//
+
+viterbi speeds up *significantly* if the various "history" related copy loops
+are turned into memcpy calls at the source level. We need a "loops to memcpy"
+pass.
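+
+For illustration, the shape of loop such a pass would have to recognize (a
+made-up example, not the actual viterbi source; the rewrite is only valid when
+the two ranges are known not to overlap):
+
+#include <string.h>
+
+void copy_history(int *dst, const int *src, unsigned n) {
+  for (unsigned i = 0; i != n; ++i)  // element-by-element copy loop ...
+    dst[i] = src[i];
+}
+void copy_history_memcpy(int *dst, const int *src, unsigned n) {
+  memcpy(dst, src, n * sizeof(int)); // ... that should become this memcpy
+}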
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+typedef unsigned U32;
+typedef unsigned long long U64;
+int test (U32 *inst, U64 *regs) {
+ U64 effective_addr2;
+ U32 temp = *inst;
+ int r1 = (temp >> 20) & 0xf;
+ int b2 = (temp >> 16) & 0xf;
+ effective_addr2 = temp & 0xfff;
+ if (b2) effective_addr2 += regs[b2];
+ b2 = (temp >> 12) & 0xf;
+ if (b2) effective_addr2 += regs[b2];
+ effective_addr2 &= regs[4];
+ if ((effective_addr2 & 3) == 0)
+ return 1;
+ return 0;
+}
+
+Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems,
+we don't eliminate the computation of the top half of effective_addr2 because
+we don't have whole-function selection dags. On x86, this means the function
+uses one more register when effective_addr2 is declared as U64 than when it is
+declared U32.
+
+//===---------------------------------------------------------------------===//
+
+Promotion of i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit
+regs and bswap, like itanium.
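+
+Sketch of the idea, using GCC's __builtin_bswap64 as a stand-in for the
+target's 64-bit byte-swap instruction:
+
+static unsigned bswap32_via_i64(unsigned x) {
+  // Widen to 64 bits, byte-swap, then shift the interesting four bytes back
+  // down: bswap64(0x00000000AABBCCDD) == 0xDDCCBBAA00000000.
+  return (unsigned)(__builtin_bswap64((unsigned long long)x) >> 32);
+}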
+
+//===---------------------------------------------------------------------===//
+
+LSR should know what GPR types a target has. This code:
+
+volatile short X, Y; // globals
+
+void foo(int N) {
+ int i;
+ for (i = 0; i < N; i++) { X = i; Y = i*4; }
+}
+
+produces two identical IVs (after promotion) on PPC/ARM:
+
+LBB1_1: @bb.preheader
+ mov r3, #0
+ mov r2, r3
+ mov r1, r3
+LBB1_2: @bb
+ ldr r12, LCPI1_0
+ ldr r12, [r12]
+ strh r2, [r12]
+ ldr r12, LCPI1_1
+ ldr r12, [r12]
+ strh r3, [r12]
+ add r1, r1, #1 <- [0,+,1]
+ add r3, r3, #4
+ add r2, r2, #1 <- [0,+,1]
+ cmp r1, r0
+ bne LBB1_2 @bb
+
+
+//===---------------------------------------------------------------------===//
+
+Tail call elim should be more aggressive, checking to see if the call is
+followed by an uncond branch to an exit block.
+
+; This testcase is due to tail-duplication not wanting to copy the return
+; instruction into the terminating blocks because there was other code
+; optimized out of the function after the taildup happened.
+;RUN: llvm-upgrade < %s | llvm-as | opt -tailcallelim | llvm-dis | not grep call
+
+int %t4(int %a) {
+entry:
+ %tmp.1 = and int %a, 1
+ %tmp.2 = cast int %tmp.1 to bool
+ br bool %tmp.2, label %then.0, label %else.0
+
+then.0:
+ %tmp.5 = add int %a, -1
+ %tmp.3 = call int %t4( int %tmp.5 )
+ br label %return
+
+else.0:
+ %tmp.7 = setne int %a, 0
+ br bool %tmp.7, label %then.1, label %return
+
+then.1:
+ %tmp.11 = add int %a, -2
+ %tmp.9 = call int %t4( int %tmp.11 )
+ br label %return
+
+return:
+ %result.0 = phi int [ 0, %else.0 ], [ %tmp.3, %then.0 ],
+ [ %tmp.9, %then.1 ]
+ ret int %result.0
+}
+
+//===---------------------------------------------------------------------===//
+
+Argument promotion should promote arguments for recursive functions, like
+this:
+
+; RUN: llvm-upgrade < %s | llvm-as | opt -argpromotion | llvm-dis | grep x.val
+
+implementation ; Functions:
+
+internal int %foo(int* %x) {
+entry:
+ %tmp = load int* %x
+ %tmp.foo = call int %foo(int *%x)
+ ret int %tmp.foo
+}
+
+int %bar(int* %x) {
+entry:
+ %tmp3 = call int %foo( int* %x) ; <int>[#uses=1]
+ ret int %tmp3
+}
+
+
+
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 0000000..784f1bd
--- /dev/null
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,76 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that fills delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delayslotfiller"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+///
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (TII->hasDelaySlot(I->getOpcode())) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, TII->get(SP::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 0000000..e1c9966
--- /dev/null
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,138 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ FPMover(TargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ assert(0 && "Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setInstrDescriptor(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setInstrDescriptor(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setInstrDescriptor(TII->get(SP::FABSS));
+ else
+ assert(0 && "Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DOUT << "FPMover: the modified instr is: " << *MI;
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DOUT << "FPMover: the inserted instr is: " << *MI;
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
new file mode 100644
index 0000000..8cc4add
--- /dev/null
+++ b/lib/Target/Sparc/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSparc
+TARGET = Sparc
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
+ SparcGenRegisterInfo.inc SparcGenInstrNames.inc \
+ SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
+ SparcGenDAGISel.inc SparcGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
new file mode 100644
index 0000000..f7cb9b8
--- /dev/null
+++ b/lib/Target/Sparc/README.txt
@@ -0,0 +1,57 @@
+
+To-do
+-----
+
+* Keep the address of the constant pool in a register instead of forming its
+ address all of the time.
+* We can fold small constant offsets into the %hi/%lo references to constant
+ pool addresses as well.
+* When in V9 mode, register allocate %icc[0-3].
+* Emit the 'Branch on Integer Register with Prediction' instructions. It's
+ not clear how to write a pattern for this though:
+
+float %t1(int %a, int* %p) {
+ %C = seteq int %a, 0
+ br bool %C, label %T, label %F
+T:
+ store int 123, int* %p
+ br label %F
+F:
+ ret float undef
+}
+
+codegens to this:
+
+t1:
+ save -96, %o6, %o6
+1) subcc %i0, 0, %l0
+1) bne .LBBt1_2 ! F
+ nop
+.LBBt1_1: ! T
+ or %g0, 123, %l0
+ st %l0, [%i1]
+.LBBt1_2: ! F
+ restore %g0, %g0, %g0
+ retl
+ nop
+
+1) should be replaced with a brz in V9 mode.
+
+* Same as above, but emit conditional move on register zero (p192) in V9
+ mode. Testcase:
+
+int %t1(int %a, int %b) {
+ %C = seteq int %a, 0
+ %D = select bool %C, int %a, int %b
+ ret int %D
+}
+
+* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling
+ with the Y register, if they are faster.
+
+* Codegen bswap(load)/store(bswap) -> load/store ASI
+
+* Implement frame pointer elimination, e.g. eliminate save/restore for
+ leaf fns.
+* Fill delay slots
+
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
new file mode 100644
index 0000000..8936afa
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,116 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include <iosfwd>
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+
+ FunctionPass *createSparcISelDag(TargetMachine &TM);
+ FunctionPass *createSparcCodePrinterPass(std::ostream &OS, TargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+} // end namespace llvm;
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#include "SparcGenRegisterNames.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#include "SparcGenInstrNames.inc"
+
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Greater or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
new file mode 100644
index 0000000..1646e0e
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,80 @@
+//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 0000000..1f82326
--- /dev/null
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,291 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN SparcAsmPrinter : public AsmPrinter {
+ SparcAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
+ : AsmPrinter(O, TM, T) {
+ }
+
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+/// createSparcCodePrinterPass - Returns a pass that prints the SPARC
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createSparcCodePrinterPass(std::ostream &o,
+ TargetMachine &tm) {
+ return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // BBNumber is used here so that a given Printer will never give two
+ // BBs the same name. (If you have a better way, please let me know!)
+ static unsigned BBNumber = 0;
+
+ O << "\n\n";
+ // What's my mangled name?
+ CurrentFnName = Mang->getValueName(MF.getFunction());
+
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+ EmitAlignment(4, F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.type\t" << CurrentFnName << ", #function\n";
+ O << CurrentFnName << ":\n";
+
+ // Number each basic block so that we can consistently refer to them
+ // in PC-relative references.
+ // FIXME: Why not use the MBB numbers?
+ NumberForBB.clear();
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ NumberForBB[I->getBasicBlock()] = BBNumber++;
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isRegister() && !MO.isImmediate()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri)
+ && !MO.isRegister() && !MO.isImmediate()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << "%" << LowercaseString (RI.get(MO.getReg()).Name);
+ else
+ O << "%reg" << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImmedValue();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ O << Mang->getValueName(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getConstantPoolIndex();
+ break;
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ printOperand(MI, opNum);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isRegister() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImmediate() &&
+ MI->getOperand(opNum+1).getImmedValue() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobalAddress() ||
+ MI->getOperand(opNum+1).isConstantPoolIndex()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1);
+ }
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImmedValue();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+
+
+bool SparcAsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M);
+ return false; // success
+}
+
+bool SparcAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (I->hasInitializer()) { // External globals require no code
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Size = TD->getTypeSize(C->getType());
+ unsigned Align = TD->getPrefTypeAlignment(C->getType());
+
+ if (C->isNullValue() &&
+ (I->hasLinkOnceLinkage() || I->hasInternalLinkage() ||
+ I->hasWeakLinkage() /* FIXME: Verify correct */)) {
+ SwitchToDataSection(".data", I);
+ if (I->hasInternalLinkage())
+ O << "\t.local " << name << "\n";
+
+ O << "\t.comm " << name << "," << TD->getTypeSize(C->getType())
+ << "," << Align;
+ O << "\n";
+ } else {
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage: // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << "\n";
+ SwitchToDataSection("", I);
+ O << "\t.section\t\".llvm.linkonce.d." << name
+ << "\",\"aw\",@progbits\n";
+ break;
+
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ if (C->isNullValue())
+ SwitchToDataSection(".bss", I);
+ else
+ SwitchToDataSection(".data", I);
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ O << "\t.align " << Align << "\n";
+ O << "\t.type " << name << ",#object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+ }
+ }
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 0000000..8c8b3f8
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,1140 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+SP::INSTRUCTION_LIST_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG // Return with a flag operand.
+ };
+}
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+namespace {
+ class SparcTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual std::vector<SDOperand>
+ LowerArguments(Function &F, SelectionDAG &DAG);
+ virtual std::pair<SDOperand, SDOperand>
+ LowerCallTo(SDOperand Chain, const Type *RetTy, bool RetTyIsSigned,
+ bool isVarArg, unsigned CC, bool isTailCall, SDOperand Callee,
+ ArgListTy &Args, SelectionDAG &DAG);
+ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ };
+}
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM operation.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // SPARC has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMSET, MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9()) {
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+ }
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ }
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+/// Mask are known to be either zero or one for the target-specific node Op,
+/// and return them in KnownZero/KnownOne. Used by the DAG combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ uint64_t KnownZero2, KnownOne2;
+ KnownZero = KnownOne = 0; // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
+
+/// LowerArguments - V8 uses a very simple ABI, where all values are passed in
+/// either one or two GPRs, including FP values. TODO: we should pass FP values
+/// in FP registers for fastcc functions.
+std::vector<SDOperand>
+SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SSARegMap *RegMap = MF.getSSARegMap();
+ std::vector<SDOperand> ArgValues;
+
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = 68;
+
+ SDOperand Root = DAG.getRoot();
+ std::vector<SDOperand> OutChains;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ MVT::ValueType ObjectVT = getValueType(I->getType());
+
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getNode(ISD::UNDEF, ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VReg = RegMap->createVirtualRegister(&SP::IntRegsRegClass);
+ MF.addLiveIn(*CurArgReg++, VReg);
+ SDOperand Arg = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ if (ObjectVT != MVT::i32) {
+ unsigned AssertOp = ISD::AssertSext;
+ Arg = DAG.getNode(AssertOp, MVT::i32, Arg,
+ DAG.getValueType(ObjectVT));
+ Arg = DAG.getNode(ISD::TRUNCATE, ObjectVT, Arg);
+ }
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDOperand Load;
+ if (ObjectVT == MVT::i32) {
+ Load = DAG.getLoad(MVT::i32, Root, FIPtr, NULL, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, MVT::getSizeInBits(ObjectVT)/8);
+ FIPtr = DAG.getNode(ISD::ADD, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, MVT::i32, Root, FIPtr,
+ NULL, 0, ObjectVT);
+ Load = DAG.getNode(ISD::TRUNCATE, ObjectVT, Load);
+ }
+ ArgValues.push_back(Load);
+ }
+
+ ArgOffset += 4;
+ break;
+ case MVT::f32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getNode(ISD::UNDEF, ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ // FP value is passed in an integer register.
+ unsigned VReg = RegMap->createVirtualRegister(&SP::IntRegsRegClass);
+ MF.addLiveIn(*CurArgReg++, VReg);
+ SDOperand Arg = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+
+ Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Arg);
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDOperand Load = DAG.getLoad(MVT::f32, Root, FIPtr, NULL, 0);
+ ArgValues.push_back(Load);
+ }
+ ArgOffset += 4;
+ break;
+
+ case MVT::i64:
+ case MVT::f64:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getNode(ISD::UNDEF, ObjectVT));
+ } else if (/* FIXME: Apparently this isn't safe?? */
+ 0 && CurArgReg == ArgRegEnd && ObjectVT == MVT::f64 &&
+ ((CurArgReg-ArgRegs) & 1) == 0) {
+ // If this is a double argument and the whole thing lives on the stack,
+ // and the argument is aligned, load the double straight from the stack.
+ // We can't do a load in cases like void foo([6ints], int,double),
+ // because the double wouldn't be aligned!
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ ArgValues.push_back(DAG.getLoad(MVT::f64, Root, FIPtr, NULL, 0));
+ } else {
+ SDOperand HiVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegHi = RegMap->createVirtualRegister(&SP::IntRegsRegClass);
+ MF.addLiveIn(*CurArgReg++, VRegHi);
+ HiVal = DAG.getCopyFromReg(Root, VRegHi, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ HiVal = DAG.getLoad(MVT::i32, Root, FIPtr, NULL, 0);
+ }
+
+ SDOperand LoVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegLo = RegMap->createVirtualRegister(&SP::IntRegsRegClass);
+ MF.addLiveIn(*CurArgReg++, VRegLo);
+ LoVal = DAG.getCopyFromReg(Root, VRegLo, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, Root, FIPtr, NULL, 0);
+ }
+
+ // Compose the two halves together into an i64 unit.
+ SDOperand WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, MVT::i64, LoVal, HiVal);
+
+ // If we want a double, do a bit convert.
+ if (ObjectVT == MVT::f64)
+ WholeValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, WholeValue);
+
+ ArgValues.push_back(WholeValue);
+ }
+ ArgOffset += 8;
+ break;
+ }
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (F.getFunctionType()->isVarArg()) {
+ // Remember the vararg offset for the va_start implementation.
+ VarArgsFrameOffset = ArgOffset;
+
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegMap->createVirtualRegister(&SP::IntRegsRegClass);
+ MF.addLiveIn(*CurArgReg, VReg);
+ SDOperand Arg = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDOperand FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), Arg, FIPtr, NULL, 0));
+ ArgOffset += 4;
+ }
+ }
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size()));
+
+ // Finally, inform the code generator which regs we return values in.
+ switch (getValueType(F.getReturnType())) {
+ default: assert(0 && "Unknown type!");
+ case MVT::isVoid: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ MF.addLiveOut(SP::I0);
+ break;
+ case MVT::i64:
+ MF.addLiveOut(SP::I0);
+ MF.addLiveOut(SP::I1);
+ break;
+ case MVT::f32:
+ MF.addLiveOut(SP::F0);
+ break;
+ case MVT::f64:
+ MF.addLiveOut(SP::D0);
+ break;
+ }
+
+ return ArgValues;
+}
+
+std::pair<SDOperand, SDOperand>
+SparcTargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
+ bool RetTyIsSigned, bool isVarArg, unsigned CC,
+ bool isTailCall, SDOperand Callee,
+ ArgListTy &Args, SelectionDAG &DAG) {
+ // Count the size of the outgoing arguments.
+ unsigned ArgsSize = 0;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ switch (getValueType(Args[i].Ty)) {
+ default: assert(0 && "Unknown value type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::f32:
+ ArgsSize += 4;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ ArgsSize += 8;
+ break;
+ }
+ }
+ if (ArgsSize > 4*6)
+ ArgsSize -= 4*6; // Space for first 6 arguments is prereserved.
+ else
+ ArgsSize = 0;
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(ArgsSize, getPointerTy()));
+
+ SDOperand StackPtr;
+ std::vector<SDOperand> Stores;
+ std::vector<SDOperand> RegValuesToPass;
+ unsigned ArgOffset = 68;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SDOperand Val = Args[i].Node;
+ MVT::ValueType ObjectVT = Val.getValueType();
+ SDOperand ValToStore(0, 0);
+ unsigned ObjSize;
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16: {
+ // Promote the integer to 32-bits. If the input type is signed, use a
+ // sign extend, otherwise use a zero extend.
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+ Val = DAG.getNode(ExtendKind, MVT::i32, Val);
+ // FALL THROUGH
+ }
+ case MVT::i32:
+ ObjSize = 4;
+
+ if (RegValuesToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+ }
+ break;
+ case MVT::f32:
+ ObjSize = 4;
+ if (RegValuesToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ // Convert this to a FP value in an int reg.
+ Val = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Val);
+ RegValuesToPass.push_back(Val);
+ }
+ break;
+ case MVT::f64:
+ ObjSize = 8;
+ // If we can store this directly into the outgoing slot, do so. We can
+ // do this when all ArgRegs are used and if the outgoing slot is aligned.
+ // FIXME: McGill/misr fails with this.
+ if (0 && RegValuesToPass.size() >= 6 && ((ArgOffset-68) & 7) == 0) {
+ ValToStore = Val;
+ break;
+ }
+
+ // Otherwise, convert this to a FP value in int regs.
+ Val = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Val);
+ // FALL THROUGH
+ case MVT::i64:
+ ObjSize = 8;
+ if (RegValuesToPass.size() >= 6) {
+ ValToStore = Val; // Whole thing is passed in memory.
+ break;
+ }
+
+ // Split the value into top and bottom part. Top part goes in a reg.
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, getPointerTy(), Val,
+ DAG.getConstant(1, MVT::i32));
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, getPointerTy(), Val,
+ DAG.getConstant(0, MVT::i32));
+ RegValuesToPass.push_back(Hi);
+
+ if (RegValuesToPass.size() >= 6) {
+ ValToStore = Lo;
+ ArgOffset += 4;
+ ObjSize = 4;
+ } else {
+ RegValuesToPass.push_back(Lo);
+ }
+ break;
+ }
+
+ if (ValToStore.Val) {
+ if (!StackPtr.Val) {
+ StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ }
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ Stores.push_back(DAG.getStore(Chain, ValToStore, PtrOff, NULL, 0));
+ }
+ ArgOffset += ObjSize;
+ }
+
+ // Emit all stores, making sure they occur before any copies into physregs.
+ if (!Stores.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Stores[0],Stores.size());
+
+ static const unsigned ArgRegs[] = {
+ SP::O0, SP::O1, SP::O2, SP::O3, SP::O4, SP::O5
+ };
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into O[0-5].
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, ArgRegs[i], RegValuesToPass[i], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ SDOperand Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(SPISD::CALL, NodeTys, Ops, InFlag.Val ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ MVT::ValueType RetTyVT = getValueType(RetTy);
+ SDOperand RetVal;
+ if (RetTyVT != MVT::isVoid) {
+ switch (RetTyVT) {
+ default: assert(0 && "Unknown value type to return!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16: {
+ RetVal = DAG.getCopyFromReg(Chain, SP::O0, MVT::i32, InFlag);
+ Chain = RetVal.getValue(1);
+
+ // Add a note to keep track of whether it is sign or zero extended.
+ ISD::NodeType AssertKind = ISD::AssertZext;
+ if (RetTyIsSigned)
+ AssertKind = ISD::AssertSext;
+ RetVal = DAG.getNode(AssertKind, MVT::i32, RetVal,
+ DAG.getValueType(RetTyVT));
+ RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
+ break;
+ }
+ case MVT::i32:
+ RetVal = DAG.getCopyFromReg(Chain, SP::O0, MVT::i32, InFlag);
+ Chain = RetVal.getValue(1);
+ break;
+ case MVT::f32:
+ RetVal = DAG.getCopyFromReg(Chain, SP::F0, MVT::f32, InFlag);
+ Chain = RetVal.getValue(1);
+ break;
+ case MVT::f64:
+ RetVal = DAG.getCopyFromReg(Chain, SP::D0, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ break;
+ case MVT::i64:
+ SDOperand Lo = DAG.getCopyFromReg(Chain, SP::O1, MVT::i32, InFlag);
+ SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), SP::O0, MVT::i32,
+ Lo.getValue(2));
+ RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+ Chain = Hi.getValue(1);
+ break;
+ }
+ }
+
+ Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+ DAG.getConstant(ArgsSize, getPointerTy()));
+
+ return std::make_pair(RetVal, Chain);
+}
+
+// Look at LHS/RHS/CC and see if they came from a lowered setcc instruction.
+// If so, set LHS/RHS to the operands of the original compare and SPCC to the
+// condition code it tested.
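+// For example, a setcc that was lowered into
+//   (select_icc 1, 0, cond, (cmpicc X, Y))
+// and is now being compared against zero with SETNE is really just asking
+// about the original X/Y comparison, so we can hand back X, Y and cond.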
+static void LookThroughSetCC(SDOperand &LHS, SDOperand &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) && cast<ConstantSDNode>(RHS)->getValue() == 0 &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->getValue() == 1 &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getValue() == 0) {
+ SDOperand CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+
+SDOperand SparcTargetLowering::
+LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDOperand Hi = DAG.getNode(SPISD::Hi, MVT::i32, GA);
+ SDOperand Lo = DAG.getNode(SPISD::Lo, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, MVT::i32, Lo, Hi);
+ }
+ case ISD::ConstantPool: {
+ Constant *C = cast<ConstantPoolSDNode>(Op)->getConstVal();
+ SDOperand CP = DAG.getTargetConstantPool(C, MVT::i32,
+ cast<ConstantPoolSDNode>(Op)->getAlignment());
+ SDOperand Hi = DAG.getNode(SPISD::Hi, MVT::i32, CP);
+ SDOperand Lo = DAG.getNode(SPISD::Lo, MVT::i32, CP);
+ return DAG.getNode(ISD::ADD, MVT::i32, Lo, Hi);
+ }
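+  // In both cases above, the Hi/Lo sum is matched later by the SPhi/SPlo
+  // patterns in SparcInstrInfo.td, i.e. it becomes a "sethi %hi(x)" plus an
+  // "add/or ..., %lo(x)" pair.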
+ case ISD::FP_TO_SINT:
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+ case ISD::SINT_TO_FP: {
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, Op.getValueType(), Tmp);
+ }
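+  // In both conversions the i32 value has to live in an FP register, since
+  // fstoi/fitos only read and write the FP register file; the bit_convert
+  // models that move between register banks.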
+ case ISD::BR_CC: {
+ SDOperand Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDOperand LHS = Op.getOperand(2);
+ SDOperand RHS = Op.getOperand(3);
+ SDOperand Dest = Op.getOperand(4);
+ unsigned Opc, SPCC = ~0U;
+
+    // If this is a br_cc of a "setcc", and if the setcc got lowered into
+    // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDOperand CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Flag);
+ SDOperand Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, MVT::Flag, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+ }
+ case ISD::SELECT_CC: {
+ SDOperand LHS = Op.getOperand(0);
+ SDOperand RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDOperand TrueVal = Op.getOperand(2);
+ SDOperand FalseVal = Op.getOperand(3);
+ unsigned Opc, SPCC = ~0U;
+
+    // If this is a select_cc of a "setcc", and if the setcc got lowered into
+    // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDOperand CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT::ValueType> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Flag);
+ SDOperand Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, MVT::Flag, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+ }
+ case ISD::VASTART: {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDOperand Offset = DAG.getNode(ISD::ADD, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(VarArgsFrameOffset, MVT::i32));
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ return DAG.getStore(Op.getOperand(0), Offset,
+ Op.getOperand(1), SV->getValue(), SV->getOffset());
+ }
+ case ISD::VAARG: {
+ SDNode *Node = Op.Val;
+ MVT::ValueType VT = Node->getValueType(0);
+ SDOperand InChain = Node->getOperand(0);
+ SDOperand VAListPtr = Node->getOperand(1);
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+ SDOperand VAList = DAG.getLoad(getPointerTy(), InChain, VAListPtr,
+ SV->getValue(), SV->getOffset());
+ // Increment the pointer, VAList, to the next vaarg
+ SDOperand NextPtr = DAG.getNode(ISD::ADD, getPointerTy(), VAList,
+ DAG.getConstant(MVT::getSizeInBits(VT)/8,
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), NextPtr,
+ VAListPtr, SV->getValue(), SV->getOffset());
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64) {
+ return DAG.getLoad(VT, InChain, VAList, NULL, 0);
+ } else {
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDOperand V = DAG.getLoad(MVT::i64, InChain, VAList, NULL, 0);
+ std::vector<MVT::ValueType> Tys;
+ Tys.push_back(MVT::f64);
+ Tys.push_back(MVT::Other);
+ // Bit-Convert the value to f64.
+ SDOperand Ops[2] = { DAG.getNode(ISD::BIT_CONVERT, MVT::f64, V),
+ V.getValue(1) };
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ }
+ }
+ case ISD::DYNAMIC_STACKALLOC: {
+ SDOperand Chain = Op.getOperand(0); // Legalize the chain.
+ SDOperand Size = Op.getOperand(1); // Legalize the size.
+
+ unsigned SPReg = SP::O6;
+ SDOperand SP = DAG.getCopyFromReg(Chain, SPReg, MVT::i32);
+ SDOperand NewSP = DAG.getNode(ISD::SUB, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), SPReg, NewSP); // Output chain
+
+    // Bias the returned pointer 96 bytes above the new %sp, so that the area
+    // below it can still hold the register window save area and outgoing
+    // argument slots required at the bottom of every SPARC stack frame.
+ SDOperand NewVal = DAG.getNode(ISD::ADD, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ std::vector<MVT::ValueType> Tys;
+ Tys.push_back(MVT::i32);
+ Tys.push_back(MVT::Other);
+ SDOperand Ops[2] = { NewVal, Chain };
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ }
+ case ISD::RET: {
+ SDOperand Copy;
+
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ return SDOperand(); // ret void is legal
+ case 3: {
+ unsigned ArgReg;
+ switch(Op.getOperand(1).getValueType()) {
+ default: assert(0 && "Unknown type to return!");
+ case MVT::i32: ArgReg = SP::I0; break;
+ case MVT::f32: ArgReg = SP::F0; break;
+ case MVT::f64: ArgReg = SP::D0; break;
+ }
+ Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
+ SDOperand());
+ break;
+ }
+ case 5:
+ Copy = DAG.getCopyToReg(Op.getOperand(0), SP::I0, Op.getOperand(3),
+ SDOperand());
+ Copy = DAG.getCopyToReg(Copy, SP::I1, Op.getOperand(1), Copy.getValue(1));
+ break;
+ }
+ return DAG.getNode(SPISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+ }
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: break;
+ }
+ return SDOperand();
+}
+
+MachineBasicBlock *
+SparcTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImmedValue();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ BuildMI(BB, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ SparcTargetLowering Lowering;
+
+  /// Subtarget - Keep a reference to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+public:
+ SparcDAGToDAGISel(TargetMachine &TM)
+ : SelectionDAGISel(Lowering), Lowering(TM),
+ Subtarget(TM.getSubtarget<SparcSubtarget>()) {
+ }
+
+ SDNode *Select(SDOperand Op);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDOperand Op, SDOperand N, SDOperand &R1, SDOperand &R2);
+ bool SelectADDRri(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// InstructionSelectBasicBlock - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void SparcDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(DAG);
+}
+
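+/// SelectADDRri - Match a [base + simm13] address, where the base may be a
+/// register or a frame index and the offset may also be the %lo() half of a
+/// symbol; anything that fits becomes a MEMri operand.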
+bool SparcDAGToDAGISel::SelectADDRri(SDOperand Op, SDOperand Addr,
+ SDOperand &Base, SDOperand &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_simm13(CN)) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
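+/// SelectADDRrr - Match a [reg + reg] address for the MEMrr operand form.
+/// reg+simm13 and reg+%lo() sums are rejected here so that the reg+imm
+/// pattern (SelectADDRri) picks them up instead.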
+bool SparcDAGToDAGISel::SelectADDRrr(SDOperand Op, SDOperand Addr,
+ SDOperand &R1, SDOperand &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
+ Predicate_simm13(Addr.getOperand(1).Val))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDOperand Op) {
+ SDNode *N = Op.Val;
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
+ N->getOpcode() < SPISD::FIRST_NUMBER)
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDOperand DivLHS = N->getOperand(0);
+ SDOperand DivRHS = N->getOperand(1);
+ AddToISelQueue(DivLHS);
+ AddToISelQueue(DivRHS);
+
+ // Set the Y register to the high-part.
+ SDOperand TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDOperand(CurDAG->getTargetNode(SP::SRAri, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDOperand(CurDAG->getTargetNode(SP::WRYrr, MVT::Flag, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDOperand MulLHS = N->getOperand(0);
+ SDOperand MulRHS = N->getOperand(1);
+ AddToISelQueue(MulLHS);
+ AddToISelQueue(MulRHS);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getTargetNode(Opcode, MVT::i32, MVT::Flag,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDOperand(Mul, 1));
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(TargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 0000000..f463ab8
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,113 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag ops, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OperandList = ops;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag ops, string asmstr, list<dag> pattern>
+ : InstSP<ops, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag ops, string asmstr, list<dag> pattern>
+ : F2<ops, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag ops, string asmstr,
+ list<dag> pattern> : F2<ops, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag ops, string asmstr, list<dag> pattern>
+ : InstSP<ops, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
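+// F3_1 and F3_2 below differ only in bit 13 (the i field): i=0 selects the
+// register rs2 form, i=1 selects the sign-extended 13-bit immediate form.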
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag ops,
+ string asmstr, list<dag> pattern> : F3<ops, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag ops,
+ string asmstr, list<dag> pattern> : F3<ops, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag ops,
+ string asmstr, list<dag> pattern> : F3<ops, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 0000000..a8c822a
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,108 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "SparcGenInstrInfo.inc"
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : TargetInstrInfo(SparcInsts, sizeof(SparcInsts)/sizeof(SparcInsts[0])),
+ RI(ST, *this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImmediate() && op.getImmedValue() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const {
+ // We look for 3 kinds of patterns here:
+ // or with G0 or 0
+ // add with G0 or 0
+ // fmovs or FpMOVD (pseudo double move).
+ if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
+ if (MI.getOperand(1).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
+ isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isRegister()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
+ MI.getOpcode() == SP::FMOVD) {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFrameIndex() && MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFrameIndex() && MI->getOperand(1).isImmediate() &&
+ MI->getOperand(1).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(0).getFrameIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond)const{
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, get(SP::BA)).addMBB(TBB);
+ return 1;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 0000000..3fb50ff
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,73 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public TargetInstrInfo {
+ const SparcRegisterInfo RI;
+public:
+ SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and
+ /// leave the source and dest operands in the passed parameters.
+ ///
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot stored to. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 0000000..434e8d7
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,776 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{
+  // simm11 predicate - True if the imm fits in an 11-bit sign extended field.
+ return (((int)N->getValue() << (32-11)) >> (32-11)) == (int)N->getValue();
+}]>;
+
+def simm13 : PatLeaf<(imm), [{
+ // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
+ return (((int)N->getValue() << (32-13)) >> (32-13)) == (int)N->getValue();
+}]>;
+
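+// Transformation function: get the low 10 bits of the immediate.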
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getValue() & 1023, MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return CurDAG->getTargetConstant((unsigned)N->getValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getValue() >> 10) << 10) == (unsigned)N->getValue();
+}], HI22>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_SPRetFlag : SDTypeProfile<0, 0, []>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRetFlag,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the SPCC::CondCodes enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC  : ICC_VAL<13>;  // Carry Clear/Greater or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE  : FCC_VAL<26>;  // Unordered or Equal
+def FCC_GE  : FCC_VAL<27>;  // Greater or Equal
+def FCC_UGE : FCC_VAL<28>;  // Unordered or Greater or Equal
+def FCC_LE  : FCC_VAL<29>;  // Less or Equal
+def FCC_ULE : FCC_VAL<30>;  // Unordered or Less or Equal
+def FCC_O   : FCC_VAL<31>;  // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
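+// For example, the "defm AND : F3_12<...>" use below expands into an ANDrr
+// (reg/reg) instruction and an ANDri (reg/simm13) instruction that share the
+// same op3 encoding.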
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag ops, string asmstr, list<dag> pattern>
+ : InstSP<ops, asmstr, pattern>;
+
+def ADJCALLSTACKDOWN : Pseudo<(ops i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start imm:$amt)]>, Imp<[O6],[O6]>;
+def ADJCALLSTACKUP : Pseudo<(ops i32imm:$amt),
+ "!ADJCALLSTACKUP $amt",
+ [(callseq_end imm:$amt)]>, Imp<[O6],[O6]>;
+def IMPLICIT_DEF_Int : Pseudo<(ops IntRegs:$dst),
+ "!IMPLICIT_DEF $dst",
+ [(set IntRegs:$dst, (undef))]>;
+def IMPLICIT_DEF_FP : Pseudo<(ops FPRegs:$dst), "!IMPLICIT_DEF $dst",
+ [(set FPRegs:$dst, (undef))]>;
+def IMPLICIT_DEF_DFP : Pseudo<(ops DFPRegs:$dst), "!IMPLICIT_DEF $dst",
+ [(set DFPRegs:$dst, (undef))]>;
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(ops DFPRegs:$dst, DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(ops DFPRegs:$dst, DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(ops DFPRegs:$dst, DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence. This has to handle all permutations of
+// selection between i32/f32/f64 on ICC and FCC.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def SELECT_CC_Int_ICC
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_Int_FCC
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_FCC
+ : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, noResults = 1 in {
+  // retl is "jmpl %o7+8, %g0", i.e. return past the call and its delay slot.
+  let rd = G0.Num, rs1 = O7.Num, simm13 = 8 in
+ def RETL: F3_2<2, 0b111000, (ops), "retl", [(retflag)]>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (ops IntRegs:$dst, MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (ops IntRegs:$dst, MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (ops IntRegs:$dst, MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (ops IntRegs:$dst, MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (ops IntRegs:$dst, MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (ops FPRegs:$dst, MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (ops FPRegs:$dst, MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (ops DFPRegs:$dst, MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (ops DFPRegs:$dst, MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (ops MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (ops MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (ops MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (ops MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (ops MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (ops MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (ops MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (ops MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (ops MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (ops MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (ops IntRegs:$dst, i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (ops), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (ops IntRegs:$dst, IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (ops IntRegs:$dst, MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+def SUBXCCrr: F3_1<2, 0b011100,
+ (ops IntRegs:$dst, IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+// Section B.18 - Multiply Instructions, p. 113
+defm UMUL : F3_12np<"umul", 0b001010>;
+defm SMUL : F3_12 <"smul", 0b001011, mul>;
+
+
+// Section B.19 - Divide Instructions, p. 115
+defm UDIV : F3_12np<"udiv", 0b001110>;
+defm SDIV : F3_12np<"sdiv", 0b001111>;
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ops, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, ops, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let noResults = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ops brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def BCOND : BranchSP<0, (ops brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ops, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, ops, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let noResults = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def FBCOND : FPBranchSP<0, (ops brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O0, O1, O2, O3, O4, O5],
+ hasDelaySlot = 1, isCall = 1, noResults = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in {
+ def CALL : InstSP<(ops calltarget:$dst),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (ops MEMrr:$ptr),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (ops MEMri:$ptr),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+def RDY : F3_1<2, 0b101000,
+ (ops IntRegs:$dst),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+def WRYrr : F3_1<2, 0b110000,
+ (ops IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+def WRYri : F3_2<2, 0b110000,
+ (ops IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (ops DFPRegs:$dst, FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (ops FPRegs:$dst, DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (ops DFPRegs:$dst, FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (ops FPRegs:$dst, DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (ops FPRegs:$dst, FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (ops DFPRegs:$dst, DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (ops FPRegs:$dst, FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (ops DFPRegs:$dst, DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (ops FPRegs:$dst, FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (ops DFPRegs:$dst, DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (ops FPRegs:$dst, FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (ops DFPRegs:$dst, DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (ops DFPRegs:$dst, FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (ops FPRegs:$dst, FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (ops DFPRegs:$dst, DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the 2nd template arg is different for these guys.
+// Note 2: the result of a FCMP is not available until the 2nd cycle
+// after the instr is retired, but there is no interlock. This behavior
+// is modelled with a forced noop after the instruction.
+def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (ops FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (ops DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], isTwoAddress = 1 in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ def MOVICCrr
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def MOVFCCrr
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(ops IntRegs:$dst, IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def FMOVS_ICC
+ : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ def FMOVS_FCC
+ : Pseudo<(ops FPRegs:$dst, FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(ops DFPRegs:$dst, DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (ops DFPRegs:$dst, DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (ops DFPRegs:$dst, DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (ops DFPRegs:$dst, DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
+// the top 32 bits before using it. To do this clearing, we use an SRLri X, 0.
+def POPCrr : F3_1<2, 0b101110,
+ (ops IntRegs:$dst, IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+          (POPCrr (SRLri IntRegs:$src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
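+// e.g. an arbitrary 32-bit constant C is emitted as "sethi %hi(C), reg"
+// followed by "or reg, %lo(C), reg".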
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+def : Pat<(ret), (RETL)>;
+
+// Map integer extloads to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+// truncstore bool -> truncstore byte.
+def : Pat<(truncstorei1 IntRegs:$src, ADDRrr:$addr),
+ (STBrr ADDRrr:$addr, IntRegs:$src)>;
+def : Pat<(truncstorei1 IntRegs:$src, ADDRri:$addr),
+ (STBri ADDRri:$addr, IntRegs:$src)>;
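
The SETHIi/ORri pairs in the immediate and global-address patterns above use the
standard SPARC %hi/%lo idiom: sethi writes a 22-bit immediate into bits 31:10 of
the destination and clears bits 9:0, and the following or supplies the low 10
bits, which is exactly what the HI22 and LO10 transforms compute. A minimal
standalone sketch of that split (illustrative only, not part of the patch):

  #include <cassert>

  // hi22/lo10 split used by the SETHIi/ORri patterns; assumes 32-bit values.
  static unsigned HI22(unsigned Val) { return Val >> 10; }   // bits 31:10
  static unsigned LO10(unsigned Val) { return Val & 1023; }  // bits 9:0

  static unsigned materialize(unsigned Val) {
    unsigned Reg = HI22(Val) << 10;  // sethi %hi(Val), %reg
    Reg |= LO10(Val);                // or    %reg, %lo(Val), %reg
    return Reg;
  }

  int main() {
    assert(materialize(0xDEADBEEFu) == 0xDEADBEEFu);  // round-trips any i32
    return 0;
  }
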
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 0000000..1981b4f
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,276 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ Subtarget(st), TII(tii) {
+}
+
+void SparcRegisterInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, int FI,
+ const TargetRegisterClass *RC) const {
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, false, false, true);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, false, false, true);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, false, false, true);
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
+
+void SparcRegisterInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+void SparcRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(SP::FMOVS), DestReg).addReg(SrcReg);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, TII.get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg)
+ .addReg(SrcReg);
+ else
+ assert (0 && "Can't copy this register");
+}
+
+void SparcRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+MachineInstr *SparcRegisterInfo::foldMemoryOperand(MachineInstr* MI,
+ unsigned OpNum,
+ int FI) const {
+ bool isFloat = false;
+ MachineInstr *NewMI = NULL;
+ switch (MI->getOpcode()) {
+ case SP::ORrr:
+ if (MI->getOperand(1).isRegister() && MI->getOperand(1).getReg() == SP::G0&&
+ MI->getOperand(0).isRegister() && MI->getOperand(2).isRegister()) {
+ if (OpNum == 0) // COPY -> STORE
+ NewMI = BuildMI(TII.get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(MI->getOperand(2).getReg());
+ else // COPY -> LOAD
+ NewMI = BuildMI(TII.get(SP::LDri), MI->getOperand(0).getReg())
+ .addFrameIndex(FI).addImm(0);
+ }
+ break;
+ case SP::FMOVS:
+ isFloat = true;
+ // FALLTHROUGH
+ case SP::FMOVD:
+ if (OpNum == 0) // COPY -> STORE
+ NewMI = BuildMI(TII.get(isFloat ? SP::STFri : SP::STDFri))
+ .addFrameIndex(FI).addImm(0).addReg(MI->getOperand(1).getReg());
+ else // COPY -> LOAD
+ NewMI = BuildMI(TII.get(isFloat ? SP::LDFri : SP::LDDFri),
+ MI->getOperand(0).getReg()).addFrameIndex(FI).addImm(0);
+ break;
+ }
+
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return false;
+}
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ int Size = MI.getOperand(0).getImmedValue();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImmedValue();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ }
+}
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBB.begin(), TII.get(SP::SAVEri),
+ SP::O6).addImm(NumBytes).addReg(SP::O6);
+ } else {
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, InsertPt, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    // Emit G1 = G1 | %lo(NumBytes).
+ BuildMI(MBB, InsertPt, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, InsertPt, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
+
+unsigned SparcRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return SP::G1;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#include "SparcGenRegisterInfo.inc"
+
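
The emitPrologue logic above bakes the V8 ABI's fixed frame area into every
function: 16 words for the register-window spill slots, 1 word for the hidden
aggregate-return pointer, and 6 words for outgoing arguments (92 bytes total),
rounded up to an 8-byte boundary and then negated because SAVE adjusts a
downward-growing stack. A small standalone sketch of that computation
(illustrative, not part of the patch):

  #include <cassert>

  // Mirrors the NumBytes computation in SparcRegisterInfo::emitPrologue.
  static int frameSizeForSave(unsigned LocalBytes) {
    int NumBytes = (int)LocalBytes;
    NumBytes += 92;                  // 23 words * 4 bytes of ABI-required area
    NumBytes = (NumBytes + 7) & ~7;  // round up to a doubleword boundary
    return -NumBytes;                // save %sp, -size, %sp
  }

  int main() {
    assert(frameSizeForSave(0)  == -96);   // an empty frame still needs 96 bytes
    assert(frameSizeForSave(40) == -136);  // 132 rounded up to 136
    return 0;
  }
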
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 0000000..451964b
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,86 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "SparcGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ unsigned OpNum,
+ int FrameIndex) const;
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 0000000..8e2f444
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,158 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<0>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<1>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<2>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<3>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<4>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<5>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<6>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<7>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<8>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<9>;
+def O2 : Ri<10, "O2">, DwarfRegNum<10>;
+def O3 : Ri<11, "O3">, DwarfRegNum<11>;
+def O4 : Ri<12, "O4">, DwarfRegNum<12>;
+def O5 : Ri<13, "O5">, DwarfRegNum<13>;
+def O6 : Ri<14, "O6">, DwarfRegNum<14>;
+def O7 : Ri<15, "O7">, DwarfRegNum<15>;
+def L0 : Ri<16, "L0">, DwarfRegNum<16>;
+def L1 : Ri<17, "L1">, DwarfRegNum<17>;
+def L2 : Ri<18, "L2">, DwarfRegNum<18>;
+def L3 : Ri<19, "L3">, DwarfRegNum<19>;
+def L4 : Ri<20, "L4">, DwarfRegNum<20>;
+def L5 : Ri<21, "L5">, DwarfRegNum<21>;
+def L6 : Ri<22, "L6">, DwarfRegNum<22>;
+def L7 : Ri<23, "L7">, DwarfRegNum<23>;
+def I0 : Ri<24, "I0">, DwarfRegNum<24>;
+def I1 : Ri<25, "I1">, DwarfRegNum<25>;
+def I2 : Ri<26, "I2">, DwarfRegNum<26>;
+def I3 : Ri<27, "I3">, DwarfRegNum<27>;
+def I4 : Ri<28, "I4">, DwarfRegNum<28>;
+def I5 : Ri<29, "I5">, DwarfRegNum<29>;
+def I6 : Ri<30, "I6">, DwarfRegNum<30>;
+def I7 : Ri<31, "I7">, DwarfRegNum<31>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<32>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<33>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<34>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<35>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<36>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<37>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<38>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<39>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<40>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<41>;
+def F10 : Rf<10, "F10">, DwarfRegNum<42>;
+def F11 : Rf<11, "F11">, DwarfRegNum<43>;
+def F12 : Rf<12, "F12">, DwarfRegNum<44>;
+def F13 : Rf<13, "F13">, DwarfRegNum<45>;
+def F14 : Rf<14, "F14">, DwarfRegNum<46>;
+def F15 : Rf<15, "F15">, DwarfRegNum<47>;
+def F16 : Rf<16, "F16">, DwarfRegNum<48>;
+def F17 : Rf<17, "F17">, DwarfRegNum<49>;
+def F18 : Rf<18, "F18">, DwarfRegNum<50>;
+def F19 : Rf<19, "F19">, DwarfRegNum<51>;
+def F20 : Rf<20, "F20">, DwarfRegNum<52>;
+def F21 : Rf<21, "F21">, DwarfRegNum<53>;
+def F22 : Rf<22, "F22">, DwarfRegNum<54>;
+def F23 : Rf<23, "F23">, DwarfRegNum<55>;
+def F24 : Rf<24, "F24">, DwarfRegNum<56>;
+def F25 : Rf<25, "F25">, DwarfRegNum<57>;
+def F26 : Rf<26, "F26">, DwarfRegNum<58>;
+def F27 : Rf<27, "F27">, DwarfRegNum<59>;
+def F28 : Rf<28, "F28">, DwarfRegNum<60>;
+def F29 : Rf<29, "F29">, DwarfRegNum<61>;
+def F30 : Rf<30, "F30">, DwarfRegNum<62>;
+def F31 : Rf<31, "F31">, DwarfRegNum<63>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<32>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<34>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<36>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<38>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<40>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<42>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<44>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<46>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<48>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<50>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<52>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<54>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<56>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<58>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<60>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<62>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7,
+ I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ ]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ IntRegsClass::iterator
+ IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // FIXME: These special regs should be taken out of the regclass!
+ return end()-10 // Don't allocate special registers
+ -1; // FIXME: G1 reserved for large imm generation by frame code.
+ }
+ }];
+}
+
+def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8,
+ F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22,
+ F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15]>;
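
Each double-precision register Dn above carries a SubRegs list naming the
even/odd single-precision pair it overlays, so the allocator knows that, for
example, D5 conflicts with F10 and F11. That aliasing is also why the non-V9
path in copyRegToReg uses the FpMOVD pseudo instead of a real fmovd. A tiny
sketch of the D-to-F mapping (illustrative only, not part of the patch):

  #include <cassert>
  #include <utility>

  // Dn overlays the pair {F(2n), F(2n+1)}, matching the SubRegs lists above.
  static std::pair<unsigned, unsigned> subRegsOfD(unsigned N) {
    assert(N < 16 && "only D0..D15 exist");
    return { 2 * N, 2 * N + 1 };
  }

  int main() {
    assert(subRegsOfD(5).first == 10 && subRegsOfD(5).second == 11);  // D5 = {F10, F11}
    return 0;
  }
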
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 0000000..9940fcf
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,43 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "SparcGenSubtarget.inc"
+using namespace llvm;
+
+// FIXME: temporary.
+#include "llvm/Support/CommandLine.h"
+namespace {
+ cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden,
+ cl::desc("Enable V9 instructions in the V8 target"));
+}
+
+SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) {
+ // Set the default features.
+ IsV9 = false;
+ V8DeprecatedInsts = false;
+ IsVIS = false;
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+
+ // FIXME: autodetect host here!
+ CPU = "v9"; // What is a good way to detect V9?
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // Unless explicitly enabled, disable the V9 instructions.
+ if (!EnableV9)
+ IsV9 = false;
+}
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 0000000..59da95a
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,42 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+ class Module;
+
+class SparcSubtarget : public TargetSubtarget {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+public:
+ SparcSubtarget(const Module &M, const std::string &FS);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.cpp b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
new file mode 100644
index 0000000..01f7f11
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
@@ -0,0 +1,25 @@
+//===-- SparcTargetAsmInfo.cpp - Sparc asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+
+using namespace llvm;
+
+SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+}
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.h b/lib/Target/Sparc/SparcTargetAsmInfo.h
new file mode 100644
index 0000000..6b2dc59
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- SparcTargetAsmInfo.h - Sparc asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class SparcTargetMachine;
+
+ struct SparcTargetAsmInfo : public TargetAsmInfo {
+ SparcTargetAsmInfo(const SparcTargetMachine &TM);
+ };
+
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 0000000..e0206d8
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,83 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+#include "SparcTargetMachine.h"
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+namespace {
+ // Register the target.
+ RegisterTarget<SparcTargetMachine> X("sparc", " SPARC");
+}
+
+const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
+ return new SparcTargetAsmInfo(*this);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 architecture model
+///
+SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("E-p:32:32"),
+ Subtarget(M, FS), InstrInfo(Subtarget),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+}
+
+unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-")
+ return 20;
+
+ // If the target triple is something non-sparc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+#ifdef __sparc__
+ return 20; // BE/32 ==> Prefer sparc on sparc
+#else
+ return 5; // BE/32 ==> Prefer ppc elsewhere
+#endif
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+#if defined(__sparc__)
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+bool SparcTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+bool SparcTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool SparcTargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ // Output assembly language.
+ PM.add(createSparcCodePrinterPass(Out, *this));
+ return false;
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 0000000..cec0a44
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,57 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "SparcInstrInfo.h"
+#include "SparcSubtarget.h"
+
+namespace llvm {
+
+class Module;
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcSubtarget Subtarget;
+ SparcInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ SparcTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
new file mode 100644
index 0000000..d783f8b
--- /dev/null
+++ b/lib/Target/SubtargetFeature.cpp
@@ -0,0 +1,357 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+#include <ostream>
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature has a flag; '+' or '-'
+///
+static inline bool hasFlag(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const std::string &Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if the feature is enabled, i.e. prefixed with '+'.
+///
+static inline bool isEnabled(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
+static inline std::string PrependFlag(const std::string &Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature)) return Feature;
+ return std::string(IsEnabled ? "+" : "-") + Feature;
+}
+
+/// Split - Splits a string of comma-separated items into a vector of strings.
+///
+static void Split(std::vector<std::string> &V, const std::string &S) {
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+    // If no comma is found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
+
+/// Join a vector of strings to a string with a comma separating each element.
+///
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the CPU feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
+
+/// AddFeature - Add a feature to the list of subtarget features.
+void SubtargetFeatures::AddFeature(const std::string &String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
+template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+ // Make the lower bound element we're looking for
+ T KV;
+ KV.Key = S.c_str();
+ // Determine the end of the array
+ const T *Hi = A + L;
+ // Binary search the array
+ const T *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || std::string(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ cerr << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ cerr << " " << CPUTable[i].Key
+ << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
+ << " - " << CPUTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ // Print the Feature table.
+ cerr << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ cerr << " " << FeatTable[i].Key
+ << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
+ << " - " << FeatTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ cerr << "Use +feature to enable a feature, or -feature to disable it.\n"
+ << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+void SubtargetFeatures::setString(const std::string &Initial) {
+ // Throw out old features
+ Features.clear();
+ // Break up string into separate features
+ Split(Features, LowercaseString(Initial));
+}
+
+
+/// setCPU - Set the CPU string. Replaces previous setting. Setting to ""
+/// clears CPU.
+void SubtargetFeatures::setCPU(const std::string &String) {
+ Features[0] = LowercaseString(String);
+}
+
+
+/// setCPUIfNone - Set the CPU string only if no CPU has been set yet.
+///
+void SubtargetFeatures::setCPUIfNone(const std::string &String) {
+ if (Features[0].empty()) setCPU(String);
+}
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
+static
+void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
+static
+void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// getBits - Get feature bits.
+///
+uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ assert(CPUTable && "missing CPU table");
+ assert(FeatureTable && "missing features table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint32_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (Features[0] == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry
+ const SubtargetFeatureKV *CPUEntry =
+ Find(Features[0], CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+    // Set the features implied by the features this CPU provides, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ }
+ // Iterate through each feature
+ for (size_t i = 1; i < Features.size(); i++) {
+ const std::string &Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)"
+ << "\n";
+ }
+ }
+
+ return Bits;
+}
+
+/// Get info pointer
+void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
+ size_t TableSize) {
+ assert(Table && "missing table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < TableSize; i++) {
+ assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted");
+ }
+#endif
+
+ // Find entry
+ const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize);
+
+ if (Entry) {
+ return Entry->Value;
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ return NULL;
+ }
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(std::ostream &OS) const {
+ for (size_t i = 0; i < Features.size(); i++) {
+ OS << Features[i] << " ";
+ }
+ OS << "\n";
+}
+
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(*cerr.stream());
+}
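
getBits above consumes a features string whose first element is the CPU name and
whose remaining elements are '+'/'-' prefixed feature names, all comma
separated; Split and Join implement that encoding. A standalone sketch of the
convention (the feature names used here are illustrative, not taken from this
patch):

  #include <cassert>
  #include <string>
  #include <vector>

  // Same splitting rule as SubtargetFeatures: element 0 is the CPU name, the
  // rest are "+feat" / "-feat" entries.
  static std::vector<std::string> split(const std::string &S) {
    std::vector<std::string> V;
    for (size_t Pos = 0;;) {
      size_t Comma = S.find(',', Pos);
      if (Comma == std::string::npos) { V.push_back(S.substr(Pos)); break; }
      V.push_back(S.substr(Pos, Comma - Pos));
      Pos = Comma + 1;
    }
    return V;
  }

  int main() {
    std::vector<std::string> F = split("v9,+vis,-deprecated-v8");
    assert(F.size() == 3 && F[0] == "v9" && F[1] == "+vis");
    assert(F[2] == "-deprecated-v8");
    return 0;
  }
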
diff --git a/lib/Target/Target.td b/lib/Target/Target.td
new file mode 100644
index 0000000..48af218
--- /dev/null
+++ b/lib/Target/Target.td
@@ -0,0 +1,413 @@
+//===- Target.td - Target Independent TableGen interface ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces which should be
+// implemented by each target which is using a TableGen based code generator.
+//
+//===----------------------------------------------------------------------===//
+
+// Include all information about LLVM intrinsics.
+include "llvm/Intrinsics.td"
+
+//===----------------------------------------------------------------------===//
+// Register file description - These classes are used to fill in the target
+// description classes.
+
+class RegisterClass; // Forward def
+
+// Register - You should define one instance of this class for each register
+// in the target machine. String n will become the "name" of the register.
+class Register<string n> {
+ string Namespace = "";
+ string Name = n;
+
+ // SpillSize - If this value is set to a non-zero value, it is the size in
+ // bits of the spill slot required to hold this register. If this value is
+ // set to zero, the information is inferred from any register classes the
+ // register belongs to.
+ int SpillSize = 0;
+
+ // SpillAlignment - This value is used to specify the alignment required for
+ // spilling the register. Like SpillSize, this should only be explicitly
+ // specified if the register is not in a register class.
+ int SpillAlignment = 0;
+
+ // Aliases - A list of registers that this register overlaps with. A read or
+ // modification of this register can potentially read or modify the aliased
+ // registers.
+ list<Register> Aliases = [];
+
+ // SubRegs - A list of registers that are parts of this register. Note these
+ // are "immediate" sub-registers and the registers within the list do not
+ // themselves overlap. e.g. For X86, EAX's SubRegs list contains only [AX],
+ // not [AX, AH, AL].
+ list<Register> SubRegs = [];
+
+ // DwarfNumber - Number used internally by gcc/gdb to identify the register.
+ // These values can be determined by locating the <target>.h file in the
+ // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The
+  // order of these names corresponds to the enumeration used by gcc. A value of
+ // -1 indicates that the gcc number is undefined.
+ int DwarfNumber = -1;
+}
+
+// RegisterWithSubRegs - This can be used to define instances of Register which
+// need to specify sub-registers.
+// List "subregs" specifies which registers are sub-registers to this one. This
+// is used to populate the SubRegs and AliasSet fields of TargetRegisterDesc.
+// This allows the code generator to be careful not to put two values with
+// overlapping live ranges into registers which alias.
+class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
+ let SubRegs = subregs;
+}
+
+// SubRegSet - This can be used to define a specific mapping of registers to
+// indices, for use as named subregs of a particular physical register. Each
+// register in 'subregs' becomes an addressable subregister at index 'n' of the
+// corresponding register in 'regs'.
+class SubRegSet<int n, list<Register> regs, list<Register> subregs> {
+ int index = n;
+
+ list<Register> From = regs;
+ list<Register> To = subregs;
+}
+
+// RegisterClass - Now that all of the registers are defined, and aliases
+// between registers are defined, specify which registers belong to which
+// register classes. This also defines the default allocation order of
+// registers by register allocators.
+//
+class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
+ list<Register> regList> {
+ string Namespace = namespace;
+
+  // RegTypes - Specify the list of ValueTypes of the registers in this
+  // register class.  Note that all registers in a register class must have
+  // the same ValueTypes.  This is a list because some targets permit storing
+  // different types in the same register, for example vector values with
+  // 128-bit total size, but different count/size of items, like SSE on x86.
+ //
+ list<ValueType> RegTypes = regTypes;
+
+ // Size - Specify the spill size in bits of the registers. A default value of
+  // zero lets tablegen pick an appropriate size.
+ int Size = 0;
+
+ // Alignment - Specify the alignment required of the registers when they are
+ // stored or loaded to memory.
+ //
+ int Alignment = alignment;
+
+ // MemberList - Specify which registers are in this class. If the
+  // allocation_order_* methods are not specified, this also defines the order of
+ // allocation used by the register allocator.
+ //
+ list<Register> MemberList = regList;
+
+ // SubClassList - Specify which register classes correspond to subregisters
+ // of this class. The order should be by subregister set index.
+ list<RegisterClass> SubRegClassList = [];
+
+ // MethodProtos/MethodBodies - These members can be used to insert arbitrary
+ // code into a generated register class. The normal usage of this is to
+ // overload virtual methods.
+ code MethodProtos = [{}];
+ code MethodBodies = [{}];
+}
+
+
+//===----------------------------------------------------------------------===//
+// DwarfRegNum - This class provides a mapping of the llvm register enumeration
+// to the register numbering used by gcc and gdb. These values are used by a
+// debug information writer (ex. DwarfWriter) to describe where values may be
+// located during execution.
+class DwarfRegNum<int N> {
+ // DwarfNumber - Number used internally by gcc/gdb to identify the register.
+ // These values can be determined by locating the <target>.h file in the
+ // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The
+  // order of these names corresponds to the enumeration used by gcc. A value of
+ // -1 indicates that the gcc number is undefined.
+ int DwarfNumber = N;
+}
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for scheduling
+//
+include "TargetSchedule.td"
+
+class Predicate; // Forward def
+
+//===----------------------------------------------------------------------===//
+// Instruction set description - These classes correspond to the C++ classes in
+// the Target/TargetInstrInfo.h file.
+//
+class Instruction {
+ string Name = ""; // The opcode string for this instruction
+ string Namespace = "";
+
+  dag OperandList;       // A dag containing the MI operand list.
+ string AsmString = ""; // The .s format to print the instruction with.
+
+ // Pattern - Set to the DAG pattern for this instruction, if we know of one,
+ // otherwise, uninitialized.
+ list<dag> Pattern;
+
+  // The following state will eventually be inferred automatically from the
+ // instruction pattern.
+
+ list<Register> Uses = []; // Default to using no non-operand registers
+ list<Register> Defs = []; // Default to modifying no non-operand registers
+
+ // Predicates - List of predicates which will be turned into isel matching
+ // code.
+ list<Predicate> Predicates = [];
+
+ // Code size.
+ int CodeSize = 0;
+
+ // Added complexity passed onto matching pattern.
+ int AddedComplexity = 0;
+
+ // These bits capture information about the high-level semantics of the
+ // instruction.
+ bit isReturn = 0; // Is this instruction a return instruction?
+ bit isBranch = 0; // Is this instruction a branch instruction?
+ bit isBarrier = 0; // Can control flow fall through this instruction?
+ bit isCall = 0; // Is this instruction a call instruction?
+ bit isLoad = 0; // Is this instruction a load instruction?
+ bit isStore = 0; // Is this instruction a store instruction?
+ bit isTwoAddress = 0; // Is this a two address instruction?
+ bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
+ bit isCommutable = 0; // Is this 3 operand instruction commutable?
+ bit isTerminator = 0; // Is this part of the terminator for a basic block?
+ bit isReMaterializable = 0; // Is this instruction re-materializable?
+ bit isPredicable = 0; // Is this instruction predicable?
+  bit hasDelaySlot = 0;     // Does this instruction have a delay slot?
+ bit usesCustomDAGSchedInserter = 0; // Pseudo instr needing special help.
+ bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains?
+ bit noResults = 0; // Does this instruction produce no results?
+ bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction?
+
+ InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling.
+
+ string Constraints = ""; // OperandConstraint, e.g. $src = $dst.
+
+ /// DisableEncoding - List of operand names (e.g. "$op1,$op2") that should not
+ /// be encoded into the output machineinstr.
+ string DisableEncoding = "";
+}
+
+/// Imp - Helper class for specifying the implicit uses/defs set for an
+/// instruction.
+class Imp<list<Register> uses, list<Register> defs> {
+ list<Register> Uses = uses;
+ list<Register> Defs = defs;
+}
+
+/// Predicates - These are extra conditionals which are turned into instruction
+/// selector matching code. Currently each predicate is just a string.
+class Predicate<string cond> {
+ string CondString = cond;
+}
+
+/// NoHonorSignDependentRounding - This predicate is true if support for
+/// sign-dependent-rounding is not enabled.
+def NoHonorSignDependentRounding
+ : Predicate<"!HonorSignDependentRoundingFPMath()">;
+
+class Requires<list<Predicate> preds> {
+ list<Predicate> Predicates = preds;
+}
+
+/// ops definition - This is just a simple marker used to identify the operands
+/// list for an instruction. This should be used like this:
+/// (ops R32:$dst, R32:$src) or something similar.
+def ops;
+
+/// variable_ops definition - Mark this instruction as taking a variable number
+/// of operands.
+def variable_ops;
+
+/// ptr_rc definition - Mark this operand as being a pointer value whose
+/// register class is resolved dynamically via a callback to TargetInstrInfo.
+/// FIXME: We should probably change this to a class which contains a list of
+/// flags. But currently we have but one flag.
+def ptr_rc;
+
+/// Operand Types - These provide the built-in operand types that may be used
+/// by a target. Targets can optionally provide their own operand types as
+/// needed, though this should not be needed for RISC targets.
+class Operand<ValueType ty> {
+ ValueType Type = ty;
+ string PrintMethod = "printOperand";
+ dag MIOperandInfo = (ops);
+}
+
+def i1imm : Operand<i1>;
+def i8imm : Operand<i8>;
+def i16imm : Operand<i16>;
+def i32imm : Operand<i32>;
+def i64imm : Operand<i64>;
+
+/// zero_reg definition - Special node to stand for the zero register.
+///
+def zero_reg;
+
+/// PredicateOperand - This can be used to define a predicate operand for an
+/// instruction. OpTypes specifies the MIOperandInfo for the operand, and
+/// AlwaysVal specifies the value of this predicate when set to "always
+/// execute".
+class PredicateOperand<ValueType ty, dag OpTypes, dag AlwaysVal>
+ : Operand<ty> {
+ let MIOperandInfo = OpTypes;
+ dag DefaultOps = AlwaysVal;
+}
+
+/// OptionalDefOperand - This is used to define an optional definition operand
+/// for an instruction. DefaultOps is the register the operand represents if none
+/// is supplied, e.g. zero_reg.
+class OptionalDefOperand<ValueType ty, dag OpTypes, dag defaultops>
+ : Operand<ty> {
+ let MIOperandInfo = OpTypes;
+ dag DefaultOps = defaultops;
+}
+
+
+// InstrInfo - This class should only be instantiated once to provide parameters
+// which are global to the target machine.
+//
+class InstrInfo {
+ // If the target wants to associate some target-specific information with each
+ // instruction, it should provide these two lists to indicate how to assemble
+ // the target specific information into the 32 bits available.
+ //
+ list<string> TSFlagsFields = [];
+ list<int> TSFlagsShifts = [];
+
+ // Target can specify its instructions in either big or little-endian formats.
+ // For instance, while both Sparc and PowerPC are big-endian platforms, the
+ // Sparc manual specifies its instructions in the format [31..0] (big), while
+ // PowerPC specifies them using the format [0..31] (little).
+ bit isLittleEndianEncoding = 0;
+}
+
+// Standard Instructions.
+def PHI : Instruction {
+ let OperandList = (ops variable_ops);
+ let AsmString = "PHINODE";
+ let Namespace = "TargetInstrInfo";
+}
+def INLINEASM : Instruction {
+ let OperandList = (ops variable_ops);
+ let AsmString = "";
+ let Namespace = "TargetInstrInfo";
+}
+def LABEL : Instruction {
+ let OperandList = (ops i32imm:$id);
+ let AsmString = "";
+ let Namespace = "TargetInstrInfo";
+ let hasCtrlDep = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter - This class can be implemented by targets that need to customize
+// the format of the .s file writer.
+//
+// Subtargets can have multiple different asmwriters (e.g. AT&T vs Intel syntax
+// on X86 for example).
+//
+class AsmWriter {
+ // AsmWriterClassName - This specifies the suffix to use for the asmwriter
+ // class. Generated AsmWriter classes are always prefixed with the target
+ // name.
+ string AsmWriterClassName = "AsmPrinter";
+
+ // InstFormatName - AsmWriters can specify the name of the format string to
+ // print instructions with.
+ string InstFormatName = "AsmString";
+
+ // Variant - AsmWriters can be of multiple different variants. Variants are
+ // used to support targets that need to emit assembly code in ways that are
+ // mostly the same for different targets, but have minor differences in
+  // syntax.  If the asmstring contains {|} characters, this integer will
+  // specify which alternative to use.  For example "{x|y|z}" with Variant == 1
+  // will expand to "y".
+ int Variant = 0;
+}
+def DefaultAsmWriter : AsmWriter;
+
+
+//===----------------------------------------------------------------------===//
+// Target - This class contains the "global" target information
+//
+class Target {
+ // InstructionSet - Instruction set description for this target.
+ InstrInfo InstructionSet;
+
+ // AssemblyWriters - The AsmWriter instances available for this target.
+ list<AsmWriter> AssemblyWriters = [DefaultAsmWriter];
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeature - A characteristic of the chip set.
+//
+class SubtargetFeature<string n, string a, string v, string d,
+ list<SubtargetFeature> i = []> {
+ // Name - Feature name. Used by command line (-mattr=) to determine the
+ // appropriate target chip.
+ //
+ string Name = n;
+
+ // Attribute - Attribute to be set by feature.
+ //
+ string Attribute = a;
+
+  // Value - The value to which the attribute is set by this feature.
+ //
+ string Value = v;
+
+ // Desc - Feature description. Used by command line (-mattr=) to display help
+ // information.
+ //
+ string Desc = d;
+
+ // Implies - Features that this feature implies are present. If one of those
+ // features isn't set, then this one shouldn't be set either.
+ //
+ list<SubtargetFeature> Implies = i;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor chip sets - These values represent each of the chip sets supported
+// by the scheduler. Each Processor definition requires corresponding
+// instruction itineraries.
+//
+class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> {
+ // Name - Chip set name. Used by command line (-mcpu=) to determine the
+ // appropriate target chip.
+ //
+ string Name = n;
+
+ // ProcItin - The scheduling information for the target processor.
+ //
+ ProcessorItineraries ProcItin = pi;
+
+  // Features - List of features this processor supports.
+ list<SubtargetFeature> Features = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for calling conventions.
+//
+include "TargetCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for DAG isel generation.
+//
+include "TargetSelectionDAG.td"
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 0000000..df7a2ec
--- /dev/null
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,131 @@
+//===-- TargetAsmInfo.cpp - Asm Info ---------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include <cctype>
+#include <cstring>
+
+using namespace llvm;
+
+TargetAsmInfo::TargetAsmInfo() :
+ TextSection("\t.text"),
+ DataSection("\t.data"),
+ BSSSection("\t.bss"),
+ TLSDataSection("\t.section .tdata,\"awT\",@progbits"),
+ TLSBSSSection("\t.section .tbss,\"awT\",@nobits"),
+ ZeroFillDirective(0),
+ AddressSize(4),
+ NeedsSet(false),
+ MaxInstLength(4),
+ PCSymbol("$"),
+ SeparatorChar(';'),
+ CommentString("#"),
+ GlobalPrefix(""),
+ PrivateGlobalPrefix("."),
+ JumpTableSpecialLabelPrefix(0),
+ GlobalVarAddrPrefix(""),
+ GlobalVarAddrSuffix(""),
+ FunctionAddrPrefix(""),
+ FunctionAddrSuffix(""),
+ InlineAsmStart("#APP"),
+ InlineAsmEnd("#NO_APP"),
+ AssemblerDialect(0),
+ ZeroDirective("\t.zero\t"),
+ ZeroDirectiveSuffix(0),
+ AsciiDirective("\t.ascii\t"),
+ AscizDirective("\t.asciz\t"),
+ Data8bitsDirective("\t.byte\t"),
+ Data16bitsDirective("\t.short\t"),
+ Data32bitsDirective("\t.long\t"),
+ Data64bitsDirective("\t.quad\t"),
+ AlignDirective("\t.align\t"),
+ AlignmentIsInBytes(true),
+ SwitchToSectionDirective("\t.section\t"),
+ TextSectionStartSuffix(""),
+ DataSectionStartSuffix(""),
+ SectionEndDirectiveSuffix(0),
+ ConstantPoolSection("\t.section .rodata"),
+ JumpTableDataSection("\t.section .rodata"),
+ JumpTableDirective(0),
+ CStringSection(0),
+ StaticCtorsSection("\t.section .ctors,\"aw\",@progbits"),
+ StaticDtorsSection("\t.section .dtors,\"aw\",@progbits"),
+ FourByteConstantSection(0),
+ EightByteConstantSection(0),
+ SixteenByteConstantSection(0),
+ ReadOnlySection(0),
+ GlobalDirective(0),
+ SetDirective(0),
+ LCOMMDirective(0),
+ COMMDirective("\t.comm\t"),
+ COMMDirectiveTakesAlignment(true),
+ HasDotTypeDotSizeDirective(true),
+ UsedDirective(0),
+ WeakRefDirective(0),
+ HiddenDirective("\t.hidden\t"),
+ ProtectedDirective("\t.protected\t"),
+ AbsoluteDebugSectionOffsets(false),
+ AbsoluteEHSectionOffsets(false),
+ HasLEB128(false),
+ HasDotLoc(false),
+ HasDotFile(false),
+ SupportsDebugInformation(false),
+ SupportsExceptionHandling(false),
+ DwarfRequiresFrameSection(true),
+ DwarfSectionOffsetDirective(0),
+ DwarfAbbrevSection(".debug_abbrev"),
+ DwarfInfoSection(".debug_info"),
+ DwarfLineSection(".debug_line"),
+ DwarfFrameSection(".debug_frame"),
+ DwarfPubNamesSection(".debug_pubnames"),
+ DwarfPubTypesSection(".debug_pubtypes"),
+ DwarfStrSection(".debug_str"),
+ DwarfLocSection(".debug_loc"),
+ DwarfARangesSection(".debug_aranges"),
+ DwarfRangesSection(".debug_ranges"),
+ DwarfMacInfoSection(".debug_macinfo"),
+ DwarfEHFrameSection(".eh_frame"),
+ DwarfExceptionSection(".gcc_except_table"),
+ AsmTransCBE(0) {
+}
+
+TargetAsmInfo::~TargetAsmInfo() {
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorChar or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorChar or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in target-specific code to handle them.
+unsigned TargetAsmInfo::getInlineAsmLength(const char *Str) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || *Str == SeparatorChar)
+ atInsnStart = true;
+ if (atInsnStart && strncmp(Str, CommentString, strlen(CommentString))==0)
+ atInsnStart = false;
+ if (atInsnStart && !isspace(*Str)) {
+ Length += MaxInstLength;
+ atInsnStart = false;
+ }
+ }
+
+ return Length;
+}
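+
+// Illustrative example (not part of the original patch): with the defaults
+// set in the constructor above (MaxInstLength == 4, SeparatorChar == ';'),
+// the string "mov r0, r1; add r0, r0, r2" is counted as two instructions,
+// so getInlineAsmLength returns an approximate length of 8 bytes.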
+
diff --git a/lib/Target/TargetCallingConv.td b/lib/Target/TargetCallingConv.td
new file mode 100644
index 0000000..9419320
--- /dev/null
+++ b/lib/Target/TargetCallingConv.td
@@ -0,0 +1,88 @@
+//===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces with which targets
+// describe their calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+class CCAction;
+class CallingConv;
+
+/// CCPredicateAction - Instances of this class check some predicate, then
+/// delegate to another action if the predicate is true.
+class CCPredicateAction<CCAction A> : CCAction {
+ CCAction SubAction = A;
+}
+
+/// CCIfType - If the current argument is one of the specified types, apply
+/// Action A.
+class CCIfType<list<ValueType> vts, CCAction A> : CCPredicateAction<A> {
+ list<ValueType> VTs = vts;
+}
+
+/// CCIf - If the predicate matches, apply A.
+class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
+ string Predicate = predicate;
+}
+
+/// CCIfStruct - If the current argument is a struct, apply
+/// Action A.
+class CCIfStruct<CCAction A> : CCIf<"ArgFlags & ISD::ParamFlags::ByVal", A> {
+}
+
+/// CCIfCC - Match if the current calling convention is 'CC'.
+class CCIfCC<string CC, CCAction A>
+ : CCIf<!strconcat("State.getCallingConv() == ", CC), A> {}
+
+/// CCIfInReg - If this argument is marked with the 'inreg' attribute, apply
+/// the specified action.
+class CCIfInReg<CCAction A> : CCIf<"ArgFlags & ISD::ParamFlags::InReg", A> {}
+
+/// CCIfNotVarArg - If the current function is not vararg, apply the action A.
+class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
+
+/// CCAssignToReg - This action matches if there is a register in the specified
+/// list that is still available. If so, it assigns the value to the first
+/// available register and succeeds.
+class CCAssignToReg<list<Register> regList> : CCAction {
+ list<Register> RegList = regList;
+}
+
+/// CCAssignToStack - This action always matches: it assigns the value to a
+/// stack slot of the specified size and alignment on the stack.
+class CCAssignToStack<int size, int align> : CCAction {
+ int Size = size;
+ int Align = align;
+}
+
+/// CCStructAssign - This action always matches: it will use the C ABI and
+/// the register availability to decide whether to assign to a set of
+/// registers or to a stack slot.
+class CCStructAssign<list<Register> regList> : CCAction {
+ list<Register> RegList = regList;
+}
+
+/// CCPromoteToType - If applied, this promotes the specified current value to
+/// the specified type.
+class CCPromoteToType<ValueType destTy> : CCAction {
+ ValueType DestTy = destTy;
+}
+
+/// CCDelegateTo - This action invokes the specified sub-calling-convention. It
+/// is successful if the specified CC matches.
+class CCDelegateTo<CallingConv cc> : CCAction {
+ CallingConv CC = cc;
+}
+
+/// CallingConv - An instance of this is used to define each calling convention
+/// that the target supports.
+class CallingConv<list<CCAction> actions> {
+ list<CCAction> Actions = actions;
+}
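+
+// Illustrative sketch (not part of the original patch): a target would
+// normally combine these classes in its own calling-convention .td file
+// roughly as follows.  The convention name and registers (R0-R3) below are
+// hypothetical, so the example is left in comments rather than defined here.
+//
+//   def CC_Hypothetical : CallingConv<[
+//     CCIfType<[i8, i16], CCPromoteToType<i32>>,
+//     CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+//     CCAssignToStack<4, 4>
+//   ]>;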
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
new file mode 100644
index 0000000..301e8c1
--- /dev/null
+++ b/lib/Target/TargetData.cpp
@@ -0,0 +1,595 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&.  None of the member functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstdlib>
+#include <sstream>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+namespace {
+ // Register the TargetData pass...
+ RegisterPass<TargetData> X("targetdata", "Target Data Layout");
+}
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory...
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ const Type *Ty = ST->getElementType(i);
+ unsigned TyAlign;
+ uint64_t TySize;
+ TyAlign = (ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty));
+ TySize = TD.getTypeSize(Ty);
+
+ // Add padding if necessary to make the data element aligned properly...
+ if (StructSize % TyAlign != 0)
+ StructSize = (StructSize/TyAlign + 1) * TyAlign; // Add padding...
+
+ // Keep track of maximum alignment constraint
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TySize; // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if (StructSize % StructAlignment != 0)
+ StructSize = (StructSize/StructAlignment + 1) * StructAlignment;
+}
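+
+// Illustrative example (not part of the original patch): for a non-packed
+// struct { i8, i32, i16 } with ABI alignments of 1, 4 and 2 bytes, the loop
+// above produces member offsets 0, 4 and 8, a struct alignment of 4, and a
+// struct size of 10 that is then padded up to 12 so the type tiles correctly
+// in an array.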
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) < Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+ return SI-&MemberOffsets[0];
+}
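+
+// Illustrative example (not part of the original patch): with member offsets
+// {0, 4, 8} as in the struct described above, an Offset of 5 makes upper_bound
+// point at the offset 8, the decrement lands on offset 4, and the function
+// returns element index 1.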
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+std::ostream &
+TargetAlignElem::dump(std::ostream &os) const {
+ return os << AlignType
+ << TypeBitWidth
+ << ":" << (int) (ABIAlign * 8)
+ << ":" << (int) (PrefAlign * 8);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/*!
+ A TargetDescription string consists of a sequence of hyphen-delimited
+ specifiers for target endianness, pointer size and alignments, and various
+ primitive type sizes and alignments. A typical string looks something like:
+ <br><br>
+ "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64"
+ <br><br>
+ (note: this string is not fully specified and is only an example.)
+ \p
+ Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align,
+ below) dictates how a type will be aligned within an aggregate and when used
+ as an argument. Preferred alignment (pref_align, below) determines a type's
+ alignment when emitted as a global.
+ \p
+ Specifier string details:
+ <br><br>
+ <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e"
+ specifies a little-endian target data model.
+ <br><br>
+ <i>p:<size>:<abi_align>:<pref_align></i>: Pointer size, ABI and preferred
+ alignment.
+ <br><br>
+ <i><type><size>:<abi_align>:<pref_align></i>: Numeric type alignment. Type is
+ one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector (aka
+ packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits.
+ \p
+ The default string, fully specified, is:
+ <br><br>
+ "E-p:64:64:64-a0:0:0-f32:32:32-f64:0:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:0:64"
+ "-v64:64:64-v128:128:128"
+ <br><br>
+ Note that in the case of aggregates, 0 is the default ABI and preferred
+ alignment. This is a special case, where the aggregate's computed worst-case
+ alignment will be used.
+ */
+void TargetData::init(const std::string &TargetDescription) {
+ std::string temp = TargetDescription;
+
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // Bool
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // Byte
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // short
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // int
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // long
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ...
+
+ while (!temp.empty()) {
+ std::string token = getToken(temp, "-");
+ std::string arg0 = getToken(token, ":");
+ const char *p = arg0.c_str();
+ switch(*p) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ PointerMemSize = atoi(getToken(token,":").c_str()) / 8;
+ PointerABIAlign = atoi(getToken(token,":").c_str()) / 8;
+ PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a': {
+ AlignTypeEnum align_type =
+ (*p == 'i' ? INTEGER_ALIGN : (*p == 'f' ? FLOAT_ALIGN :
+ (*p == 'v' ? VECTOR_ALIGN : AGGREGATE_ALIGN)));
+ uint32_t size = (uint32_t) atoi(++p);
+ unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8;
+ unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8;
+ if (pref_align == 0)
+ pref_align = abi_align;
+ setAlignment(align_type, abi_align, pref_align, size);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
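+
+// Illustrative example (not part of the original patch): the string
+// "e-p:32:32:32-i64:32:64" parsed by init() above describes a little-endian
+// target with 4-byte pointers (ABI and preferred alignment of 4 bytes) and
+// 64-bit integers with an ABI alignment of 4 bytes and a preferred alignment
+// of 8 bytes.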
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass((intptr_t)&ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == VECTOR_ALIGN) {
+ // If this is a specification for a smaller vector type, we will fall back
+ // to it. This happens because <128 x double> can be implemented in terms
+ // of 64 <2 x double>.
+ if (Alignments[i].AlignType == VECTOR_ALIGN &&
+ Alignments[i].TypeBitWidth < BitWidth) {
+ // Verify that we pick the biggest of the fallbacks.
+ if (BestMatchIdx == -1 ||
+ Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth)
+ BestMatchIdx = i;
+ }
+ } else if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // For integers, if we didn't find a best match, use the largest one found.
+ if (BestMatchIdx == -1)
+ BestMatchIdx = LargestInt;
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ assert(BestMatchIdx != -1 && "Didn't find alignment info for this datatype!");
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
+
+/// LayoutInfo - The lazy cache of structure layout information maintained by
+/// TargetData.  Note that the struct types must have been freed before
+/// llvm_shutdown is called (and thus before this map is deallocated), because
+/// all the TargetData objects with cached entries should have been destroyed
+/// by then.
+///
+typedef std::pair<const TargetData*,const StructType*> LayoutKey;
+
+struct DenseMapLayoutKeyInfo {
+ static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); }
+ static inline LayoutKey getTombstoneKey() {
+ return LayoutKey((TargetData*)(intptr_t)-1, 0);
+ }
+ static unsigned getHashValue(const LayoutKey &Val) {
+ return DenseMapKeyInfo<void*>::getHashValue(Val.first) ^
+ DenseMapKeyInfo<void*>::getHashValue(Val.second);
+ }
+ static bool isPod() { return true; }
+};
+
+typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy;
+static ManagedStatic<LayoutInfoTy> LayoutInfo;
+
+
+TargetData::~TargetData() {
+ if (LayoutInfo.isConstructed()) {
+ // Remove any layouts for this TD.
+ LayoutInfoTy &TheMap = *LayoutInfo;
+ for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end();
+ I != E; ) {
+ if (I->first.first == this) {
+ I->second->~StructLayout();
+ free(I->second);
+ TheMap.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+ }
+}
+
+const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+ LayoutInfoTy &TheMap = *LayoutInfo;
+
+ StructLayout *&SL = TheMap[LayoutKey(this, Ty)];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to TheMap, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+ return L;
+}
+
+/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
+/// objects. If a TargetData object is alive when types are being refined and
+/// removed, this method must be called whenever a StructType is removed to
+/// avoid a dangling pointer in this cache.
+void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
+ if (!LayoutInfo.isConstructed()) return; // No cache.
+
+ LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty));
+ if (I != LayoutInfo->end()) {
+ I->second->~StructLayout();
+ free(I->second);
+ LayoutInfo->erase(I);
+ }
+}
+
+
+std::string TargetData::getStringRepresentation() const {
+ std::string repr;
+ repr.append(LittleEndian ? "e" : "E");
+ repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))).
+ append(":").append(itostr((int64_t) (PointerABIAlign * 8))).
+ append(":").append(itostr((int64_t) (PointerPrefAlign * 8)));
+ for (align_const_iterator I = Alignments.begin();
+ I != Alignments.end();
+ ++I) {
+ repr.append("-").append(1, (char) I->AlignType).
+ append(utostr((int64_t) I->TypeBitWidth)).
+ append(":").append(utostr((uint64_t) (I->ABIAlign * 8))).
+ append(":").append(utostr((uint64_t) (I->PrefAlign * 8)));
+ }
+ return repr;
+}
+
+
+uint64_t TargetData::getTypeSize(const Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSize();
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ uint64_t Size;
+ unsigned char Alignment;
+ Size = getTypeSize(ATy->getElementType());
+ Alignment = getABITypeAlignment(ATy->getElementType());
+ uint64_t AlignedSize = (Size + Alignment - 1)/Alignment*Alignment;
+ return AlignedSize*ATy->getNumElements();
+ }
+ case Type::StructTyID: {
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ return Layout->getSizeInBytes();
+ }
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ if (BitWidth <= 8) {
+ return 1;
+ } else if (BitWidth <= 16) {
+ return 2;
+ } else if (BitWidth <= 32) {
+ return 4;
+ } else if (BitWidth <= 64) {
+ return 8;
+ } else {
+ // The size of this > 64 bit type is chosen as a multiple of the
+ // preferred alignment of the largest "native" size the target supports.
+ // We first obtain the alignment info for this type and then compute
+ // the next largest multiple of that size.
+ uint64_t size = getAlignmentInfo(INTEGER_ALIGN, BitWidth, false) * 8;
+ return (((BitWidth / (size)) + (BitWidth % size != 0)) * size) / 8;
+ }
+ break;
+ }
+ case Type::VoidTyID:
+ return 1;
+ case Type::FloatTyID:
+ return 4;
+ case Type::DoubleTyID:
+ return 8;
+ case Type::VectorTyID: {
+ const VectorType *PTy = cast<VectorType>(Ty);
+ return PTy->getBitWidth() / 8;
+ }
+ default:
+ assert(0 && "TargetData::getTypeSize(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+ if (Ty->isInteger())
+ return cast<IntegerType>(Ty)->getBitWidth();
+ else
+ return getTypeSize(Ty) * 8;
+}
+
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ /* Early escape for the non-numeric types */
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref);
+ return std::max(Align, (unsigned)Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ // Degenerate vectors are assumed to be scalarized.
+ if (VTy->getNumElements() == 1)
+ return getAlignment(VTy->getElementType(), abi_or_pref);
+ else
+ AlignType = VECTOR_ALIGN;
+ break;
+ }
+ default:
+ assert(0 && "Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSize(Ty) * 8,
+ abi_or_pref);
+}
+
+unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an unsigned integer type that is the same size as or
+/// greater than the target pointer size.
+const Type *TargetData::getIntPtrType() const {
+ switch (getPointerSize()) {
+ default: assert(0 && "Unknown pointer size!");
+ case 2: return Type::Int16Ty;
+ case 4: return Type::Int32Ty;
+ case 8: return Type::Int64Ty;
+ }
+}
+
+
+uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
+ unsigned NumIndices) const {
+ const Type *Ty = ptrTy;
+ assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
+ for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
+ if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() == Type::Int32Ty &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue();
+ Result += arrayIdx * (int64_t)getTypeSize(Ty);
+ }
+ }
+
+ return Result;
+}
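+
+// Illustrative example (not part of the original patch): for a pointer to
+// { i32, [4 x i16] } and the index list (0, 1, 2), the loop above adds
+// 0 * sizeof(struct) for the pointer index, the field offset 4 for the struct
+// index, and 2 * sizeof(i16) == 4 for the array index, giving a total offset
+// of 8 bytes.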
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ const Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPreferredTypeAlignmentShift(ElemType);
+ if (GV->getAlignment() > (1U << Alignment))
+ Alignment = Log2_32(GV->getAlignment());
+
+ if (GV->hasInitializer()) {
+ if (Alignment < 4) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSize(ElemType) > 128)
+ Alignment = 4; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp
new file mode 100644
index 0000000..e6f15b1
--- /dev/null
+++ b/lib/Target/TargetFrameInfo.cpp
@@ -0,0 +1,19 @@
+//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameInfo::~TargetFrameInfo() {
+}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 0000000..11b5b63
--- /dev/null
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,99 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+/// findTiedToSrcOperand - Returns the operand that is tied to the specified
+/// dest operand. Returns -1 if there isn't one.
+int TargetInstrDescriptor::findTiedToSrcOperand(unsigned OpNum) const {
+ for (unsigned i = 0, e = numOperands; i != e; ++i) {
+ if (i == OpNum)
+ continue;
+ if (getOperandConstraint(i, TOI::TIED_TO) == (int)OpNum)
+ return i;
+ }
+ return -1;
+}
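+
+// Illustrative example (not part of the original patch): for a two-address
+// instruction whose operand 1 carries a TIED_TO constraint naming operand 0,
+// findTiedToSrcOperand(0) scans the other operands and returns 1.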
+
+
+TargetInstrInfo::TargetInstrInfo(const TargetInstrDescriptor* Desc,
+ unsigned numOpcodes)
+ : desc(Desc), NumOpcodes(numOpcodes) {
+}
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// operands 1 and 2.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI) const {
+ assert(MI->getOperand(1).isRegister() && MI->getOperand(2).isRegister() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ if (Reg1IsKill)
+ MI->getOperand(2).setIsKill();
+ else
+ MI->getOperand(2).unsetIsKill();
+ if (Reg2IsKill)
+ MI->getOperand(1).setIsKill();
+ else
+ MI->getOperand(1).unsetIsKill();
+ return MI;
+}
+
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const std::vector<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ if (TID->Flags & M_PREDICABLE) {
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND)) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImmedValue());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMachineBasicBlock(Pred[j].getMachineBasicBlock());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ if (TID->Flags & M_TERMINATOR_FLAG) {
+ // Conditional branch is a special case.
+ if ((TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0)
+ return true;
+ if ((TID->Flags & M_PREDICABLE) == 0)
+ return true;
+ return !isPredicated(MI);
+ }
+ return false;
+}
diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp
new file mode 100644
index 0000000..9a9d0d3
--- /dev/null
+++ b/lib/Target/TargetMachOWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- TargetMachOWriterInfo.cpp - MachO Writer Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetMachOWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+using namespace llvm;
+
+TargetMachOWriterInfo::~TargetMachOWriterInfo() {}
+
+MachineRelocation
+TargetMachOWriterInfo::GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ // FIXME: do something about PIC
+ return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB);
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
new file mode 100644
index 0000000..6c00a3f
--- /dev/null
+++ b/lib/Target/TargetMachine.cpp
@@ -0,0 +1,164 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool FiniteOnlyFPMathOption;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ bool NoZerosInBSS;
+ bool ExceptionHandling;
+ Reloc::Model RelocationModel;
+ CodeModel::Model CMModel;
+}
+namespace {
+ cl::opt<bool, true> PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+
+ cl::opt<bool, true>
+ DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+ cl::opt<bool, true>
+ DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+ cl::opt<bool, true>
+ EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+ cl::opt<bool, true>
+ EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
+ cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"),
+ cl::location(FiniteOnlyFPMathOption),
+ cl::init(false));
+ cl::opt<bool, true>
+ EnableHonorSignDependentRoundingFPMath(cl::Hidden,
+ "enable-sign-dependent-rounding-fp-math",
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+
+ cl::opt<bool, true>
+ GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+ cl::opt<bool, true>
+ DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+ cl::opt<bool, true>
+ EnableExceptionHandling("enable-eh",
+ cl::desc("Exception handling should be emitted."),
+ cl::location(ExceptionHandling),
+ cl::init(false));
+
+ cl::opt<llvm::Reloc::Model, true>
+ DefRelocationModel(
+ "relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::location(RelocationModel),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ " Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ " Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ " Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ " Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+ cl::opt<llvm::CodeModel::Model, true>
+ DefCodeModel(
+ "code-model",
+ cl::desc("Choose code model"),
+ cl::location(CMModel),
+ cl::init(CodeModel::Default),
+ cl::values(
+ clEnumValN(CodeModel::Default, "default",
+ " Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ " Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ " Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ " Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ " Large code model"),
+ clEnumValEnd));
+}
+
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::~TargetMachine() {
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() {
+ return RelocationModel;
+}
+
+/// setRelocationModel - Sets the code generation relocation model.
+void TargetMachine::setRelocationModel(Reloc::Model Model) {
+ RelocationModel = Model;
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() {
+ return CMModel;
+}
+
+/// setCodeModel - Sets the code model.
+void TargetMachine::setCodeModel(CodeModel::Model Model) {
+ CMModel = Model;
+}
+
+namespace llvm {
+ /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
+ /// option is specified on the command line. If this returns false (default),
+ /// the code generator is not allowed to assume that FP arithmetic arguments
+ /// and results are never NaNs or +-Infs.
+ bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+}
+
diff --git a/lib/Target/TargetMachineRegistry.cpp b/lib/Target/TargetMachineRegistry.cpp
new file mode 100644
index 0000000..2ab8f51
--- /dev/null
+++ b/lib/Target/TargetMachineRegistry.cpp
@@ -0,0 +1,107 @@
+//===-- TargetMachineRegistry.cpp - Target Auto Registration Impl ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the RegisterTarget class, which TargetMachine
+// implementations should use to register themselves with the system. This file
+// also exposes the TargetMachineRegistry class, which allows tools to inspect
+// all of the registered targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachineRegistry.h"
+#include <algorithm>
+using namespace llvm;
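+
+// Illustrative sketch (not part of the original patch): a backend registers
+// its TargetMachine by creating a static RegisterTarget object in one of its
+// own source files, roughly as follows; the target name and class below are
+// hypothetical.
+//
+//   static RegisterTarget<HypotheticalTargetMachine>
+//     X("hypo", "  Hypothetical target (example only)");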
+
+/// List - This is the main list of all of the registered target machines.
+const TargetMachineRegistry::Entry *TargetMachineRegistry::List = 0;
+
+/// Listeners - All of the listeners registered to get notified when new targets
+/// are loaded.
+static TargetRegistrationListener *Listeners = 0;
+
+TargetMachineRegistry::Entry::Entry(const char *N, const char *SD,
+ TargetMachine *(*CF)(const Module &,const std::string &),
+ unsigned (*MMF)(const Module &M), unsigned (*JMF)())
+ : Name(N), ShortDesc(SD), CtorFn(CF), ModuleMatchQualityFn(MMF),
+ JITMatchQualityFn(JMF), Next(List) {
+ List = this;
+ for (TargetRegistrationListener *L = Listeners; L; L = L->getNext())
+ L->targetRegistered(this);
+}
+
+TargetRegistrationListener::TargetRegistrationListener() {
+ Next = Listeners;
+ if (Next) Next->Prev = &Next;
+ Prev = &Listeners;
+ Listeners = this;
+}
+
+TargetRegistrationListener::~TargetRegistrationListener() {
+ *Prev = Next;
+}
+
+/// getClosestStaticTargetForModule - Given an LLVM module, pick the best target
+/// that is compatible with the module. If no close target can be found, this
+/// returns null and sets the Error string to a reason.
+const TargetMachineRegistry::Entry *
+TargetMachineRegistry::getClosestStaticTargetForModule(const Module &M,
+ std::string &Error) {
+ std::vector<std::pair<unsigned, const Entry *> > UsableTargets;
+ for (const Entry *E = getList(); E; E = E->getNext())
+ if (unsigned Qual = E->ModuleMatchQualityFn(M))
+ UsableTargets.push_back(std::make_pair(Qual, E));
+
+ if (UsableTargets.empty()) {
+ Error = "No available targets are compatible with this module";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ std::sort(UsableTargets.begin(), UsableTargets.end());
+ if (UsableTargets.back().first == UsableTargets[UsableTargets.size()-2].first){
+ Error = "Cannot choose between targets \"" +
+ std::string(UsableTargets.back().second->Name) + "\" and \"" +
+ std::string(UsableTargets[UsableTargets.size()-2].second->Name) + "\"";
+ return 0;
+ }
+ return UsableTargets.back().second;
+}
+
+/// getClosestTargetForJIT - Given an LLVM module, pick the best target that
+/// is compatible with the current host and the specified module. If no
+/// close target can be found, this returns null and sets the Error string
+/// to a reason.
+const TargetMachineRegistry::Entry *
+TargetMachineRegistry::getClosestTargetForJIT(std::string &Error) {
+ std::vector<std::pair<unsigned, const Entry *> > UsableTargets;
+ for (const Entry *E = getList(); E; E = E->getNext())
+ if (unsigned Qual = E->JITMatchQualityFn())
+ UsableTargets.push_back(std::make_pair(Qual, E));
+
+ if (UsableTargets.empty()) {
+ Error = "No JIT is available for this host";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target. If there is a tie, just pick one.
+ unsigned MaxQual = UsableTargets.front().first;
+ const Entry *MaxQualTarget = UsableTargets.front().second;
+
+ for (unsigned i = 1, e = UsableTargets.size(); i != e; ++i)
+ if (UsableTargets[i].first > MaxQual) {
+ MaxQual = UsableTargets[i].first;
+ MaxQualTarget = UsableTargets[i].second;
+ }
+
+ return MaxQualTarget;
+}
+
diff --git a/lib/Target/TargetSchedule.td b/lib/Target/TargetSchedule.td
new file mode 100644
index 0000000..8ac537f
--- /dev/null
+++ b/lib/Target/TargetSchedule.td
@@ -0,0 +1,72 @@
+//===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent scheduling interfaces which should
+// be implemented by each target which is using TableGen based scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Processor functional unit - These values represent the functional units
+// available across all chip sets for the target.  E.g., IntUnit, FPUnit, ...
+// These may be independent values for each chip set or may be shared across
+// all chip sets of the target.  Each functional unit is treated as a resource
+// during scheduling and affects instruction order based on availability
+// during a time interval.
+//
+class FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction stage - These values represent a step in the execution of an
+// instruction.  The latency represents the number of discrete time slots
+// needed to complete the stage.  Units represent the choice of functional
+// units that can be used to complete the stage.  E.g., IntUnit1, IntUnit2.
+//
+class InstrStage<int cycles, list<FuncUnit> units> {
+ int Cycles = cycles; // length of stage in machine cycles
+ list<FuncUnit> Units = units; // choice of functional units
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary - An itinerary represents a sequential series of steps
+// required to complete an instruction. Itineraries are represented as lists of
+// instruction stages.
+//
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary classes - These values represent 'named' instruction
+// itineraries.  Using named itineraries simplifies managing groups of
+// instructions across chip sets. An instruction uses the same itinerary class
+// across all chip sets. Thus a new chip set can be added without modifying
+// instruction information.
+//
+class InstrItinClass;
+def NoItinerary : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary data - These values provide a runtime map of an
+// instruction itinerary class (name) to its itinerary data.
+//
+class InstrItinData<InstrItinClass Class, list<InstrStage> stages> {
+ InstrItinClass TheClass = Class;
+ list<InstrStage> Stages = stages;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor itineraries - These values represent the set of all itinerary
+// classes for a given chip set.
+//
+class ProcessorItineraries<list<InstrItinData> iid> {
+ list<InstrItinData> IID = iid;
+}
+
+// NoItineraries - A marker that can be used by processors without schedule
+// info.
+def NoItineraries : ProcessorItineraries<[]>;
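+
+// Illustrative sketch (not part of the original patch): a target would
+// typically describe one of its chips roughly like this; the functional unit,
+// itinerary class, and itinerary names below are hypothetical.
+def HypotheticalALU : FuncUnit;
+def IIC_HypotheticalAdd : InstrItinClass;
+def HypotheticalItineraries : ProcessorItineraries<[
+  InstrItinData<IIC_HypotheticalAdd, [InstrStage<1, [HypotheticalALU]>]>
+]>;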
+
diff --git a/lib/Target/TargetSelectionDAG.td b/lib/Target/TargetSelectionDAG.td
new file mode 100644
index 0000000..491bb02
--- /dev/null
+++ b/lib/Target/TargetSelectionDAG.td
@@ -0,0 +1,763 @@
+//===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces used by SelectionDAG
+// instruction selection generators.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Type Constraint definitions.
+//
+// Note that the semantics of these constraints are hard coded into tblgen. To
+// modify or add constraints, you have to hack tblgen.
+//
+
+class SDTypeConstraint<int opnum> {
+ int OperandNum = opnum;
+}
+
+// SDTCisVT - The specified operand has exactly this VT.
+class SDTCisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> {
+ ValueType VT = vt;
+}
+
+class SDTCisPtrTy<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisInt - The specified operand has integer type.
+class SDTCisInt<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisFP - The specified operand has floating point type.
+class SDTCisFP<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisSameAs - The two specified operands have identical types.
+class SDTCisSameAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
+ int OtherOperandNum = OtherOp;
+}
+
+// SDTCisVTSmallerThanOp - The specified operand is a VT SDNode, and its type is
+// smaller than the 'Other' operand.
+class SDTCisVTSmallerThanOp<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
+ int OtherOperandNum = OtherOp;
+}
+
+class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp>{
+ int BigOperandNum = BigOp;
+}
+
+/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are
+/// vector types, and that ThisOp is the result of
+/// MVT::getIntVectorWithNumElements with the number of elements that OtherOp
+/// has.
+class SDTCisIntVectorOfSameSize<int ThisOp, int OtherOp>
+ : SDTypeConstraint<ThisOp> {
+ int OtherOpNum = OtherOp;
+}
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Type Profile definitions.
+//
+// These use the constraints defined above to describe the type requirements of
+// the various nodes. These are not hard coded into tblgen, allowing targets to
+// add their own if needed.
+//
+
+// SDTypeProfile - This profile describes the type requirements of a Selection
+// DAG node.
+class SDTypeProfile<int numresults, int numoperands,
+ list<SDTypeConstraint> constraints> {
+ int NumResults = numresults;
+ int NumOperands = numoperands;
+ list<SDTypeConstraint> Constraints = constraints;
+}
+
+// Builtin profiles.
+def SDTIntLeaf: SDTypeProfile<1, 0, [SDTCisInt<0>]>; // for 'imm'.
+def SDTFPLeaf : SDTypeProfile<1, 0, [SDTCisFP<0>]>; // for 'fpimm'.
+def SDTPtrLeaf: SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; // for '&g'.
+def SDTOther : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
+def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'.
+def SDTUnaryOp : SDTypeProfile<1, 1, []>; // bitconvert
+
+def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>
+]>;
+def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl
+ SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
+]>;
+def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign.
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisFP<2>
+]>;
+def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>
+]>;
+def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz
+ SDTCisSameAs<0, 1>, SDTCisInt<0>
+]>;
+def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
+]>;
+def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>
+]>;
+def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
+ SDTCisSameAs<0, 1>, SDTCisFP<0>
+]>;
+def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fround
+ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
+]>;
+def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fextend
+ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>
+]>;
+def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
+ SDTCisFP<0>, SDTCisInt<1>
+]>;
+def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int
+ SDTCisInt<0>, SDTCisFP<1>
+]>;
+def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
+ SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
+ SDTCisVTSmallerThanOp<2, 1>
+]>;
+
+def SDTSetCC : SDTypeProfile<1, 3, [ // setcc
+ SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+]>;
+
+def SDTSelect : SDTypeProfile<1, 3, [ // select
+ SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
+]>;
+
+def SDTSelectCC : SDTypeProfile<1, 5, [ // select_cc
+ SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>,
+ SDTCisVT<5, OtherVT>
+]>;
+
+def SDTBr : SDTypeProfile<0, 1, [ // br
+ SDTCisVT<0, OtherVT>
+]>;
+
+def SDTBrcond : SDTypeProfile<0, 2, [ // brcond
+ SDTCisInt<0>, SDTCisVT<1, OtherVT>
+]>;
+
+def SDTBrind : SDTypeProfile<0, 1, [ // brind
+ SDTCisPtrTy<0>
+]>;
+
+def SDTRet : SDTypeProfile<0, 0, []>; // ret
+
+def SDTLoad : SDTypeProfile<1, 1, [ // load
+ SDTCisPtrTy<1>
+]>;
+
+def SDTStore : SDTypeProfile<0, 2, [ // store
+ SDTCisPtrTy<1>
+]>;
+
+def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
+ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
+]>;
+
+def SDTVecShuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisIntVectorOfSameSize<3, 0>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node Properties.
+//
+// Note: These are hard coded into tblgen.
+//
+class SDNodeProperty;
+def SDNPCommutative : SDNodeProperty; // X op Y == Y op X
+def SDNPAssociative : SDNodeProperty; // (X op Y) op Z == X op (Y op Z)
+def SDNPHasChain : SDNodeProperty; // R/W chain operand and result
+def SDNPOutFlag : SDNodeProperty; // Write a flag result
+def SDNPInFlag : SDNodeProperty; // Read a flag operand
+def SDNPOptInFlag : SDNodeProperty; // Optionally read a flag operand
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node definitions.
+//
+class SDNode<string opcode, SDTypeProfile typeprof,
+ list<SDNodeProperty> props = [], string sdclass = "SDNode"> {
+ string Opcode = opcode;
+ string SDClass = sdclass;
+ list<SDNodeProperty> Properties = props;
+ SDTypeProfile TypeProfile = typeprof;
+}
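+
+// Illustrative sketch (not part of the original patch): a backend introduces
+// target-specific nodes with the SDNode class above; the ISD opcode string
+// and def name here are hypothetical.
+def hypothetical_retflag : SDNode<"HypotheticalISD::RET_FLAG", SDTRet,
+                                  [SDNPHasChain, SDNPOptInFlag]>;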
+
+def set;
+def node;
+def srcvalue;
+
+def imm : SDNode<"ISD::Constant" , SDTIntLeaf , [], "ConstantSDNode">;
+def fpimm : SDNode<"ISD::TargetConstantFP",
+ SDTFPLeaf, [], "ConstantFPSDNode">;
+def vt : SDNode<"ISD::VALUETYPE" , SDTOther , [], "VTSDNode">;
+def bb : SDNode<"ISD::BasicBlock", SDTOther , [], "BasicBlockSDNode">;
+def cond : SDNode<"ISD::CONDCODE" , SDTOther , [], "CondCodeSDNode">;
+def undef : SDNode<"ISD::UNDEF" , SDTUNDEF , []>;
+def globaladdr : SDNode<"ISD::GlobalAddress", SDTPtrLeaf, [],
+ "GlobalAddressSDNode">;
+def tglobaladdr : SDNode<"ISD::TargetGlobalAddress", SDTPtrLeaf, [],
+ "GlobalAddressSDNode">;
+def globaltlsaddr : SDNode<"ISD::GlobalTLSAddress", SDTPtrLeaf, [],
+ "GlobalAddressSDNode">;
+def tglobaltlsaddr : SDNode<"ISD::TargetGlobalTLSAddress", SDTPtrLeaf, [],
+ "GlobalAddressSDNode">;
+def constpool : SDNode<"ISD::ConstantPool", SDTPtrLeaf, [],
+ "ConstantPoolSDNode">;
+def tconstpool : SDNode<"ISD::TargetConstantPool", SDTPtrLeaf, [],
+ "ConstantPoolSDNode">;
+def jumptable : SDNode<"ISD::JumpTable", SDTPtrLeaf, [],
+ "JumpTableSDNode">;
+def tjumptable : SDNode<"ISD::TargetJumpTable", SDTPtrLeaf, [],
+ "JumpTableSDNode">;
+def frameindex : SDNode<"ISD::FrameIndex", SDTPtrLeaf, [],
+ "FrameIndexSDNode">;
+def tframeindex : SDNode<"ISD::TargetFrameIndex", SDTPtrLeaf, [],
+ "FrameIndexSDNode">;
+def externalsym : SDNode<"ISD::ExternalSymbol", SDTPtrLeaf, [],
+ "ExternalSymbolSDNode">;
+def texternalsym: SDNode<"ISD::TargetExternalSymbol", SDTPtrLeaf, [],
+ "ExternalSymbolSDNode">;
+
+def add : SDNode<"ISD::ADD" , SDTIntBinOp ,
+ [SDNPCommutative, SDNPAssociative]>;
+def sub : SDNode<"ISD::SUB" , SDTIntBinOp>;
+def mul : SDNode<"ISD::MUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>;
+def mulhu : SDNode<"ISD::MULHU" , SDTIntBinOp, [SDNPCommutative]>;
+def sdiv : SDNode<"ISD::SDIV" , SDTIntBinOp>;
+def udiv : SDNode<"ISD::UDIV" , SDTIntBinOp>;
+def srem : SDNode<"ISD::SREM" , SDTIntBinOp>;
+def urem : SDNode<"ISD::UREM" , SDTIntBinOp>;
+def srl : SDNode<"ISD::SRL" , SDTIntShiftOp>;
+def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>;
+def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>;
+def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>;
+def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>;
+def and : SDNode<"ISD::AND" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def or : SDNode<"ISD::OR" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def xor : SDNode<"ISD::XOR" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def addc : SDNode<"ISD::ADDC" , SDTIntBinOp,
+ [SDNPCommutative, SDNPOutFlag]>;
+def adde : SDNode<"ISD::ADDE" , SDTIntBinOp,
+ [SDNPCommutative, SDNPOutFlag, SDNPInFlag]>;
+def subc : SDNode<"ISD::SUBC" , SDTIntBinOp,
+ [SDNPOutFlag]>;
+def sube : SDNode<"ISD::SUBE" , SDTIntBinOp,
+ [SDNPOutFlag, SDNPInFlag]>;
+
+def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
+def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
+def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
+def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
+def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>;
+def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
+def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
+def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
+def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>;
+def bitconvert : SDNode<"ISD::BIT_CONVERT", SDTUnaryOp>;
+
+def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
+def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
+def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
+def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
+def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
+def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
+def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
+def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
+def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>;
+def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
+
+def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
+def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
+def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;
+
+def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>;
+def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
+def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
+def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
+
+def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
+def select : SDNode<"ISD::SELECT" , SDTSelect>;
+def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>;
+
+def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>;
+def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>;
+def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
+def ret : SDNode<"ISD::RET" , SDTRet, [SDNPHasChain]>;
+
+// Do not use ld or st directly. Use load, extload, sextload, zextload, store,
+// and the truncstore fragments (see below).
+def ld : SDNode<"ISD::LOAD" , SDTLoad, [SDNPHasChain]>;
+def st : SDNode<"ISD::STORE" , SDTStore, [SDNPHasChain]>;
+def ist : SDNode<"ISD::STORE" , SDTIStore, [SDNPHasChain]>;
+
+def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
+def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, 0, []>, []>;
+def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
+ []>;
+def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
+def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
+
+// Nodes for intrinsics. You should use the intrinsic itself and let tblgen use
+// these internally; don't reference them directly.
+def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain]>;
+def intrinsic_w_chain : SDNode<"ISD::INTRINSIC_W_CHAIN",
+ SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain]>;
+def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN",
+ SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, []>;
+
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Condition Codes
+
+class CondCode; // ISD::CondCode enums
+def SETOEQ : CondCode; def SETOGT : CondCode;
+def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode;
+def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode;
+def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode;
+def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode;
+
+def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode;
+def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode;
+
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node Transformation Functions.
+//
+// This mechanism allows targets to manipulate nodes in the output DAG once a
+// match has been formed. This is typically used to manipulate immediate
+// values.
+//
+class SDNodeXForm<SDNode opc, code xformFunction> {
+ SDNode Opcode = opc;
+ code XFormFunction = xformFunction;
+}
+
+def NOOP_SDNodeXForm : SDNodeXForm<imm, [{}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Fragments.
+//
+// Pattern fragments are reusable chunks of dags that match specific things.
+// They can take arguments and have C++ predicates that control whether they
+// match. They are intended to make the patterns for common instructions more
+// compact and readable.
+//
+
+/// PatFrag - Represents a pattern fragment. This can match something on the
+/// DAG, from a single node to multiple nested other fragments.
+///
+class PatFrag<dag ops, dag frag, code pred = [{}],
+ SDNodeXForm xform = NOOP_SDNodeXForm> {
+ dag Operands = ops;
+ dag Fragment = frag;
+ code Predicate = pred;
+ SDNodeXForm OperandTransform = xform;
+}
+
+// PatLeaf's are pattern fragments that have no operands. This is just a helper
+// to define immediates and other common things concisely.
+class PatLeaf<dag frag, code pred = [{}], SDNodeXForm xform = NOOP_SDNodeXForm>
+ : PatFrag<(ops), frag, pred, xform>;
+
+// Leaf fragments.
+
+def vtInt : PatLeaf<(vt), [{ return MVT::isInteger(N->getVT()); }]>;
+def vtFP : PatLeaf<(vt), [{ return MVT::isFloatingPoint(N->getVT()); }]>;
+
+def immAllOnes : PatLeaf<(imm), [{ return N->isAllOnesValue(); }]>;
+def immAllOnesV: PatLeaf<(build_vector), [{
+ return ISD::isBuildVectorAllOnes(N);
+}]>;
+def immAllZerosV: PatLeaf<(build_vector), [{
+ return ISD::isBuildVectorAllZeros(N);
+}]>;
+
+def immAllOnesV_bc: PatLeaf<(bitconvert), [{
+ return ISD::isBuildVectorAllOnes(N);
+}]>;
+
+
+// Other helper fragments.
+def not : PatFrag<(ops node:$in), (xor node:$in, immAllOnes)>;
+def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
+def vnot_conv : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV_bc)>;
+def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;
+
+// load fragments.
+def load : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+
+// extending load fragments.
+def extloadi1 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i1;
+ return false;
+}]>;
+def extloadi8 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i8;
+ return false;
+}]>;
+def extloadi16 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i16;
+ return false;
+}]>;
+def extloadi32 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i32;
+ return false;
+}]>;
+def extloadf32 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::f32;
+ return false;
+}]>;
+
+def sextloadi1 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i1;
+ return false;
+}]>;
+def sextloadi8 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i8;
+ return false;
+}]>;
+def sextloadi16 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i16;
+ return false;
+}]>;
+def sextloadi32 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i32;
+ return false;
+}]>;
+
+def zextloadi1 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::ZEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i1;
+ return false;
+}]>;
+def zextloadi8 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::ZEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i8;
+ return false;
+}]>;
+def zextloadi16 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::ZEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i16;
+ return false;
+}]>;
+def zextloadi32 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return LD->getExtensionType() == ISD::ZEXTLOAD &&
+ LD->getAddressingMode() == ISD::UNINDEXED &&
+ LD->getLoadedVT() == MVT::i32;
+ return false;
+}]>;
+
+// store fragments.
+def store : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+
+// truncstore fragments.
+def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isTruncatingStore() && ST->getStoredVT() == MVT::i1 &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isTruncatingStore() && ST->getStoredVT() == MVT::i8 &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isTruncatingStore() && ST->getStoredVT() == MVT::i16 &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isTruncatingStore() && ST->getStoredVT() == MVT::i32 &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isTruncatingStore() && ST->getStoredVT() == MVT::f32 &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+}]>;
+
+// indexed store fragments.
+def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ !ST->isTruncatingStore();
+ }
+ return false;
+}]>;
+
+def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i1;
+ }
+ return false;
+}]>;
+def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i8;
+ }
+ return false;
+}]>;
+def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i16;
+ }
+ return false;
+}]>;
+def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i32;
+ }
+ return false;
+}]>;
+def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::PRE_INC || AM == ISD::PRE_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::f32;
+ }
+ return false;
+}]>;
+
+def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+ (ist node:$val, node:$ptr, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return !ST->isTruncatingStore() &&
+ (AM == ISD::POST_INC || AM == ISD::POST_DEC);
+ }
+ return false;
+}]>;
+
+def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::POST_INC || AM == ISD::POST_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i1;
+ }
+ return false;
+}]>;
+def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::POST_INC || AM == ISD::POST_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i8;
+ }
+ return false;
+}]>;
+def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::POST_INC || AM == ISD::POST_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i16;
+ }
+ return false;
+}]>;
+def post_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::POST_INC || AM == ISD::POST_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::i32;
+ }
+ return false;
+}]>;
+def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (ist node:$val, node:$base, node:$offset), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+ return (AM == ISD::POST_INC || AM == ISD::POST_DEC) &&
+ ST->isTruncatingStore() && ST->getStoredVT() == MVT::f32;
+ }
+ return false;
+}]>;
+
+// setcc convenience fragments.
+def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOEQ)>;
+def setogt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGT)>;
+def setoge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOGE)>;
+def setolt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLT)>;
+def setole : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETOLE)>;
+def setone : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETONE)>;
+def seto : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETO)>;
+def setuo : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUO)>;
+def setueq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUEQ)>;
+def setugt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGT)>;
+def setuge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGE)>;
+def setult : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULT)>;
+def setule : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETULE)>;
+def setune : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUNE)>;
+def seteq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETEQ)>;
+def setgt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGT)>;
+def setge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGE)>;
+def setlt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLT)>;
+def setle : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETLE)>;
+def setne : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETNE)>;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Support.
+//
+// Patterns are what are actually matched against the target-flavored
+// instruction selection DAG. Instructions defined by the target implicitly
+// define patterns in most cases, but patterns can also be explicitly added when
+// an operation is defined by a sequence of instructions (e.g. loading a large
+// immediate value on RISC targets that do not support immediates as large as
+// their GPRs).
+//
+
+class Pattern<dag patternToMatch, list<dag> resultInstrs> {
+ dag PatternToMatch = patternToMatch;
+ list<dag> ResultInstrs = resultInstrs;
+ list<Predicate> Predicates = []; // See class Instruction in Target.td.
+ int AddedComplexity = 0; // See class Instruction in Target.td.
+}
+
+// Pat - A simple (but common) form of a pattern, which produces a simple result
+// not needing a full list.
+class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
+
+//===----------------------------------------------------------------------===//
+// Complex pattern definitions.
+//
+// Complex patterns, e.g. the X86 addressing mode, require pattern matching code
+// in C++. NumOperands is the number of operands returned by the select function;
+// SelectFunc is the name of the function used to pattern match the maximal
+// pattern; RootNodes are the list of possible root nodes of the sub-dags to
+// match. e.g. for the X86 addressing mode:
+//   def addr : ComplexPattern<i32, 4, "SelectAddr", [add]>;
+//
+class ComplexPattern<ValueType ty, int numops, string fn,
+ list<SDNode> roots = [], list<SDNodeProperty> props = []> {
+ ValueType Ty = ty;
+ int NumOperands = numops;
+ string SelectFunc = fn;
+ list<SDNode> RootNodes = roots;
+ list<SDNodeProperty> Properties = props;
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf support.
+//
+def SDT_dwarf_loc : SDTypeProfile<0, 3,
+ [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+def dwarf_loc : SDNode<"ISD::DEBUG_LOC", SDT_dwarf_loc,[SDNPHasChain]>;
+
+
+
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp
new file mode 100644
index 0000000..a8bb9b9
--- /dev/null
+++ b/lib/Target/TargetSubtarget.cpp
@@ -0,0 +1,22 @@
+//===-- TargetSubtarget.cpp - General Target Information -------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtarget.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtarget Class
+//
+TargetSubtarget::TargetSubtarget() {}
+
+TargetSubtarget::~TargetSubtarget() {}
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
new file mode 100644
index 0000000..5416cdb
--- /dev/null
+++ b/lib/Target/X86/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMX86
+TARGET = X86
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
+ X86GenRegisterInfo.inc X86GenInstrNames.inc \
+ X86GenInstrInfo.inc X86GenAsmWriter.inc \
+ X86GenAsmWriter1.inc X86GenDAGISel.inc \
+ X86GenCallingConv.inc X86GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
new file mode 100644
index 0000000..d94fa02
--- /dev/null
+++ b/lib/Target/X86/README-FPStack.txt
@@ -0,0 +1,99 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: FP stack related stuff
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+Some targets (e.g. athlons) prefer freep to fstp ST(0):
+http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
+
+//===---------------------------------------------------------------------===//
+
+On darwin/x86, we should codegen:
+
+ ret double 0.000000e+00
+
+as fld0/ret, not as:
+
+ movl $0, 4(%esp)
+ movl $0, (%esp)
+ fldl (%esp)
+ ...
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This should use fiadd on chips where it is profitable:
+double foo(double P, int *I) { return P+*I; }
+
+We have fiadd patterns now but the following have the same cost and
+complexity. We need a way to specify that the latter is more profitable.
+
+def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (extloadf64f32 addr:$src2)))]>;
+ // ST(0) = ST(0) + [mem32]
+
+def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) + [mem32int]
+
+//===---------------------------------------------------------------------===//
+
+The FP stackifier needs to be global. Also, it should handle simple permutations
+to reduce the number of shuffle instructions, e.g. turning:
+
+fld P -> fld Q
+fld Q fld P
+fxch
+
+or:
+
+fxch -> fucomi
+fucomi jl X
+jg X
+
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
+//===---------------------------------------------------------------------===//
+
+Add a target specific hook to DAG combiner to handle SINT_TO_FP and
+FP_TO_SINT when the source operand is already in memory.
+
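+For illustration only (names made up), the kind of code in question is a
+conversion whose operand already lives in memory, where the load could
+potentially be folded into the conversion:
+
+double int_to_double(const int *p) { return (double)*p; }
+long   double_to_int(const double *p) { return (long)*p; }
+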
+//===---------------------------------------------------------------------===//
+
+Open code rint, floor, ceil, trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+Open code the sincos[f] libcall.
+
+//===---------------------------------------------------------------------===//
+
+None of the FPStack instructions are handled in
+X86RegisterInfo::foldMemoryOperand, which prevents the spiller from
+folding spill code into the instructions.
+
+//===---------------------------------------------------------------------===//
+
+Currently the x86 codegen isn't very good at mixing SSE and FPStack
+code:
+
+unsigned int foo(double x) { return x; }
+
+foo:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl (%esp), %eax
+ addl $20, %esp
+ ret
+
+This will be solved when we go to a dynamic programming based isel.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt
new file mode 100644
index 0000000..57c7c3f
--- /dev/null
+++ b/lib/Target/X86/README-MMX.txt
@@ -0,0 +1,69 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: MMX-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+#include <mmintrin.h>
+
+__v2si qux(int A) {
+ return (__v2si){ 0, A };
+}
+
+is compiled into:
+
+_qux:
+ subl $28, %esp
+ movl 32(%esp), %eax
+ movd %eax, %mm0
+ movq %mm0, (%esp)
+ movl (%esp), %eax
+ movl %eax, 20(%esp)
+ movq %mm0, 8(%esp)
+ movl 12(%esp), %eax
+ movl %eax, 16(%esp)
+ movq 16(%esp), %mm0
+ addl $28, %esp
+ ret
+
+Yuck!
+
+GCC gives us:
+
+_qux:
+ subl $12, %esp
+ movl 16(%esp), %eax
+ movl 20(%esp), %edx
+ movl $0, (%eax)
+ movl %edx, 4(%eax)
+ addl $12, %esp
+ ret $4
+
+//===---------------------------------------------------------------------===//
+
+int main() {
+ __m64 A[1] = { _mm_cvtsi32_si64(1) };
+ __m64 B[1] = { _mm_cvtsi32_si64(10) };
+ __m64 sum = _mm_cvtsi32_si64(0);
+
+ sum = __builtin_ia32_paddq(__builtin_ia32_paddq(A[0], B[0]), sum);
+
+ printf("Sum = %d\n", _mm_cvtsi64_si32(sum));
+ return 0;
+}
+
+Generates:
+
+ movl $11, %eax
+### movd %eax, %mm0
+### movq %mm0, 8(%esp)
+### movl 8(%esp), %eax
+ movl %eax, 4(%esp)
+ movl $_str, (%esp)
+ call L_printf$stub
+ xorl %eax, %eax
+ addl $28, %esp
+
+These instructions are unnecessary.
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
new file mode 100644
index 0000000..f4b54c4
--- /dev/null
+++ b/lib/Target/X86/README-SSE.txt
@@ -0,0 +1,629 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: SSE-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+Expand libm rounding functions inline: Significant speedups possible.
+http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html
+
+//===---------------------------------------------------------------------===//
+
+When compiled with unsafe math enabled, "main" should enable SSE DAZ mode and
+other fast SSE modes.
+
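+A sketch of what that could look like (assuming the standard MXCSR bit layout,
+FTZ = 0x8000 and DAZ = 0x0040; DAZ also needs hardware that supports it):
+
+#include <xmmintrin.h>
+
+static void enable_fast_sse_modes(void) {
+  /* turn on flush-to-zero and denormals-are-zero in MXCSR */
+  _mm_setcsr(_mm_getcsr() | 0x8040);
+}
+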
+//===---------------------------------------------------------------------===//
+
+Think about doing i64 math in SSE regs.
+
+//===---------------------------------------------------------------------===//
+
+This testcase should have no SSE instructions in it, and only one load from
+a constant pool:
+
+double %test3(bool %B) {
+ %C = select bool %B, double 123.412, double 523.01123123
+ ret double %C
+}
+
+Currently, the select is being lowered, which prevents the dag combiner from
+turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)'
+
+The pattern isel got this one right.
+
+//===---------------------------------------------------------------------===//
+
+SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction
+like this:
+
+ X += y
+
+and the register allocator decides to spill X, it is cheaper to emit this as:
+
+Y += [xslot]
+store Y -> [xslot]
+
+than as:
+
+tmp = [xslot]
+tmp += y
+store tmp -> [xslot]
+
+...and this uses one fewer register (so this should be done at load folding
+time, not at spiller time). *Note* however that this can only be done
+if Y is dead. Here's a testcase:
+
+%.str_3 = external global [15 x sbyte] ; <[15 x sbyte]*> [#uses=0]
+implementation ; Functions:
+declare void %printf(int, ...)
+void %main() {
+build_tree.exit:
+ br label %no_exit.i7
+no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
+ %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ] ; <double> [#uses=1]
+ %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] ; <double> [#uses=1]
+ %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
+ %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+ br bool false, label %Compute_Tree.exit23, label %no_exit.i7
+Compute_Tree.exit23: ; preds = %no_exit.i7
+ tail call void (int, ...)* %printf( int 0 )
+ store double %tmp.34.i18, double* null
+ ret void
+}
+
+We currently emit:
+
+.BBmain_1:
+ xorpd %XMM1, %XMM1
+ addsd %XMM0, %XMM1
+*** movsd %XMM2, QWORD PTR [%ESP + 8]
+*** addsd %XMM2, %XMM1
+*** movsd QWORD PTR [%ESP + 8], %XMM2
+ jmp .BBmain_1 # no_exit.i7
+
+This is a bugpoint-reduced testcase, which is why it doesn't make much sense
+(e.g. it's an infinite loop). :)
+
+//===---------------------------------------------------------------------===//
+
+SSE should implement 'select_cc' using 'emulated conditional moves' that use
+pcmp/pand/pandn/por to do a selection instead of a conditional branch:
+
+double %X(double %Y, double %Z, double %A, double %B) {
+ %C = setlt double %A, %B
+ %z = add double %Z, 0.0 ;; select operand is not a load
+ %D = select bool %C, double %Y, double %z
+ ret double %D
+}
+
+We currently emit:
+
+_X:
+ subl $12, %esp
+ xorpd %xmm0, %xmm0
+ addsd 24(%esp), %xmm0
+ movsd 32(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ ucomisd 40(%esp), %xmm1
+ jb LBB_X_2
+LBB_X_1:
+ movsd %xmm0, %xmm2
+LBB_X_2:
+ movsd %xmm2, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
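+A sketch of the mask trick with the pd forms of those ops (illustrative only,
+not what we currently emit):
+
+#include <emmintrin.h>
+
+__m128d sel_lt(__m128d a, __m128d b, __m128d x, __m128d y) {
+  __m128d m = _mm_cmplt_pd(a, b);           /* all-ones lanes where a < b */
+  return _mm_or_pd(_mm_and_pd(m, x),        /* take x where the mask is set */
+                   _mm_andnot_pd(m, y));    /* take y elsewhere */
+}
+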
+//===---------------------------------------------------------------------===//
+
+It's not clear whether we should use pxor or xorps / xorpd to clear XMM
+registers. The choice may depend on subtarget information. We should do some
+more experiments on different x86 machines.
+
+//===---------------------------------------------------------------------===//
+
+Currently the x86 codegen isn't very good at mixing SSE and FPStack
+code:
+
+unsigned int foo(double x) { return x; }
+
+foo:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl (%esp), %eax
+ addl $20, %esp
+ ret
+
+This will be solved when we go to a dynamic programming based isel.
+
+//===---------------------------------------------------------------------===//
+
+Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
+feasible.
+
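+For a small constant-size copy the idea is roughly this (sketch only; it
+assumes both pointers are 16-byte aligned):
+
+#include <xmmintrin.h>
+
+void copy32(float *dst, const float *src) {
+  _mm_store_ps(dst,     _mm_load_ps(src));      /* first 16 bytes */
+  _mm_store_ps(dst + 4, _mm_load_ps(src + 4));  /* second 16 bytes */
+}
+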
+//===---------------------------------------------------------------------===//
+
+Teach the coalescer to commute 2-addr instructions, allowing us to eliminate
+the reg-reg copy in this example:
+
+float foo(int *x, float *y, unsigned c) {
+ float res = 0.0;
+ unsigned i;
+ for (i = 0; i < c; i++) {
+ float xx = (float)x[i];
+ xx = xx * y[i];
+ xx += res;
+ res = xx;
+ }
+ return res;
+}
+
+LBB_foo_3: # no_exit
+ cvtsi2ss %XMM0, DWORD PTR [%EDX + 4*%ESI]
+ mulss %XMM0, DWORD PTR [%EAX + 4*%ESI]
+ addss %XMM0, %XMM1
+ inc %ESI
+ cmp %ESI, %ECX
+**** movaps %XMM1, %XMM0
+ jb LBB_foo_3 # no_exit
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+ if (copysign(1.0, x) == copysign(1.0, y))
+into:
+ if (x^y & mask)
+when using SSE.
+
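+This works because copysign(1.0, v) only depends on the sign bit of v, so the
+comparison reduces to checking that x and y have the same sign bit. A scalar
+sketch of the equivalence (names made up):
+
+#include <stdint.h>
+#include <string.h>
+
+int same_sign(double x, double y) {
+  uint64_t xb, yb;
+  memcpy(&xb, &x, sizeof xb);
+  memcpy(&yb, &y, sizeof yb);
+  return ((xb ^ yb) & 0x8000000000000000ULL) == 0;  /* x^y & sign-bit mask */
+}
+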
+//===---------------------------------------------------------------------===//
+
+Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half
+of a v4sf value.
+
+//===---------------------------------------------------------------------===//
+
+Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0 }.
+Perhaps use pxor / xorp* to clear an XMM register first?
+
+//===---------------------------------------------------------------------===//
+
+How to decide when to use the "floating point version" of logical ops? Here are
+some code fragments:
+
+ movaps LCPI5_5, %xmm2
+ divps %xmm1, %xmm2
+ mulps %xmm2, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm2
+ andps LCPI5_1, %xmm3
+ por %xmm2, %xmm3
+ movdqa %xmm3, (%edi)
+
+ movaps LCPI5_5, %xmm1
+ divps %xmm0, %xmm1
+ mulps %xmm1, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm1
+ andps LCPI5_1, %xmm3
+ orps %xmm1, %xmm3
+ movaps %xmm3, 112(%esp)
+ movaps %xmm3, (%ebx)
+
+Due to some minor source change, the latter case ended up using orps and movaps
+instead of por and movdqa. Does it matter?
+
+//===---------------------------------------------------------------------===//
+
+X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
+to choose between movaps, movapd, and movdqa based on types of source and
+destination?
+
+How about andps, andpd, and pand? Do we really care about the type of the packed
+elements? If not, why not always use the "ps" variants, which are likely to be
+shorter?
+
+//===---------------------------------------------------------------------===//
+
+External test Nurbs exposed some problems. Look for
+__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
+emits:
+
+ movaps (%edx), %xmm2 #59.21
+ movaps (%edx), %xmm5 #60.21
+ movaps (%edx), %xmm4 #61.21
+ movaps (%edx), %xmm3 #62.21
+ movl 40(%ecx), %ebp #69.49
+ shufps $0, %xmm2, %xmm5 #60.21
+ movl 100(%esp), %ebx #69.20
+ movl (%ebx), %edi #69.20
+ imull %ebp, %edi #69.49
+ addl (%eax), %edi #70.33
+ shufps $85, %xmm2, %xmm4 #61.21
+ shufps $170, %xmm2, %xmm3 #62.21
+ shufps $255, %xmm2, %xmm2 #63.21
+ lea (%ebp,%ebp,2), %ebx #69.49
+ negl %ebx #69.49
+ lea -3(%edi,%ebx), %ebx #70.33
+ shll $4, %ebx #68.37
+ addl 32(%ecx), %ebx #68.37
+ testb $15, %bl #91.13
+ jne L_B1.24 # Prob 5% #91.13
+
+This is the llvm code after instruction scheduling:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %reg1078 = MOV32ri -3
+ %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0
+ %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40
+ %reg1080 = IMUL32rr %reg1079, %reg1037
+ %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0
+ %reg1038 = LEA32r %reg1081, 1, %reg1080, -3
+ %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32
+ %reg1082 = SHL32ri %reg1038, 4
+ %reg1039 = ADD32rr %reg1036, %reg1082
+ %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0
+ %reg1034 = SHUFPSrr %reg1083, %reg1083, 170
+ %reg1032 = SHUFPSrr %reg1083, %reg1083, 0
+ %reg1035 = SHUFPSrr %reg1083, %reg1083, 255
+ %reg1033 = SHUFPSrr %reg1083, %reg1083, 85
+ %reg1040 = MOV32rr %reg1039
+ %reg1084 = AND32ri8 %reg1039, 15
+ CMP32ri8 %reg1084, 0
+ JE mbb<cond_next204,0xa914d30>
+
+Still ok. After register allocation:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %EAX = MOV32ri -3
+ %EDX = MOV32rm <fi#3>, 1, %NOREG, 0
+ ADD32rm %EAX<def&use>, %EDX, 1, %NOREG, 0
+ %EDX = MOV32rm <fi#7>, 1, %NOREG, 0
+ %EDX = MOV32rm %EDX, 1, %NOREG, 40
+ IMUL32rr %EAX<def&use>, %EDX
+ %ESI = MOV32rm <fi#5>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 0
+ MOV32mr <fi#4>, 1, %NOREG, 0, %ESI
+ %EAX = LEA32r %ESI, 1, %EAX, -3
+ %ESI = MOV32rm <fi#7>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 32
+ %EDI = MOV32rr %EAX
+ SHL32ri %EDI<def&use>, 4
+ ADD32rr %EDI<def&use>, %ESI
+ %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0
+ %XMM1 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM1<def&use>, %XMM1, 170
+ %XMM2 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM2<def&use>, %XMM2, 0
+ %XMM3 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM3<def&use>, %XMM3, 255
+ SHUFPSrr %XMM0<def&use>, %XMM0, 85
+ %EBX = MOV32rr %EDI
+ AND32ri8 %EBX<def&use>, 15
+ CMP32ri8 %EBX, 0
+ JE mbb<cond_next204,0xa914d30>
+
+This looks really bad. The problem is that shufps is a destructive opcode:
+since it appears as operand two in more than one shufps op, it resulted in a
+number of copies. Note icc also suffers from the same problem. Either the
+instruction selector should select pshufd, or the register allocator should
+make the two-address to three-address transformation.
+
+It also exposes some other problems. See MOV32ri -3 and the spills.
+
+//===---------------------------------------------------------------------===//
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500
+
+LLVM is producing bad code.
+
+LBB_main_4: # cond_true44
+ addps %xmm1, %xmm2
+ subps %xmm3, %xmm2
+ movaps (%ecx), %xmm4
+ movaps %xmm2, %xmm1
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+There are two problems. 1) No need for two loop induction variables; we can
+compare against 262144 * 16. 2) Known register coalescer issue. We should be
+able to eliminate one of the movaps:
+
+ addps %xmm2, %xmm1 <=== Commute!
+ subps %xmm3, %xmm1
+ movaps (%ecx), %xmm4
+ movaps %xmm1, %xmm1 <=== Eliminate!
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+__m128 test(float a) {
+ return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because mulss doesn't modify the top 3 elements, the top elements of
+xmm1 are already zero'd. We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea. Consider code like this:
+
+__m128 test(__m128 a) {
+ float b = *(float*)&a;
+ ...
+ return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled. This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot can, if only one 4-byte
+chunk is used, bring in 3 zeros and the one element instead of 4 elements.
+This can be used to simplify a variety of shuffle operations, where the
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+#include <emmintrin.h>
+void test(__m128d *r, __m128d *A, double B) {
+ *r = _mm_loadl_pd(*A, &B);
+}
+
+We generate:
+
+ subl $12, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, (%esp)
+ movl 20(%esp), %eax
+ movapd (%eax), %xmm0
+ movlpd (%esp), %xmm0
+ movl 16(%esp), %eax
+ movapd %xmm0, (%eax)
+ addl $12, %esp
+ ret
+
+icc generates:
+
+ movl 4(%esp), %edx #3.6
+ movl 8(%esp), %eax #3.6
+ movapd (%eax), %xmm0 #4.22
+ movlpd 12(%esp), %xmm0 #4.8
+ movapd %xmm0, (%edx) #4.3
+ ret #5.1
+
+So icc is smart enough to know that B is in memory, so it doesn't load it and
+store it back to the stack.
+
+//===---------------------------------------------------------------------===//
+
+__m128d test1( __m128d A, __m128d B) {
+ return _mm_shuffle_pd(A, B, 0x3);
+}
+
+compiles to
+
+shufpd $3, %xmm1, %xmm0
+
+Perhaps it's better to use unpckhpd instead?
+
+unpckhpd %xmm1, %xmm0
+
+Don't know if unpckhpd is faster. But it is shorter.
+
+//===---------------------------------------------------------------------===//
+
+This code generates ugly code, probably due to costs being off or something:
+
+void %test(float* %P, <4 x float>* %P2 ) {
+ %xFloat0.688 = load float* %P
+ %loadVector37.712 = load <4 x float>* %P2
+ %inFloat3.713 = insertelement <4 x float> %loadVector37.712, float 0.000000e+00, uint 3
+ store <4 x float> %inFloat3.713, <4 x float>* %P2
+ ret void
+}
+
+Generates:
+
+_test:
+ pxor %xmm0, %xmm0
+ movd %xmm0, %eax ;; EAX = 0!
+ movl 8(%esp), %ecx
+ movaps (%ecx), %xmm0
+ pinsrw $6, %eax, %xmm0
+ shrl $16, %eax ;; EAX = 0 again!
+ pinsrw $7, %eax, %xmm0
+ movaps %xmm0, (%ecx)
+ ret
+
+It would be better to generate:
+
+_test:
+ movl 8(%esp), %ecx
+ movaps (%ecx), %xmm0
+ xor %eax, %eax
+ pinsrw $6, %eax, %xmm0
+ pinsrw $7, %eax, %xmm0
+ movaps %xmm0, (%ecx)
+ ret
+
+or use pxor (to make a zero vector) and shuffle (to insert it).
+
+//===---------------------------------------------------------------------===//
+
+Some useful information in the Apple Altivec / SSE Migration Guide:
+
+http://developer.apple.com/documentation/Performance/Conceptual/
+Accelerate_sse_migration/index.html
+
+e.g. SSE select using and, andnot, or. Various SSE compare translations.
+
+//===---------------------------------------------------------------------===//
+
+Add hooks to commute some CMPP operations.
+
+//===---------------------------------------------------------------------===//
+
+Apply the same transformation that merged four float loads into a single
+128-bit load to loads from the constant pool.
+
+//===---------------------------------------------------------------------===//
+
+Floating point max / min are commutable when -enable-unsafe-fp-path is
+specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other
+nodes which are selected to max / min instructions that are marked commutable.
+
+//===---------------------------------------------------------------------===//
+
+We should compile this:
+#include <xmmintrin.h>
+typedef union {
+ int i[4];
+ float f[4];
+ __m128 v;
+} vector4_t;
+void swizzle (const void *a, vector4_t * b, vector4_t * c) {
+ b->v = _mm_loadl_pi (b->v, (__m64 *) a);
+ c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
+}
+
+to:
+
+_swizzle:
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ movl 12(%esp), %ecx
+ movlps (%eax), %xmm0
+ movlps %xmm0, (%edx)
+ movlps 8(%eax), %xmm0
+ movlps %xmm0, (%ecx)
+ ret
+
+not:
+
+swizzle:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ movl 4(%esp), %ecx
+ movlps (%ecx), %xmm0
+ movaps %xmm0, (%eax)
+ movl 12(%esp), %eax
+ movaps (%eax), %xmm0
+ movlps 8(%ecx), %xmm0
+ movaps %xmm0, (%eax)
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+#include <emmintrin.h>
+__m128i test(long long i) { return _mm_cvtsi64x_si128(i); }
+
+Should turn into a single 'movq %rdi, %xmm0' instruction. Instead, we
+get this (on x86-64):
+
+_test:
+ movd %rdi, %xmm1
+ xorps %xmm0, %xmm0
+ movsd %xmm1, %xmm0
+ ret
+
+The LLVM IR is:
+
+target triple = "x86_64-apple-darwin8"
+define <2 x i64> @test(i64 %i) {
+entry:
+ %tmp10 = insertelement <2 x i64> undef, i64 %i, i32 0
+ %tmp11 = insertelement <2 x i64> %tmp10, i64 0, i32 1
+ ret <2 x i64> %tmp11
+}
+
+//===---------------------------------------------------------------------===//
+
+These functions should produce the same code:
+
+#include <emmintrin.h>
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int foo(__m128i* val) {
+ return __builtin_ia32_vec_ext_v4si(*val, 1);
+}
+int bar(__m128i* val) {
+ union vs {
+ __m128i *_v;
+ int* _s;
+ } v = {val};
+ return v._s[1];
+}
+
+We currently produce (with -m64):
+
+_foo:
+ pshufd $1, (%rdi), %xmm0
+ movd %xmm0, %eax
+ ret
+_bar:
+ movl 4(%rdi), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
new file mode 100644
index 0000000..191904a
--- /dev/null
+++ b/lib/Target/X86/README-X86-64.txt
@@ -0,0 +1,223 @@
+//===- README-X86-64.txt - Notes for X86-64 code gen ----------------------===//
+
+Implement different PIC models? Right now we only support Mac OS X with the
+small PIC code model.
+
+//===---------------------------------------------------------------------===//
+
+Make use of "Red Zone".
+
+//===---------------------------------------------------------------------===//
+
+Implement __int128 and long double support.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern void xx(void);
+void bar(void) {
+ xx();
+}
+
+gcc compiles to:
+
+.globl _bar
+_bar:
+ jmp _xx
+
+We need to do the tailcall optimization as well.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
+multiplication by a constant. How much of it applies to Intel's X86-64
+implementation? There are definite trade-offs to consider: latency vs. register
+pressure vs. code size.
+
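+As a reminder of the kind of decomposition the manual discusses (illustrative
+only; whether it actually wins depends on the chip):
+
+long mul9(long x)  { return (x << 3) + x; }         /* a single lea on x86-64 */
+long mul10(long x) { return ((x << 2) + x) << 1; }  /* lea + shift vs. one imul */
+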
+//===---------------------------------------------------------------------===//
+
+Are we better off using branches instead of cmove to implement FP to
+unsigned i64?
+
+_conv:
+ ucomiss LC0(%rip), %xmm0
+ cvttss2siq %xmm0, %rdx
+ jb L3
+ subss LC0(%rip), %xmm0
+ movabsq $-9223372036854775808, %rax
+ cvttss2siq %xmm0, %rdx
+ xorq %rax, %rdx
+L3:
+ movq %rdx, %rax
+ ret
+
+instead of
+
+_conv:
+ movss LCPI1_0(%rip), %xmm1
+ cvttss2siq %xmm0, %rcx
+ movaps %xmm0, %xmm2
+ subss %xmm1, %xmm2
+ cvttss2siq %xmm2, %rax
+ movabsq $-9223372036854775808, %rdx
+ xorq %rdx, %rax
+ ucomiss %xmm1, %xmm0
+ cmovb %rcx, %rax
+ ret
+
+Seems like the jb branch has a high likelihood of being taken. It would have
+saved a few instructions.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen:
+
+int X[2];
+int b;
+void test(void) {
+ memset(X, b, 2*sizeof(X[0]));
+}
+
+llc:
+ movq _b@GOTPCREL(%rip), %rax
+ movzbq (%rax), %rax
+ movq %rax, %rcx
+ shlq $8, %rcx
+ orq %rax, %rcx
+ movq %rcx, %rax
+ shlq $16, %rax
+ orq %rcx, %rax
+ movq %rax, %rcx
+ shlq $32, %rcx
+ movq _X@GOTPCREL(%rip), %rdx
+ orq %rax, %rcx
+ movq %rcx, (%rdx)
+ ret
+
+gcc:
+ movq _b@GOTPCREL(%rip), %rax
+ movabsq $72340172838076673, %rdx
+ movzbq (%rax), %rax
+ imulq %rdx, %rax
+ movq _X@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+ ret
+
+//===---------------------------------------------------------------------===//
+
+The vararg function prologue can be further optimized. Currently all XMM
+registers are stored into the register save area. Most of those stores can be
+eliminated since the upper bound of the number of XMM registers used is passed
+in %al. gcc produces something like the following:
+
+ movzbl %al, %edx
+ leaq 0(,%rdx,4), %rax
+ leaq 4+L2(%rip), %rdx
+ leaq 239(%rsp), %rax
+ jmp *%rdx
+ movaps %xmm7, -15(%rax)
+ movaps %xmm6, -31(%rax)
+ movaps %xmm5, -47(%rax)
+ movaps %xmm4, -63(%rax)
+ movaps %xmm3, -79(%rax)
+ movaps %xmm2, -95(%rax)
+ movaps %xmm1, -111(%rax)
+ movaps %xmm0, -127(%rax)
+L2:
+
+It jumps over the movaps stores that are not needed. It is hard to see this being
+significant as it added 5 instructions (including an indirect branch) to avoid
+executing 0 to 8 stores in the function prologue.
+
+Perhaps we can optimize for the common case where no XMM registers are used for
+parameter passing, i.e. if %al == 0, jump over all the stores. Or, in the case
+of a leaf function where we can determine that no XMM input parameter is needed,
+avoid emitting the stores at all.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 has a complex calling convention for aggregate passing by value:
+
+1. If the size of an object is larger than two eightbytes, or in C++, is a non-
+ POD structure or union type, or contains unaligned fields, it has class
+ MEMORY.
+2. Both eightbytes get initialized to class NO_CLASS.
+3. Each field of an object is classified recursively so that always two fields
+ are considered. The resulting class is calculated according to the classes
+ of the fields in the eightbyte:
+ (a) If both classes are equal, this is the resulting class.
+ (b) If one of the classes is NO_CLASS, the resulting class is the other
+ class.
+ (c) If one of the classes is MEMORY, the result is the MEMORY class.
+ (d) If one of the classes is INTEGER, the result is the INTEGER.
+ (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
+ class.
+ (f) Otherwise class SSE is used.
+4. Then a post merger cleanup is done:
+ (a) If one of the classes is MEMORY, the whole argument is passed in memory.
+ (b) If SSEUP is not preceded by SSE, it is converted to SSE.
+
+Currently the llvm frontend does not handle this correctly.
+
+Problem 1:
+ typedef struct { int i; double d; } QuadWordS;
+It is currently passed in two i64 integer registers. However, a gcc-compiled
+callee expects the second element 'd' to be passed in XMM0.
+
+Problem 2:
+ typedef struct { int32_t i; float j; double d; } QuadWordS;
+The size of the first two fields == i64, so they will be combined and passed in
+the integer register RDI. The third field is still passed in XMM0.
+
+Problem 3:
+ typedef struct { int64_t i; int8_t j; int64_t d; } S;
+ void test(S s)
+The size of this aggregate is greater than two i64s, so it should be passed in
+memory. Currently llvm breaks this down and passes it in three integer
+registers.
+
+Problem 4:
+Taking problem 3 one step further, a function may expect an aggregate value
+in memory followed by more parameter(s) passed in register(s).
+ void test(S s, int b)
+
+LLVM IR does not allow parameter passing by aggregates, therefore it must break
+the aggregate value (in problems 3 and 4) into a number of scalar values:
+ void %test(long %s.i, byte %s.j, long %s.d);
+
+However, if the backend were to lower this code literally, it would pass the 3
+values in integer registers. To force them to be passed in memory, the frontend
+should change the function signature to:
+ void %test(long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+And the callee would look something like this:
+ call void %test( undef, undef, undef, undef, undef, undef,
+ %tmp.s.i, %tmp.s.j, %tmp.s.d );
+The first 6 undef parameters would exhaust the 6 integer registers used for
+parameter passing. The following three integer values would then be forced into
+memory.
+
+For problem 4, the scalar parameter 'b' would be moved to the front of the
+parameter list so it will be passed in a register:
+ void %test(int %b,
+ long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+
+//===---------------------------------------------------------------------===//
+
+Right now the asm printer assumes GlobalAddress are accessed via RIP relative
+addressing. Therefore, it is not possible to generate this:
+ movabsq $__ZTV10polynomialIdE+16, %rax
+
+That is ok for now since we currently only support the small code model. So
+the above is selected as
+ leaq __ZTV10polynomialIdE+16(%rip), %rax
+
+This is probably slightly slower but is much shorter than movabsq. However, if
+we were to support medium or larger code models, we would need to use the movabs
+instruction. We should probably introduce something like AbsoluteAddress to
+distinguish it from GlobalAddress so the asm printer and JIT code emitter can
+do the right thing.
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
new file mode 100644
index 0000000..f15090a
--- /dev/null
+++ b/lib/Target/X86/README.txt
@@ -0,0 +1,1150 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend.
+//===---------------------------------------------------------------------===//
+
+Missing features:
+ - Support for SSE4: http://www.intel.com/software/penryn
+http://softwarecommunity.intel.com/isn/Downloads/Intel%20SSE4%20Programming%20Reference.pdf
+ - support for 3DNow!
+ - weird abis?
+
+//===---------------------------------------------------------------------===//
+
+Add MUL2U and MUL2S nodes to represent a multiply that returns both the Hi and
+Lo parts (a combination of MUL and MULH[SU] in one node). Add this to X86, and
+make the dag combiner produce it when needed. This will eliminate one imul from
+the code generated for:
+
+long long test(long long X, long long Y) { return X*Y; }
+
+by using the EAX result from the mul. We should add a similar node for
+DIVREM.
+
+Another case is:
+
+long long test(int X, int Y) { return (long long)X*Y; }
+
+... which should only be one imul instruction.
+
+or:
+
+unsigned long long int t2(unsigned int a, unsigned int b) {
+ return (unsigned long long)a * b;
+}
+
+... which should be one mul instruction.
+
+
+This can be done with a custom expander, but it would be nice to move this to
+generic code.
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86
+backend knows how to three-addressify this shift, but it appears the register
+allocator isn't even asking it to do so in this case. We should investigate
+why this isn't happening; it could have a significant impact on other important
+cases for X86 as well.
+
+//===---------------------------------------------------------------------===//
+
+This should be one DIV/IDIV instruction, not a libcall:
+
+unsigned test(unsigned long long X, unsigned Y) {
+ return X/Y;
+}
+
+This can be done trivially with a custom legalizer. What about overflow
+though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
+
+//===---------------------------------------------------------------------===//
+
+Improvements to the multiply -> shift/add algorithm:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html
+
+//===---------------------------------------------------------------------===//
+
+Improve code like this (occurs fairly frequently, e.g. in LLVM):
+long long foo(int x) { return 1LL << x; }
+
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html
+
+Another useful one would be ~0ULL >> X and ~0ULL << X.
+
+One better solution for 1LL << x is:
+ xorl %eax, %eax
+ xorl %edx, %edx
+ testb $32, %cl
+ sete %al
+ setne %dl
+ sall %cl, %eax
+ sall %cl, %edx
+
+But that requires good 8-bit subreg support.
+
+64-bit shifts (in general) expand to really bad code. Instead of using
+cmovs, we should expand to a conditional branch like GCC produces.
+
+//===---------------------------------------------------------------------===//
+
+Compile this:
+_Bool f(_Bool a) { return a!=1; }
+
+into:
+ movzbl %dil, %eax
+ xorl $1, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Some isel ideas:
+
+1. Dynamic programming based approach when compile time is not an
+ issue.
+2. Code duplication (addressing mode) during isel.
+3. Other ideas from "Register-Sensitive Selection, Duplication, and
+ Sequencing of Instructions".
+4. Scheduling for reduced register pressure. E.g. "Minimum Register
+ Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs"
+ and other related papers.
+ http://citeseer.ist.psu.edu/govindarajan01minimum.html
+
+//===---------------------------------------------------------------------===//
+
+Should we promote i16 to i32 to avoid partial register update stalls?
+
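+A hypothetical example of the pattern in question: arithmetic on i16 values
+writes only the low 16 bits of a register, and a later 32-bit read of that
+register can stall on some implementations while it merges with the stale
+upper bits.
+
+int widen(unsigned short a, unsigned short b) { return (unsigned short)(a + b); }
+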
+//===---------------------------------------------------------------------===//
+
+Leave any_extend as a pseudo instruction and hint to the register
+allocator. Delay codegen until post register allocation.
+
+//===---------------------------------------------------------------------===//
+
+Count leading zeros and count trailing zeros:
+
+int clz(int X) { return __builtin_clz(X); }
+int ctz(int X) { return __builtin_ctz(X); }
+
+$ gcc t.c -S -o - -O3 -fomit-frame-pointer -masm=intel
+clz:
+ bsr %eax, DWORD PTR [%esp+4]
+ xor %eax, 31
+ ret
+ctz:
+ bsf %eax, DWORD PTR [%esp+4]
+ ret
+
+However, check that these are defined for 0 and 32. Our intrinsics are, GCC's
+aren't.
+
+Another example (use predsimplify to eliminate a select):
+
+int foo (unsigned long j) {
+ if (j)
+ return __builtin_ffs (j) - 1;
+ else
+ return 0;
+}
+
+//===---------------------------------------------------------------------===//
+
+It appears icc uses push for parameter passing. Need to investigate.
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor. They are slower on the P4 due to only updating some processor
+flags.
+
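+For example (purely illustrative), for
+
+int bump(int x) { return x + 1; }
+
+either "incl" or "addl $1" is a legal choice for the increment; the preference
+should come from the subtarget.
+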
+//===---------------------------------------------------------------------===//
+
+The instruction selector sometimes misses folding a load into a compare. The
+pattern is written as (cmp reg, (load p)). Because the compare isn't
+commutative, it is not matched with the load on both sides. The dag combiner
+should be made smart enough to canonicalize the load into the RHS of a compare
+when it can invert the result of the compare for free.
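+
+A sketch of the kind of source that hits this (illustrative only; the name is
+made up):
+
+int cmp_load(int x, int *p) {
+  return x == *p;   /* the load of *p should end up on the RHS of the cmp */
+}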
+
+//===---------------------------------------------------------------------===//
+
+How about intrinsics? An example is:
+ *res = _mm_mulhi_epu16(*A, _mm_mul_epu32(*B, *C));
+
+compiles to
+ pmuludq (%eax), %xmm0
+ movl 8(%esp), %eax
+ movdqa (%eax), %xmm1
+ pmulhuw %xmm0, %xmm1
+
+The transformation probably requires an X86-specific pass or a DAG combiner
+target specific hook.
+
+//===---------------------------------------------------------------------===//
+
+In many cases, LLVM generates code like this:
+
+_test:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setl %al
+ movzbl %al, %eax
+ ret
+
+On some processors (which ones?), it is more efficient to do this:
+
+_test:
+ movl 8(%esp), %ebx
+ xor %eax, %eax
+ cmpl %ebx, 4(%esp)
+ setl %al
+ ret
+
+Doing this correctly is tricky though, as the xor clobbers the flags.
+
+//===---------------------------------------------------------------------===//
+
+We should generate bts/btr/etc instructions on targets where they are cheap or
+when codesize is important. e.g., for:
+
+void setbit(int *target, int bit) {
+ *target |= (1 << bit);
+}
+void clearbit(int *target, int bit) {
+ *target &= ~(1 << bit);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instead of the following for memset(char*, 1, 10):
+
+ movl $16843009, 4(%edx)
+ movl $16843009, (%edx)
+ movw $257, 8(%edx)
+
+It might be better to generate
+
+ movl $16843009, %eax
+ movl %eax, 4(%edx)
+ movl %eax, (%edx)
+ movw %ax, 8(%edx)
+
+when we can spare a register. It reduces code size.
+
+//===---------------------------------------------------------------------===//
+
+Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently
+get this:
+
+int %test1(int %X) {
+ %Y = div int %X, 8
+ ret int %Y
+}
+
+_test1:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ sarl $31, %ecx
+ shrl $29, %ecx
+ addl %ecx, %eax
+ sarl $3, %eax
+ ret
+
+GCC knows several different ways to codegen it, one of which is this:
+
+_test1:
+ movl 4(%esp), %eax
+ cmpl $-1, %eax
+ leal 7(%eax), %ecx
+ cmovle %ecx, %eax
+ sarl $3, %eax
+ ret
+
+which is probably slower, but it's interesting at least :)
+
+//===---------------------------------------------------------------------===//
+
+The first BB of this code:
+
+declare bool %foo()
+int %bar() {
+ %V = call bool %foo()
+ br bool %V, label %T, label %F
+T:
+ ret int 1
+F:
+ call bool %foo()
+ ret int 12
+}
+
+compiles to:
+
+_bar:
+ subl $12, %esp
+ call L_foo$stub
+ xorb $1, %al
+ testb %al, %al
+ jne LBB_bar_2 # F
+
+It would be better to emit "cmp %al, 1" than an xor and test.
+
+//===---------------------------------------------------------------------===//
+
+Enable X86InstrInfo::convertToThreeAddress().
+
+//===---------------------------------------------------------------------===//
+
+We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl.
+We should leave these as libcalls for everything over a much lower threshold,
+since libc is hand tuned for medium and large mem ops (avoiding RFO for large
+stores, TLB preheating, etc.).
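+
+e.g. (a made-up example) a copy like this falls into the large range described
+above and would be better left as a call to memcpy:
+
+#include <string.h>
+void copy1M(char *dst, const char *src) {
+  memcpy(dst, src, 1 << 20);   /* 1MB copy */
+}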
+
+//===---------------------------------------------------------------------===//
+
+Optimize this into something reasonable:
+ x * copysign(1.0, y) * copysign(1.0, z)
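+
+i.e. (illustrative wrapper only):
+
+#include <math.h>
+double sign_product(double x, double y, double z) {
+  return x * copysign(1.0, y) * copysign(1.0, z);
+}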
+
+//===---------------------------------------------------------------------===//
+
+Optimize copysign(x, *y) to use an integer load from y.
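+
+A sketch of the case in question (illustrative only); only the sign bit of *y
+is needed, so an integer load of the word holding the sign would suffice:
+
+#include <math.h>
+double apply_sign(double x, double *y) {
+  return copysign(x, *y);
+}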
+
+//===---------------------------------------------------------------------===//
+
+%X = weak global int 0
+
+void %foo(int %N) {
+ %N = cast int %N to uint
+ %tmp.24 = setgt int %N, 0
+ br bool %tmp.24, label %no_exit, label %return
+
+no_exit:
+ %indvar = phi uint [ 0, %entry ], [ %indvar.next, %no_exit ]
+ %i.0.0 = cast uint %indvar to int
+ volatile store int %i.0.0, int* %X
+ %indvar.next = add uint %indvar, 1
+ %exitcond = seteq uint %indvar.next, %N
+ br bool %exitcond, label %return, label %no_exit
+
+return:
+ ret void
+}
+
+compiles into:
+
+ .text
+ .align 4
+ .globl _foo
+_foo:
+ movl 4(%esp), %eax
+ cmpl $1, %eax
+ jl LBB_foo_4 # return
+LBB_foo_1: # no_exit.preheader
+ xorl %ecx, %ecx
+LBB_foo_2: # no_exit
+ movl L_X$non_lazy_ptr, %edx
+ movl %ecx, (%edx)
+ incl %ecx
+ cmpl %eax, %ecx
+ jne LBB_foo_2 # no_exit
+LBB_foo_3: # return.loopexit
+LBB_foo_4: # return
+ ret
+
+We should hoist "movl L_X$non_lazy_ptr, %edx" out of the loop after
+rematerialization is implemented. This can be accomplished with 1) a target
+dependent LICM pass or 2) making the SelectionDAG represent the whole function.
+
+//===---------------------------------------------------------------------===//
+
+The following tests perform worse with LSR:
+
+lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
+
+//===---------------------------------------------------------------------===//
+
+We are generating far worse code than gcc:
+
+volatile short X, Y;
+
+void foo(int N) {
+ int i;
+ for (i = 0; i < N; i++) { X = i; Y = i*4; }
+}
+
+LBB1_1: #bb.preheader
+ xorl %ecx, %ecx
+ xorw %dx, %dx
+LBB1_2: #bb
+ movl L_X$non_lazy_ptr, %esi
+ movw %dx, (%esi)
+ movw %dx, %si
+ shlw $2, %si
+ movl L_Y$non_lazy_ptr, %edi
+ movw %si, (%edi)
+ incl %ecx
+ incw %dx
+ cmpl %eax, %ecx
+ jne LBB1_2 #bb
+
+vs.
+
+ xorl %edx, %edx
+ movl L_X$non_lazy_ptr-"L00000000001$pb"(%ebx), %esi
+ movl L_Y$non_lazy_ptr-"L00000000001$pb"(%ebx), %ecx
+L4:
+ movw %dx, (%esi)
+ leal 0(,%edx,4), %eax
+ movw %ax, (%ecx)
+ addl $1, %edx
+ cmpl %edx, %edi
+ jne L4
+
+There are 3 issues:
+
+1. Lack of post regalloc LICM.
+2. Poor sub-regclass support. That leads to an inability to promote the 16-bit
+   arithmetic ops to 32-bit and make use of leal.
+3. LSR is unable to reuse the IV for a different type (i16 vs. i32) even though
+   the cast would be free.
+
+//===---------------------------------------------------------------------===//
+
+Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
+FR64 to VR128.
+
+//===---------------------------------------------------------------------===//
+
+mov $reg, 48(%esp)
+...
+leal 48(%esp), %eax
+mov %eax, (%esp)
+call _foo
+
+Obviously it would have been better for the first mov (or any op) to store
+directly to %esp[0] if there are no other uses.
+
+//===---------------------------------------------------------------------===//
+
+Adding to the list of cmp / test poor codegen issues:
+
+int test(__m128 *A, __m128 *B) {
+ if (_mm_comige_ss(*A, *B))
+ return 3;
+ else
+ return 4;
+}
+
+_test:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ movl 4(%esp), %eax
+ movaps (%eax), %xmm1
+ comiss %xmm0, %xmm1
+ setae %al
+ movzbl %al, %ecx
+ movl $3, %eax
+ movl $4, %edx
+ cmpl $0, %ecx
+ cmove %edx, %eax
+ ret
+
+Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There
+are a number of issues. 1) We are introducing a setcc between the result of the
+intrinsic call and select. 2) The intrinsic is expected to produce an i32 value
+so an any_extend (which becomes a zero extend) is added.
+
+We probably need some kind of target DAG combine hook to fix this.
+
+//===---------------------------------------------------------------------===//
+
+We generate significantly worse code for this than GCC:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701
+
+There is also one case where we do worse on PPC.
+
+//===---------------------------------------------------------------------===//
+
+If shorter, we should use things like:
+movzwl %ax, %eax
+instead of:
+andl $65535, %EAX
+
+The former can also be used when the two-addressy nature of the 'and' would
+require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
+
+//===---------------------------------------------------------------------===//
+
+Bad codegen:
+
+char foo(int x) { return x; }
+
+_foo:
+ movl 4(%esp), %eax
+ shll $24, %eax
+ sarl $24, %eax
+ ret
+
+SIGN_EXTEND_INREG can be implemented as (sext (trunc)) to take advantage of
+sub-registers.
+
+//===---------------------------------------------------------------------===//
+
+Consider this:
+
+typedef struct pair { float A, B; } pair;
+void pairtest(pair P, float *FP) {
+ *FP = P.A+P.B;
+}
+
+We currently generate this code with llvmgcc4:
+
+_pairtest:
+ movl 8(%esp), %eax
+ movl 4(%esp), %ecx
+ movd %eax, %xmm0
+ movd %ecx, %xmm1
+ addss %xmm0, %xmm1
+ movl 12(%esp), %eax
+ movss %xmm1, (%eax)
+ ret
+
+we should be able to generate:
+_pairtest:
+ movss 4(%esp), %xmm0
+ movl 12(%esp), %eax
+ addss 8(%esp), %xmm0
+ movss %xmm0, (%eax)
+ ret
+
+The issue is that llvmgcc4 is forcing the struct to memory, then passing it as
+integer chunks. It does this so that structs like {short,short} are passed in
+a single 32-bit integer stack slot. We should handle the safe cases above much
+more nicely, while still handling the hard cases.
+
+While true in general, in this specific case we could do better by promoting
+load int + bitcast to float -> load float. This basically needs alignment info;
+the code is already implemented (but disabled) in dag combine.
+
+//===---------------------------------------------------------------------===//
+
+Another instruction selector deficiency:
+
+void %bar() {
+ %tmp = load int (int)** %foo
+ %tmp = tail call int %tmp( int 3 )
+ ret void
+}
+
+_bar:
+ subl $12, %esp
+ movl L_foo$non_lazy_ptr, %eax
+ movl (%eax), %eax
+ call *%eax
+ addl $12, %esp
+ ret
+
+The current isel scheme will not allow the load to be folded in the call since
+the load's chain result is read by the callseq_start.
+
+//===---------------------------------------------------------------------===//
+
+Don't forget to find a way to squash noop truncates in the JIT environment.
+
+//===---------------------------------------------------------------------===//
+
+Implement anyext in the same manner as truncate, which would allow them to be
+eliminated.
+
+//===---------------------------------------------------------------------===//
+
+How about implementing truncate / anyext as a property of a machine instruction
+operand? i.e. print as a 32-bit super-class register / 16-bit sub-class register.
+Do this for the cases where a truncate / anyext is guaranteed to be eliminated.
+For IA32 that is truncate from 32 to 16 and anyext from 16 to 32.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+int test(int a)
+{
+ return a * 3;
+}
+
+We currently emit
+ imull $3, 4(%esp), %eax
+
+Perhaps this is what we really should generate? Is imull three or four
+cycles? Note: ICC generates this:
+ movl 4(%esp), %eax
+ leal (%eax,%eax,2), %eax
+
+The current instruction priority is based on pattern complexity. The former is
+more "complex" because it folds a load so the latter will not be emitted.
+
+Perhaps we should use AddedComplexity to give LEA32r a higher priority? We
+should always try to match LEA first since the LEA matching code does some
+estimate to determine whether the match is profitable.
+
+However, if we care more about code size, then imull is better. It's two bytes
+shorter than movl + leal.
+
+//===---------------------------------------------------------------------===//
+
+Implement CTTZ, CTLZ with bsf and bsr.
+
+//===---------------------------------------------------------------------===//
+
+It appears gcc places string data with linkonce linkage in
+.section __TEXT,__const_coal,coalesced instead of
+.section __DATA,__const_coal,coalesced.
+Take a look at darwin.h; there are other Darwin assembler directives that we
+do not make use of.
+
+//===---------------------------------------------------------------------===//
+
+int %foo(int* %a, int %t) {
+entry:
+ br label %cond_true
+
+cond_true: ; preds = %cond_true, %entry
+ %x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ]
+ %t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ]
+ %tmp2 = getelementptr int* %a, int %x.0.0
+ %tmp3 = load int* %tmp2 ; <int> [#uses=1]
+ %tmp5 = add int %t_addr.0.0, %x.0.0 ; <int> [#uses=1]
+ %tmp7 = add int %tmp5, %tmp3 ; <int> [#uses=2]
+ %tmp9 = add int %x.0.0, 1 ; <int> [#uses=2]
+ %tmp = setgt int %tmp9, 39 ; <bool> [#uses=1]
+ br bool %tmp, label %bb12, label %cond_true
+
+bb12: ; preds = %cond_true
+ ret int %tmp7
+}
+
+is pessimized by -loop-reduce and -indvars
+
+//===---------------------------------------------------------------------===//
+
+u32 to float conversion improvement:
+
+float uint32_2_float( unsigned u ) {
+ float fl = (int) (u & 0xffff);
+ float fh = (int) (u >> 16);
+ fh *= 0x1.0p16f;
+ return fh + fl;
+}
+
+00000000 subl $0x04,%esp
+00000003 movl 0x08(%esp,1),%eax
+00000007 movl %eax,%ecx
+00000009 shrl $0x10,%ecx
+0000000c cvtsi2ss %ecx,%xmm0
+00000010 andl $0x0000ffff,%eax
+00000015 cvtsi2ss %eax,%xmm1
+00000019 mulss 0x00000078,%xmm0
+00000021 addss %xmm1,%xmm0
+00000025 movss %xmm0,(%esp,1)
+0000002a flds (%esp,1)
+0000002d addl $0x04,%esp
+00000030 ret
+
+//===---------------------------------------------------------------------===//
+
+When using the fastcc ABI, align the stack slot of an argument of type double
+on an 8-byte boundary to improve performance.
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+
+int f(int a, int b) {
+ if (a == 4 || a == 6)
+ b++;
+ return b;
+}
+
+
+as:
+
+or eax, 2
+cmp eax, 6
+jz label
+
+//===---------------------------------------------------------------------===//
+
+GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
+simplifications for integer "x cmp y ? a : b". For example, instead of:
+
+int G;
+void f(int X, int Y) {
+ G = X < 0 ? 14 : 13;
+}
+
+compiling to:
+
+_f:
+ movl $14, %eax
+ movl $13, %ecx
+ movl 4(%esp), %edx
+ testl %edx, %edx
+ cmovl %eax, %ecx
+ movl %ecx, _G
+ ret
+
+it could be:
+_f:
+ movl 4(%esp), %eax
+ sarl $31, %eax
+ notl %eax
+ addl $14, %eax
+ movl %eax, _G
+ ret
+
+etc.
+
+//===---------------------------------------------------------------------===//
+
+Currently we don't have elimination of redundant stack manipulations. Consider
+the code:
+
+int %main() {
+entry:
+ call fastcc void %test1( )
+ call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
+ ret int 0
+}
+
+declare fastcc void %test1()
+
+declare fastcc void %test2(sbyte*)
+
+
+This currently compiles to:
+
+ subl $16, %esp
+ call _test5
+ addl $12, %esp
+ subl $16, %esp
+ movl $_test5, (%esp)
+ call _test6
+ addl $12, %esp
+
+The add/sub pair is really unneeded here.
+
+//===---------------------------------------------------------------------===//
+
+We currently compile sign_extend_inreg into two shifts:
+
+long foo(long X) {
+ return (long)(signed char)X;
+}
+
+becomes:
+
+_foo:
+ movl 4(%esp), %eax
+ shll $24, %eax
+ sarl $24, %eax
+ ret
+
+This could be:
+
+_foo:
+ movsbl 4(%esp),%eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Consider the expansion of:
+
+uint %test3(uint %X) {
+ %tmp1 = rem uint %X, 255
+ ret uint %tmp1
+}
+
+Currently it compiles to:
+
+...
+ movl $2155905153, %ecx
+ movl 8(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+...
+
+This could be "reassociated" into:
+
+ movl $2155905153, %eax
+ movl 8(%esp), %ecx
+ mull %ecx
+
+to avoid the copy. In fact, the existing two-address stuff would do this
+except that mul isn't a commutative 2-addr instruction. I guess this has
+to be done at isel time based on the #uses of mul?
+
+//===---------------------------------------------------------------------===//
+
+Make sure the instruction which starts a loop does not cross a cacheline
+boundary. This requires knowing the exact length of each machine instruction.
+That is somewhat complicated, but doable. Example 256.bzip2:
+
+In the new trace, the hot loop has an instruction which crosses a cacheline
+boundary. In addition to potential cache misses, this can't help decoding as I
+imagine there has to be some kind of complicated decoder reset and realignment
+to grab the bytes from the next cacheline.
+
+532 532 0x3cfc movb 1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines
+942 942 0x3d03 movb %dh, 1809(%esp, %esi)
+937 937 0x3d0a incl %esi
+3 3 0x3d0b cmpb %bl, %dl
+27 27 0x3d0d jnz 0x000062db <main+11707>
+
+//===---------------------------------------------------------------------===//
+
+In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE.
+
+//===---------------------------------------------------------------------===//
+
+This could be a single 16-bit load.
+
+int f(char *p) {
+ if ((p[0] == 1) & (p[1] == 2)) return 1;
+ return 0;
+}
+
+//===---------------------------------------------------------------------===//
+
+We should inline lrintf and probably other libc functions.
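+
+For example (illustrative only; assumes SSE is available and the default
+rounding mode), this should presumably become a single cvtss2si rather than a
+libcall:
+
+#include <math.h>
+long round_to_long(float x) {
+  return lrintf(x);
+}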
+
+//===---------------------------------------------------------------------===//
+
+Start using the flags more. For example, compile:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x += y) == 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+ addl %esi, (%rdi)
+ movl %edx, %eax
+ cmovne %ecx, %eax
+ ret
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ movl %esi, (%rdi)
+ testl %esi, %esi
+ cmove %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+and:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x + y) < 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+
+add_zf:
+ addl (%rdi), %esi
+ movl %edx, %eax
+ cmovns %ecx, %eax
+ ret
+
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ testl %esi, %esi
+ cmovs %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This:
+#include <math.h>
+int foo(double X) { return isnan(X); }
+
+compiles to (-m64):
+
+_foo:
+ pxor %xmm1, %xmm1
+ ucomisd %xmm1, %xmm0
+ setp %al
+ movzbl %al, %eax
+ ret
+
+The pxor is not needed; we could compare the value against itself.
+
+//===---------------------------------------------------------------------===//
+
+These two functions have identical effects:
+
+unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
+unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
+
+We currently compile them to:
+
+_f:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ movl 8(%esp), %edx
+ cmpl %edx, %ecx
+ jne LBB1_2 #UnifiedReturnBlock
+LBB1_1: #cond_true
+ addl $2, %eax
+ ret
+LBB1_2: #UnifiedReturnBlock
+ movl %ecx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ cmpl 8(%esp), %ecx
+ sete %cl
+ movzbl %cl, %ecx
+ leal 1(%ecx,%eax), %eax
+ ret
+
+both of which are inferior to GCC's:
+
+_f:
+ movl 4(%esp), %edx
+ leal 1(%edx), %eax
+ addl $2, %edx
+ cmpl 8(%esp), %eax
+ cmove %edx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ addl $1, %eax
+ xorl %edx, %edx
+ cmpl 8(%esp), %eax
+ sete %dl
+ addl %edx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void test(int X) {
+ if (X) abort();
+}
+
+is currently compiled to:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne LBB1_1
+ addl $12, %esp
+ ret
+LBB1_1:
+ call L_abort$stub
+
+It would be better to produce:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne L_abort$stub
+ addl $12, %esp
+ ret
+
+This can be applied to any no-return function call that takes no arguments etc.
+Alternatively, the stack save/restore logic could be shrink-wrapped, producing
+something like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ subl $12, %esp
+ call L_abort$stub
+
+Both are useful in different situations. Finally, it could be shrink-wrapped
+and tail called, like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ pop %eax # realign stack.
+ call L_abort$stub
+
+Though this probably isn't worth it.
+
+//===---------------------------------------------------------------------===//
+
+We need to teach the codegen to convert two-address INC instructions to LEA
+when the flags are dead. For example, on X86-64, compile:
+
+int foo(int A, int B) {
+ return A+1;
+}
+
+to:
+
+_foo:
+ leal 1(%edi), %eax
+ ret
+
+instead of:
+
+_foo:
+ incl %edi
+ movl %edi, %eax
+ ret
+
+Another example is:
+
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: not grep {mov E.X, E.X}
+
+%G = external global int
+
+int %test1(int %X, int %Y) {
+ %Z = add int %X, %Y
+ volatile store int %Y, int* %G
+ volatile store int %Z, int* %G
+ ret int %X
+}
+
+int %test2(int %X) {
+ %Z = add int %X, 1 ;; inc
+ volatile store int %Z, int* %G
+ ret int %X
+}
+
+//===---------------------------------------------------------------------===//
+
+We use push/pop of stack space around calls in situations where we don't have to.
+The call to f below produces:
+ subl $16, %esp <<<<<
+ movl %eax, (%esp)
+ call L_f$stub
+ addl $16, %esp <<<<<
+The stack push/pop can be moved into the prolog/epilog. It does this because it
+is building the frame pointer, but that should not be sufficient; only the use
+of alloca should cause it to do this.
+(There are other issues shown by this code, but this is one.)
+
+typedef struct _range_t {
+ float fbias;
+ float fscale;
+ int ibias;
+ int iscale;
+ int ishift;
+ unsigned char lut[];
+} range_t;
+
+struct _decode_t {
+ int type:4;
+ int unit:4;
+ int alpha:8;
+ int N:8;
+ int bpc:8;
+ int bpp:16;
+ int skip:8;
+ int swap:8;
+ const range_t*const*range;
+};
+
+typedef struct _decode_t decode_t;
+
+extern int f(const decode_t* decode);
+
+int decode_byte (const decode_t* decode) {
+ if (decode->swap != 0)
+ return f(decode);
+ return 0;
+}
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+#include <xmmintrin.h>
+unsigned test(float f) {
+ return _mm_cvtsi128_si32( (__m128i) _mm_set_ss( f ));
+}
+
+Compiles to:
+_test:
+ movss 4(%esp), %xmm0
+ movd %xmm0, %eax
+ ret
+
+It should compile to a move from the stack slot directly into eax. DAGCombine
+has this xform, but it is currently disabled until the alignment fields of
+the load/store nodes are trustworthy.
+
+//===---------------------------------------------------------------------===//
+
+Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
+a neg instead of a sub instruction. Consider:
+
+int test(char X) { return 7-X; }
+
+we currently produce:
+_test:
+ movl $7, %eax
+ movsbl 4(%esp), %ecx
+ subl %ecx, %eax
+ ret
+
+We would use one fewer register if codegen'd as:
+
+ movsbl 4(%esp), %eax
+ neg %eax
+ add $7, %eax
+ ret
+
+Note that this isn't beneficial if the load can be folded into the sub. In
+this case, we want a sub:
+
+int test(int X) { return 7-X; }
+_test:
+ movl $7, %eax
+ subl 4(%esp), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+For code like:
+phi (undef, x)
+
+We get an implicit def on the undef side. If the phi is spilled, we then get:
+implicitdef xmm1
+store xmm1 -> stack
+
+It should be possible to teach the x86 backend to "fold" the store into the
+implicitdef, which just deletes the implicit def.
+
+These instructions should go away:
+#IMPLICIT_DEF %xmm1
+movaps %xmm1, 192(%esp)
+movaps %xmm1, 224(%esp)
+movaps %xmm1, 176(%esp)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
new file mode 100644
index 0000000..c7663be
--- /dev/null
+++ b/lib/Target/X86/X86.h
@@ -0,0 +1,66 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include <iosfwd>
+
+namespace llvm {
+
+class X86TargetMachine;
+class FunctionPassManager;
+class FunctionPass;
+class MachineCodeEmitter;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM, bool Fast);
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description.
+///
+FunctionPass *createX86CodePrinterPass(std::ostream &o, X86TargetMachine &tm);
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE);
+
+/// createEmitX86CodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+} // End llvm namespace
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#include "X86GenRegisterNames.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#include "X86GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
new file mode 100644
index 0000000..98362c8
--- /dev/null
+++ b/lib/Target/X86/X86.td
@@ -0,0 +1,150 @@
+//===- X86.td - Target definition file for the Intel X86 arch ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred to
+// here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "../Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ [FeatureMMX]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions">;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions",
+ [FeatureSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", []>;
+def : Proc<"pentium2", [FeatureMMX]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium-m", [FeatureSSE2]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"x86-64", [Feature64Bit]>;
+def : Proc<"yonah", [FeatureSSE3]>;
+def : Proc<"prescott", [FeatureSSE3]>;
+def : Proc<"nocona", [FeatureSSE3]>;
+def : Proc<"core2", [FeatureSSSE3]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
+def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
+def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
+def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo {
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the X86InstrInfo.h file.
+ let TSFlagsFields = ["FormBits",
+ "hasOpSizePrefix",
+ "hasAdSizePrefix",
+ "Prefix",
+ "hasREX_WPrefix",
+ "ImmTypeBits",
+ "FPFormBits",
+ "Opcode"];
+ let TSFlagsShifts = [0,
+ 6,
+ 7,
+ 8,
+ 12,
+ 13,
+ 16,
+ 24];
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel}
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTAsmPrinter";
+ int Variant = 0;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelAsmPrinter";
+ int Variant = 1;
+}
+
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp
new file mode 100755
index 0000000..e97babe
--- /dev/null
+++ b/lib/Target/X86/X86ATTAsmPrinter.cpp
@@ -0,0 +1,607 @@
+//===-- X86ATTAsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to AT&T format assembly
+// language. This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTAsmPrinter.h"
+#include "X86.h"
+#include "X86COFF.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "X86TargetAsmInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static std::string computePICLabel(unsigned FnNum,
+ const TargetAsmInfo *TAI,
+ const X86Subtarget* Subtarget) {
+ std::string label;
+ if (Subtarget->isTargetDarwin())
+ label = "\"L" + utostr_32(FnNum) + "$pb\"";
+ else if (Subtarget->isTargetELF())
+ label = ".Lllvm$" + utostr_32(FnNum) + "$piclabel";
+ else
+ assert(0 && "Don't know how to print PIC label!\n");
+
+ return label;
+}
+
+/// getSectionForFunction - Return the section that we should emit the
+/// specified function body into.
+std::string X86ATTAsmPrinter::getSectionForFunction(const Function &F) const {
+ switch (F.getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage:
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ return TAI->getTextSection();
+ case Function::WeakLinkage:
+ case Function::LinkOnceLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ return ".section __TEXT,__textcoal_nt,coalesced,pure_instructions";
+ } else if (Subtarget->isTargetCygMing()) {
+ return "\t.section\t.text$linkonce." + CurrentFnName + ",\"ax\"";
+ } else {
+ return "\t.section\t.llvm.linkonce.t." + CurrentFnName +
+ ",\"ax\",@progbits";
+ }
+ }
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (TAI->doesSupportDebugInformation()) {
+ // Let PassManager know we need debug information and relay
+ // the MachineModuleInfo address on to DwarfWriter.
+ DW.SetModuleInfo(&getAnalysis<MachineModuleInfo>());
+ }
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ unsigned CC = F->getCallingConv();
+
+ // Populate function information map. Actually, we don't want to populate
+ // non-stdcall or non-fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ X86SharedAsmPrinter::decorateName(CurrentFnName, F);
+
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ EmitAlignment(4, F); // FIXME: This should be parameterized somewhere.
+ break;
+ case Function::DLLExportLinkage:
+ DLLExportedFns.insert(Mang->makeNameProper(F->getName(), ""));
+ //FALLS THROUGH
+ case Function::ExternalLinkage:
+ EmitAlignment(4, F); // FIXME: This should be parameterized somewhere.
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ break;
+ case Function::LinkOnceLinkage:
+ case Function::WeakLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ } else if (Subtarget->isTargetCygMing()) {
+ EmitAlignment(4, F); // FIXME: This should be parameterized somewhere.
+ O << "\t.globl " << CurrentFnName << "\n";
+ O << "\t.linkonce discard\n";
+ } else {
+ EmitAlignment(4, F); // FIXME: This should be parameterized somewhere.
+ O << "\t.weak " << CurrentFnName << "\n";
+ }
+ break;
+ }
+ if (F->hasHiddenVisibility()) {
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << CurrentFnName << "\n";
+ } else if (F->hasProtectedVisibility()) {
+ if (const char *Directive = TAI->getProtectedDirective())
+ O << Directive << CurrentFnName << "\n";
+ }
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type " << CurrentFnName << ",@function\n";
+ else if (Subtarget->isTargetCygMing()) {
+ O << "\t.def\t " << CurrentFnName
+ << ";\t.scl\t" <<
+ (F->getLinkage() == Function::InternalLinkage ? COFF::C_STAT : COFF::C_EXT)
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ O << CurrentFnName << ":\n";
+ // Add some workaround for linkonce linkage on Cygwin/MinGW
+ if (Subtarget->isTargetCygMing() &&
+ (F->getLinkage() == Function::LinkOnceLinkage ||
+ F->getLinkage() == Function::WeakLinkage))
+ O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+
+ if (TAI->doesSupportDebugInformation()) {
+ // Emit pre-function debug information.
+ DW.BeginFunction(&MF);
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I->pred_begin() != I->pred_end()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
+
+ if (TAI->doesSupportDebugInformation()) {
+ // Emit post-function debug information.
+ DW.EndFunction();
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+static inline bool printGOT(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_;
+}
+
+static inline bool printStub(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
+}
+
+void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier, bool NotRIPRel) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert(MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should not make it this far!");
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
+ O << (char)tolower(*Name);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier ||
+ (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+ O << '$';
+ O << MO.getImmedValue();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_"
+ << MO.getJumpTableIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getConstantPoolIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << "+" << Offset;
+ else if (Offset < 0)
+ O << Offset;
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool needCloseParen = false;
+
+ GlobalValue *GV = MO.getGlobal();
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool isThreadLocal = GVar && GVar->isThreadLocal();
+
+ std::string Name = Mang->getValueName(GV);
+ X86SharedAsmPrinter::decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ if (printStub(TM, Subtarget)) {
+ // Link-once, declaration, or Weakly-linked global variables need
+ // non-lazily-resolved stubs
+ if (GV->isDeclaration() ||
+ GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage()) {
+ // Dynamically-resolved functions need a stub for the function.
+ if (isCallOp && isa<Function>(GV)) {
+ FnStubs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ } else {
+ GVStubs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$non_lazy_ptr";
+ }
+ } else {
+ if (GV->hasDLLImportLinkage())
+ O << "__imp_";
+ O << Name;
+ }
+
+ if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ } else {
+ if (GV->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name;
+
+ if (isCallOp && isa<Function>(GV)) {
+ if (printGOT(TM, Subtarget)) {
+ // Assemble call via PLT for non-local symbols
+ if (!(GV->hasHiddenVisibility() || GV->hasProtectedVisibility()) ||
+ GV->isDeclaration())
+ O << "@PLT";
+ }
+ if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+ // Save function name for later type emission
+ FnStubs.insert(Name);
+ }
+ }
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << "+" << Offset;
+ else if (Offset < 0)
+ O << Offset;
+
+ if (isThreadLocal) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "@TLSGD"; // general dynamic TLS model
+ else
+ if (GV->isDeclaration())
+ O << "@INDNTPOFF"; // initial exec TLS model
+ else
+ O << "@NTPOFF"; // local exec TLS model
+ } else if (isMemOp) {
+ if (printGOT(TM, Subtarget)) {
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
+ O << "@GOT";
+ else
+ O << "@GOTOFF";
+ } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+ if ((GV->isDeclaration() ||
+ GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage()) &&
+ TM.getRelocationModel() != Reloc::Static)
+ O << "@GOTPCREL";
+
+ if (needCloseParen) {
+ needCloseParen = false;
+ O << ')';
+ }
+
+ // Use rip when possible to reduce code size, except when
+ // index or base register are also part of the address. e.g.
+ // foo(%rip)(%rcx,%rax,4) is not legal
+ O << "(%rip)";
+ }
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool needCloseParen = false;
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp && printStub(TM, Subtarget)) {
+ FnStubs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ return;
+ }
+ if (!isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ O << Name;
+
+ if (printGOT(TM, Subtarget)) {
+ std::string GOTName(TAI->getGlobalPrefix());
+ GOTName+="_GLOBAL_OFFSET_TABLE_";
+ if (Name == GOTName)
+ // HACK! Emit extra offset to PC during printing GOT offset to
+ // compensate for the size of popl instruction. The resulting code
+ // should look like:
+ // call .piclabel
+ // piclabel:
+ // popl %some_register
+ // addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ O << " + [.-"
+ << computePICLabel(getFunctionNumber(), TAI, Subtarget) << "]";
+
+ if (isCallOp)
+ O << "@PLT";
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ if (!isCallOp && Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
+
+void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImmedValue();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier){
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ MachineOperand BaseReg = MI->getOperand(Op);
+ MachineOperand IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ bool NotRIPRel = IndexReg.getReg() || BaseReg.getReg();
+ if (DispSpec.isGlobalAddress() ||
+ DispSpec.isConstantPoolIndex() ||
+ DispSpec.isJumpTableIndex()) {
+ printOperand(MI, Op+3, "mem", NotRIPRel);
+ } else {
+ int DispVal = DispSpec.getImmedValue();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ unsigned ScaleVal = MI->getOperand(Op+1).getImmedValue();
+ unsigned BaseRegOperand = 0, IndexRegOperand = 2;
+
+ // There are cases where we can end up with ESP/RSP in the indexreg slot.
+ // If this happens, swap the base/index register to support assemblers that
+ // don't work when the index is *SP.
+ if (IndexReg.getReg() == X86::ESP || IndexReg.getReg() == X86::RSP) {
+ assert(ScaleVal == 1 && "Scale not supported for stack pointer!");
+ std::swap(BaseReg, IndexReg);
+ std::swap(BaseRegOperand, IndexRegOperand);
+ }
+
+ O << "(";
+ if (BaseReg.getReg())
+ printOperand(MI, Op+BaseRegOperand, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ",";
+ printOperand(MI, Op+IndexRegOperand, Modifier);
+ if (ScaleVal != 1)
+ O << "," << ScaleVal;
+ }
+ O << ")";
+ }
+}
+
+void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ std::string label = computePICLabel(getFunctionNumber(), TAI, Subtarget);
+ O << label << "\n" << label << ":";
+}
+
+
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ }
+
+ O << '%';
+ for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
+ O << (char)tolower(*Name);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ printOperand(MI, OpNo, "mem");
+ return false;
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ if (MI->getOperand(OpNo).isReg())
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'P': // Don't print @PLT, but do print as memory.
+ printOperand(MI, OpNo, "mem");
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction
+/// MI in AT&T syntax to the current output stream.
+///
+void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // See if a truncate instruction can be turned into a nop.
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::TRUNC_64to32:
+ case X86::TRUNC_64to16:
+ case X86::TRUNC_32to16:
+ case X86::TRUNC_32to8:
+ case X86::TRUNC_16to8:
+ case X86::TRUNC_32_to8:
+ case X86::TRUNC_16_to8: {
+ const MachineOperand &MO0 = MI->getOperand(0);
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned Reg0 = MO0.getReg();
+ unsigned Reg1 = MO1.getReg();
+ unsigned Opc = MI->getOpcode();
+ if (Opc == X86::TRUNC_64to32)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
+ else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
+ else
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
+ O << TAI->getCommentString() << " TRUNCATE ";
+ if (Reg0 != Reg1)
+ O << "\n\t";
+ break;
+ }
+ case X86::PsMOVZX64rr32:
+ O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
+ break;
+ }
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter.inc"
+
diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h
new file mode 100755
index 0000000..a3bdce9
--- /dev/null
+++ b/lib/Target/X86/X86ATTAsmPrinter.h
@@ -0,0 +1,87 @@
+//===-- X86ATTAsmPrinter.h - Convert X86 LLVM code to AT&T assembly -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ATTASMPRINTER_H
+#define X86ATTASMPRINTER_H
+
+#include "X86AsmPrinter.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
+ X86ATTAsmPrinter(std::ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T)
+ : X86SharedAsmPrinter(O, TM, T) { }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0, bool NotRIPRel = false);
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ /// getSectionForFunction - Return the section that we should emit the
+ /// specified function body into.
+ virtual std::string getSectionForFunction(const Function &F) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
new file mode 100644
index 0000000..59b9b1f
--- /dev/null
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -0,0 +1,409 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the shared superclass printer that converts from our internal
+// representation of machine-dependent LLVM code to Intel and AT&T format
+// assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86AsmPrinter.h"
+#include "X86ATTAsmPrinter.h"
+#include "X86COFF.h"
+#include "X86IntelAsmPrinter.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ // Size should be aligned to DWORD boundary
+ Size += ((TD->getTypeSize(AI->getType()) + 3)/4)*4;
+
+ // We're not supporting tooooo huge arguments :)
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
+
+
+/// decorateName - Query FunctionInfoMap and use this information for various
+/// name decoration.
+void X86SharedAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ // Decorate names only when we're targeting Cygwin/Mingw32 targets
+ if (!Subtarget->isTargetCygMing())
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+ // Calculate appropriate function info and populate map
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && FT->isStructReturn()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && FT->isStructReturn()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+ if (Name[0] == '_') {
+ Name[0] = '@';
+ } else {
+ Name = '@' + Name;
+ }
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
+
+/// doInitialization
+bool X86SharedAsmPrinter::doInitialization(Module &M) {
+ if (TAI->doesSupportDebugInformation()) {
+ // Emit initial debug information.
+ DW.BeginModule(&M);
+ }
+
+ AsmPrinter::doInitialization(M);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ return false;
+}
+
+bool X86SharedAsmPrinter::doFinalization(Module &M) {
+ // Note: this code is not shared by the Intel printer as it is too different
+ // from how MASM does things. When making changes here don't forget to look
+ // at X86IntelAsmPrinter::doFinalization().
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasInitializer())
+ continue; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (EmitSpecialLLVMGlobal(I)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (I->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (I->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ continue;
+ }
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+
+ if (I->hasHiddenVisibility()) {
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << name << "\n";
+ } else if (I->hasProtectedVisibility()) {
+ if (const char *Directive = TAI->getProtectedDirective())
+ O << Directive << name << "\n";
+ }
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type " << name << ",@object\n";
+
+ if (C->isNullValue()) {
+ if (I->hasExternalLinkage()) {
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl\t" << name << "\n";
+ O << Directive << "__DATA__, __common, " << name << ", "
+ << Size << ", " << Align << "\n";
+ continue;
+ }
+ }
+
+ if (!I->hasSection() && !I->isThreadLocal() &&
+ (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+ I->hasLinkOnceLinkage())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (!NoZerosInBSS && TAI->getBSSSection())
+ SwitchToDataSection(TAI->getBSSSection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ if (TAI->getLCOMMDirective() != NULL) {
+ if (I->hasInternalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ if (Subtarget->isTargetDarwin())
+ O << "," << Align;
+ } else
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ } else {
+ if (!Subtarget->isTargetCygMing()) {
+ if (I->hasInternalLinkage())
+ O << "\t.local\t" << name << "\n";
+ }
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ O << "\t\t" << TAI->getCommentString() << " " << I->getName() << "\n";
+ continue;
+ }
+ }
+
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << "\n"
+ << "\t.weak_definition " << name << "\n";
+ SwitchToDataSection(".section __DATA,__const_coal,coalesced", I);
+ } else if (Subtarget->isTargetCygMing()) {
+ std::string SectionName(".section\t.data$linkonce." +
+ name +
+ ",\"aw\"");
+ SwitchToDataSection(SectionName.c_str(), I);
+ O << "\t.globl " << name << "\n"
+ << "\t.linkonce same_size\n";
+ } else {
+ std::string SectionName("\t.section\t.llvm.linkonce.d." +
+ name +
+ ",\"aw\",@progbits");
+ SwitchToDataSection(SectionName.c_str(), I);
+ O << "\t.weak " << name << "\n";
+ }
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::DLLExportLinkage:
+ DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),""));
+ // FALL THROUGH
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage: {
+ if (I->isConstant()) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+ SwitchToDataSection(TAI->getCStringSection(), I);
+ break;
+ }
+ }
+ // FIXME: special handling for ".ctors" & ".dtors" sections
+ if (I->hasSection() &&
+ (I->getSection() == ".ctors" ||
+ I->getSection() == ".dtors")) {
+ std::string SectionName = ".section " + I->getSection();
+
+ if (Subtarget->isTargetCygMing()) {
+ SectionName += ",\"aw\"";
+ } else {
+ assert(!Subtarget->isTargetDarwin());
+ SectionName += ",\"aw\",@progbits";
+ }
+
+ SwitchToDataSection(SectionName.c_str());
+ } else {
+ if (C->isNullValue() && !NoZerosInBSS && TAI->getBSSSection())
+ SwitchToDataSection(I->isThreadLocal() ? TAI->getTLSBSSSection() :
+ TAI->getBSSSection(), I);
+ else if (!I->isConstant())
+ SwitchToDataSection(I->isThreadLocal() ? TAI->getTLSDataSection() :
+ TAI->getDataSection(), I);
+ else if (I->isThreadLocal())
+ SwitchToDataSection(TAI->getTLSDataSection());
+ else {
+ // Read-only data.
+ bool HasReloc = C->ContainsRelocations();
+ if (HasReloc &&
+ Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static)
+ SwitchToDataSection("\t.const_data\n");
+ else if (!HasReloc && Size == 4 &&
+ TAI->getFourByteConstantSection())
+ SwitchToDataSection(TAI->getFourByteConstantSection(), I);
+ else if (!HasReloc && Size == 8 &&
+ TAI->getEightByteConstantSection())
+ SwitchToDataSection(TAI->getEightByteConstantSection(), I);
+ else if (!HasReloc && Size == 16 &&
+ TAI->getSixteenByteConstantSection())
+ SwitchToDataSection(TAI->getSixteenByteConstantSection(), I);
+ else if (TAI->getReadOnlySection())
+ SwitchToDataSection(TAI->getReadOnlySection(), I);
+ else
+ SwitchToDataSection(TAI->getDataSection(), I);
+ }
+ }
+
+ break;
+ }
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, I);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " " << I->getName()
+ << "\n";
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << name << ", " << Size << "\n";
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ }
+
+ // Output linker support code for dllexported globals
+ if (DLLExportedGVs.begin() != DLLExportedGVs.end()) {
+ SwitchToDataSection(".section .drectve");
+ }
+
+ for (std::set<std::string>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i) {
+ O << "\t.ascii \" -export:" << *i << ",data\"\n";
+ }
+
+ if (DLLExportedFns.begin() != DLLExportedFns.end()) {
+ SwitchToDataSection(".section .drectve");
+ }
+
+ for (std::set<std::string>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i) {
+ O << "\t.ascii \" -export:" << *i << "\"\n";
+ }
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ unsigned j = 1;
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i, ++j) {
+ SwitchToDataSection(".section __IMPORT,__jump_table,symbol_stubs,"
+ "self_modifying_code+pure_instructions,5", 0);
+ O << "L" << *i << "$stub:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\thlt ; hlt ; hlt ; hlt ; hlt\n";
+ }
+
+ O << "\n";
+
+ // Output stubs for external and common global variables.
+ if (GVStubs.begin() != GVStubs.end())
+ SwitchToDataSection(
+ ".section __IMPORT,__pointers,non_lazy_symbol_pointers");
+ for (std::set<std::string>::iterator i = GVStubs.begin(), e = GVStubs.end();
+ i != e; ++i) {
+ O << "L" << *i << "$non_lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\t.long\t0\n";
+ }
+
+ // Emit final debug information.
+ DW.EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else if (Subtarget->isTargetCygMing()) {
+ // Emit type information for external functions
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ O << "\t.def\t " << *i
+ << ";\t.scl\t" << COFF::C_EXT
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ // Emit final debug information.
+ DW.EndModule();
+ } else if (Subtarget->isTargetELF()) {
+ // Emit final debug information.
+ DW.EndModule();
+ }
+
+ AsmPrinter::doFinalization(M);
+ return false; // success
+}
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
+/// for a MachineFunction to the given output stream, using the given target
+/// machine description.
+///
+FunctionPass *llvm::createX86CodePrinterPass(std::ostream &o,
+ X86TargetMachine &tm) {
+ const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+
+ if (Subtarget->isFlavorIntel()) {
+ return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo());
+ } else {
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo());
+ }
+}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
new file mode 100755
index 0000000..45be89e
--- /dev/null
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -0,0 +1,97 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to Intel or AT&T assembly -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the shared superclass printer that converts from our
+// internal representation of machine-dependent LLVM code to Intel and AT&T
+// format assembly language. This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+
+
+namespace llvm {
+
+struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
+ DwarfWriter DW;
+
+ X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T)
+ : AsmPrinter(O, TM, T), DW(O, this, T) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ // We have to propagate some information about MachineFunction to
+ // AsmPrinter. That is fine while we are printing the function itself, since
+ // we have access to the MachineFunction and can get the appropriate
+ // MachineFunctionInfo. Unfortunately, this is not possible when we are only
+ // printing a reference to a Function (e.g. when calling it): there is no way
+ // to get the corresponding MachineFunction, which may not even have been
+ // created yet. That is why we collect all the necessary information in this
+ // additional map.
+ //
+ // This map is used e.g. for name decoration of stdcall & fastcall'ed
+ // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ if (Subtarget->isTargetDarwin() ||
+ Subtarget->isTargetELF() ||
+ Subtarget->isTargetCygMing()) {
+ AU.addRequired<MachineModuleInfo>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const X86Subtarget *Subtarget;
+
+ // Necessary for Darwin to print out the appropriate types of linker stubs
+ std::set<std::string> FnStubs, GVStubs, LinkOnceStubs;
+
+ // Necessary for dllexport support
+ std::set<std::string> DLLExportedFns, DLLExportedGVs;
+
+ inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImmediate() &&
+ (MO.getImmedValue() == 1 || MO.getImmedValue() == 2 ||
+ MO.getImmedValue() == 4 || MO.getImmedValue() == 8);
+ }
+
+ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFrameIndex()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isRegister() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isRegister() &&
+ (MI->getOperand(Op+3).isImmediate() ||
+ MI->getOperand(Op+3).isGlobalAddress() ||
+ MI->getOperand(Op+3).isConstantPoolIndex() ||
+ MI->getOperand(Op+3).isJumpTableIndex());
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
new file mode 100644
index 0000000..75892ef
--- /dev/null
+++ b/lib/Target/X86/X86COFF.h
@@ -0,0 +1,95 @@
+//===--- X86COFF.h - Some definitions from the COFF documentation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Anton Korobeynikov and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just defines some symbols found in COFF documentation. They are
+// used to emit function type information for COFF targets (Cygwin/Mingw32).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_H
+#define X86COFF_H
+
+namespace COFF
+{
+/// Storage class tells where and what the symbol represents
+enum StorageClass {
+ C_EFCN = -1, ///< Physical end of function
+ C_NULL = 0, ///< No symbol
+ C_AUTO = 1, ///< Automatic variable
+ C_EXT = 2, ///< External symbol
+ C_STAT = 3, ///< Static
+ C_REG = 4, ///< Register variable
+ C_EXTDEF = 5, ///< External definition
+ C_LABEL = 6, ///< Label
+ C_ULABEL = 7, ///< Undefined label
+ C_MOS = 8, ///< Member of structure
+ C_ARG = 9, ///< Function argument
+ C_STRTAG = 10, ///< Structure tag
+ C_MOU = 11, ///< Member of union
+ C_UNTAG = 12, ///< Union tag
+ C_TPDEF = 13, ///< Type definition
+ C_USTATIC = 14, ///< Undefined static
+ C_ENTAG = 15, ///< Enumeration tag
+ C_MOE = 16, ///< Member of enumeration
+ C_REGPARM = 17, ///< Register parameter
+ C_FIELD = 18, ///< Bit field
+
+ C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block
+ C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function
+ C_EOS = 102, ///< End of structure
+ C_FILE = 103, ///< File name
+ C_LINE = 104, ///< Line number, reformatted as symbol
+ C_ALIAS = 105, ///< Duplicate tag
+ C_HIDDEN = 106 ///< External symbol in dmert public lib
+};
+
+/// The type of the symbol. This is made up of a base type and a derived type.
+/// For example, "pointer to int" combines the base type "int" with the
+/// derived type "pointer to T".
+enum SymbolType {
+ T_NULL = 0, ///< No type info
+ T_ARG = 1, ///< Void function argument (only used by compiler)
+ T_VOID = 1, ///< The same as above. Just named differently in some specs.
+ T_CHAR = 2, ///< Character
+ T_SHORT = 3, ///< Short integer
+ T_INT = 4, ///< Integer
+ T_LONG = 5, ///< Long integer
+ T_FLOAT = 6, ///< Floating point
+ T_DOUBLE = 7, ///< Double precision floating point
+ T_STRUCT = 8, ///< Structure
+ T_UNION = 9, ///< Union
+ T_ENUM = 10, ///< Enumeration
+ T_MOE = 11, ///< Member of enumeration
+ T_UCHAR = 12, ///< Unsigned character
+ T_USHORT = 13, ///< Unsigned short
+ T_UINT = 14, ///< Unsigned integer
+ T_ULONG = 15 ///< Unsigned long
+};
+
+/// Derived type of symbol
+enum SymbolDerivedType {
+ DT_NON = 0, ///< No derived type
+ DT_PTR = 1, ///< Pointer to T
+ DT_FCN = 2, ///< Function returning T
+ DT_ARY = 3 ///< Array of T
+};
+
+/// Masks for extracting parts of type
+enum SymbolTypeMasks {
+ N_BTMASK = 017, ///< Mask for base type
+ N_TMASK = 060 ///< Mask for derived type
+};
+
+/// Offsets of parts of type
+enum Shifts {
+ N_BTSHFT = 4 ///< Type is formed as (base + (derived << N_BTSHFT))
+};
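+
+/// As an illustration, the Cygwin/MinGW asm printer emits function symbols
+/// with storage class C_EXT and type (DT_FCN << N_BTSHFT), i.e. 0x20, meaning
+/// "function returning T_NULL".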
+
+}
+
+#endif // X86COFF_H
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 0000000..39811bd7
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,172 @@
+//===- X86CallingConv.td - Calling Conventions for X86 32/64 ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX.
+ CCIfType<[i8] , CCAssignToReg<[AL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. If the target
+ // doesn't have XMM registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>
+]>;
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0; otherwise it is
+ // the same as the common X86 calling convention.
+ CCIfType<[f32], CCAssignToReg<[ST0]>>,
+ CCIfType<[f64], CCAssignToReg<[ST0]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns FP values in XMM0 if the target has SSE2,
+ // otherwise it is the same as the C calling convention.
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0]>>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f32], CCAssignToReg<[XMM0]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Always just the same as C calling conv for X86-64.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ CCIfStruct<CCStructAssign<[RDI, RSI, RDX, RCX, R8, R9 ]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+
+ // The first 6 MMX vector arguments are passed in GPRs.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64],
+ CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // The first 4 vector arguments are passed in XMM registers.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Other vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 0000000..8b22634
--- /dev/null
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,824 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+ bool Is64BitMode;
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
+ : MachineFunctionPass((intptr_t)&ID), II(0), TD(0), TM(tm),
+ MCE(mce), Is64BitMode(false) {}
+ Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass((intptr_t)&ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), Is64BitMode(is64) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitPCRelativeValue(intptr_t Address);
+ void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub);
+ void emitGlobalAddressForPtr(GlobalValue *GV, unsigned Reloc,
+ int Disp = 0, unsigned PCAdj = 0);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, int Disp = 0,
+ unsigned PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc, unsigned PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ unsigned PCAdj = 0);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ unsigned PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo);
+ bool isX86_64ExtendedReg(const MachineOperand &MO);
+ unsigned determineREX(const MachineInstr &MI);
+ };
+ char Emitter::ID = 0;
+}
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter(TM, MCE);
+}
+
+bool Emitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+ MF.getTarget().getRelocationModel() == Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+ II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
+ TD = ((X86TargetMachine&)MF.getTarget()).getTargetData();
+ Is64BitMode =
+ ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit();
+
+ do {
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// emitPCRelativeValue - Emit a PC relative address.
+///
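+/// The displacement is measured from the end of the emitted 4-byte field
+/// (i.e. the address of the next instruction), hence the -4 below.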
+void Emitter::emitPCRelativeValue(intptr_t Address) {
+ MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4);
+}
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and which block it refers to so we
+ // can resolve it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddressForCall - Emit the specified address to the code stream
+/// assuming this is part of a function call, which is PC relative.
+///
+void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, GV, 0,
+ DoesntNeedStub));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddressForPtr - Emit the specified address to the code stream,
+/// assuming this is part of a "take the address of a global" instruction.
+///
+void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, unsigned Reloc,
+ int Disp /* = 0 */,
+ unsigned PCAdj /* = 0 */) {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, PCAdj));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitWordLE(0);
+ MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitWordLE(0);
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ int Disp /* = 0 */,
+ unsigned PCAdj /* = 0 */) {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, PCAdj));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitWordLE(0);
+ MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ unsigned PCAdj /* = 0 */) {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, PCAdj));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitWordLE(0);
+ MCE.emitWordLE(0); // The relocated value will be added to the displacement
+}
+
+/// N86 namespace - Native X86 Register numbers... used by X86 backend.
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+// getX86RegNum - This function maps LLVM register identifiers to their X86
+// specific numbering, which is used in various places when encoding instructions.
+//
+unsigned Emitter::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+ case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
+ return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+ II->getRegisterInfo().getDwarfRegNum(X86::XMM0);
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+ II->getRegisterInfo().getDwarfRegNum(X86::XMM8);
+
+ default:
+ assert(MRegisterInfo::isVirtualRegister(RegNo) &&
+ "Unknown physical register!");
+ assert(0 && "Register allocator hasn't allocated reg correctly yet!");
+ return 0;
+ }
+}
+
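+// The ModR/M byte is laid out as mod (bits 7-6), reg/opcode (bits 5-3) and
+// r/m (bits 2-0); for example, ModRMByte(3, 1, 0) yields 0xC8, the
+// register-direct form with ECX in the reg field and EAX in r/m.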
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+void Emitter::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
+void Emitter::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
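+/// (Values in [-128, 127] qualify; 128, for example, does not.)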
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
+
+void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal, unsigned PCAdj) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ if (RelocOp->isGlobalAddress()) {
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial.
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ unsigned rt= Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
+ emitGlobalAddressForPtr(RelocOp->getGlobal(), rt,
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isConstantPoolIndex()) {
+ // Must be in 64-bit mode.
+ emitConstPoolAddress(RelocOp->getConstantPoolIndex(), X86::reloc_pcrel_word,
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isJumpTableIndex()) {
+ // Must be in 64-bit mode.
+ emitJumpTableAddress(RelocOp->getJumpTableIndex(), X86::reloc_pcrel_word,
+ PCAdj);
+ } else {
+ assert(0 && "Unknown value to relocate!");
+ }
+}
+
+void Emitter::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ unsigned PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobalAddress()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isConstantPoolIndex()) {
+ if (Is64BitMode) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJumpTableIndex()) {
+ if (Is64BitMode) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Is a SIB byte needed?
+ if (IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
+ if (BaseReg == 0) { // Just a displacement?
+ // Emit special case [disp32] encoding
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit simple indirect register encoding... [EAX] f.e.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ } else if (!DispForReloc && isDisp8(DispVal)) {
+ // Emit the disp8 encoding... [REG+disp8]
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ } else {
+ // Emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+
+ } else { // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
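+ // Scale factors 1, 2, 4 and 8 map to SS values 0, 1, 2 and 3; the ~0
+ // entries correspond to scales that can never appear.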
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base. The
+ // displacement has already been output.
+ assert(IndexReg.getReg() && "Index register must be specified!");
+ emitSIBByte(SS, getX86RegNum(IndexReg.getReg()), 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+}
+
+static unsigned sizeOfImm(const TargetInstrDescriptor *Desc) {
+ switch (Desc->TSFlags & X86II::ImmMask) {
+ case X86II::Imm8: return 1;
+ case X86II::Imm16: return 2;
+ case X86II::Imm32: return 4;
+ case X86II::Imm64: return 8;
+ default: assert(0 && "Immediate size not set!");
+ return 0;
+ }
+}
+
+/// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register?
+/// e.g. r8, xmm8, etc.
+bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isRegister()) return false;
+ unsigned RegNo = MO.getReg();
+ int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo);
+ if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) &&
+ DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15))
+ return true;
+ if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) &&
+ DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15))
+ return true;
+ return false;
+}
+
+inline static bool isX86_64TruncToByte(unsigned oc) {
+ return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
+ oc == X86::TRUNC_16to8);
+}
+
+
+inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with an
+/// X86-64 REX prefix, which specifies 1) 64-bit instructions, 2) non-default
+/// operand size, and 3) use of X86-64 extended registers.
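+/// The prefix byte is encoded as 0100WRXB: bit 3 (W) selects 64-bit operand
+/// size, bit 2 (R) extends the ModR/M reg field, bit 1 (X) extends the SIB
+/// index field, and bit 0 (B) extends the ModR/M r/m or SIB base field.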
+unsigned Emitter::determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const TargetInstrDescriptor *Desc = MI.getInstrDescriptor();
+ unsigned Opcode = Desc->Opcode;
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc->TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc->TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc->numOperands;
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc->getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ bool isTrunc8 = isX86_64TruncToByte(Opcode);
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ unsigned Reg = MO.getReg();
+ // Trunc-to-byte instructions are actually movb; the real source operand
+ // is the low byte of the register.
+ if (isTrunc8 && i == 1)
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ if (isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc->TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = isTwoAddr ? 5 : 4;
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+void Emitter::emitInstruction(const MachineInstr &MI) {
+ NumEmitted++; // Keep track of the # of mi's emitted
+
+ const TargetInstrDescriptor *Desc = MI.getInstrDescriptor();
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB:
+ Need0FPrefix = true; // Two-byte opcode prefix
+ break;
+ case X86II::T8:
+ MCE.emitByte(0x0F);
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA:
+ MCE.emitByte(0x0F);
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = determineREX(MI);
+ if (REX)
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->numOperands;
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ CurOp++;
+
+ unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+#ifndef NDEBUG
+ switch (Opcode) {
+ default:
+ assert(0 && "pseudo instructions should be removed before code emission");
+ case TargetInstrInfo::INLINEASM:
+ assert(0 && "JIT does not support inline asm!\n");
+ case TargetInstrInfo::LABEL:
+ assert(0 && "JIT does not support meta labels!\n");
+ case X86::IMPLICIT_USE:
+ case X86::IMPLICIT_DEF:
+ case X86::IMPLICIT_DEF_GR8:
+ case X86::IMPLICIT_DEF_GR16:
+ case X86::IMPLICIT_DEF_GR32:
+ case X86::IMPLICIT_DEF_GR64:
+ case X86::IMPLICIT_DEF_FR32:
+ case X86::IMPLICIT_DEF_FR64:
+ case X86::IMPLICIT_DEF_VR64:
+ case X86::IMPLICIT_DEF_VR128:
+ case X86::FP_REG_KILL:
+ break;
+ }
+#endif
+ CurOp = NumOps;
+ break;
+
+ case X86II::RawFrm:
+ MCE.emitByte(BaseOpcode);
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ if (MO.isMachineBasicBlock()) {
+ emitPCRelativeBlockAddress(MO.getMachineBasicBlock());
+ } else if (MO.isGlobalAddress()) {
+ bool NeedStub = Is64BitMode ||
+ Opcode == X86::TAILJMPd ||
+ Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm;
+ emitGlobalAddressForCall(MO.getGlobal(), !NeedStub);
+ } else if (MO.isExternalSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ } else if (MO.isImmediate()) {
+ emitConstant(MO.getImm(), sizeOfImm(Desc));
+ } else {
+ assert(0 && "Unknown RawFrm operand!");
+ }
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = sizeOfImm(Desc);
+ if (MO1.isImmediate())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobalAddress())
+ emitGlobalAddressForPtr(MO1.getGlobal(), rt, MO1.getOffset());
+ else if (MO1.isExternalSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isConstantPoolIndex())
+ emitConstPoolAddress(MO1.getConstantPoolIndex(), rt);
+ else if (MO1.isJumpTableIndex())
+ emitJumpTableAddress(MO1.getJumpTableIndex(), rt);
+ }
+ }
+ break;
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg()));
+ CurOp += 5;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
+ break;
+
+ case X86II::MRMSrcMem: {
+ unsigned PCAdj = (CurOp+5 != NumOps) ? sizeOfImm(Desc) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
+ CurOp += 5;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = sizeOfImm(Desc);
+ if (MO1.isImmediate())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : X86::reloc_absolute_word;
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO1.isGlobalAddress())
+ emitGlobalAddressForPtr(MO1.getGlobal(), rt, MO1.getOffset());
+ else if (MO1.isExternalSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isConstantPoolIndex())
+ emitConstPoolAddress(MO1.getConstantPoolIndex(), rt);
+ else if (MO1.isJumpTableIndex())
+ emitJumpTableAddress(MO1.getJumpTableIndex(), rt);
+ }
+ }
+ break;
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ unsigned PCAdj = (CurOp+4 != NumOps) ?
+ (MI.getOperand(CurOp+4).isImmediate() ? sizeOfImm(Desc) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += 4;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = sizeOfImm(Desc);
+ if (MO.isImmediate())
+ emitConstant(MO.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : X86::reloc_absolute_word;
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO.isGlobalAddress())
+ emitGlobalAddressForPtr(MO.getGlobal(), rt, MO.getOffset());
+ else if (MO.isExternalSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isConstantPoolIndex())
+ emitConstPoolAddress(MO.getConstantPoolIndex(), rt);
+ else if (MO.isJumpTableIndex())
+ emitJumpTableAddress(MO.getJumpTableIndex(), rt);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+ }
+
+ assert(((Desc->Flags & M_VARIABLE_OPS) != 0 ||
+ CurOp == NumOps) && "Unknown encoding!");
+}
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 0000000..f8f8d48
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,18 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+using namespace llvm;
+
+X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 0000000..eb564fb
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,29 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ X86ELFWriterInfo();
+ virtual ~X86ELFWriterInfo();
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 0000000..c293a32
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,882 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// virtual registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// This pass is hampered by the lack of decent CFG manipulation routines for
+// machine code. In particular, this wants to be able to split critical edges
+// as necessary, traverse the machine basic block CFG in depth-first order, and
+// allow there to be multiple machine basic blocks for each LLVM basic block
+// (needed for critical edge splitting).
+//
+// In particular, this pass currently barfs on critical edges. Because of this,
+// it requires the instruction selector to insert FP_REG_KILL instructions on
+// the exits of any basic block that has critical edges going from it, or which
+// branches to a critical basic block.
+//
+// FIXME: this is not implemented yet. The stackifier pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveVariables>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ LiveVariables *LV; // Live variable info for current function...
+ MachineBasicBlock *MBB; // Current basic block
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned RegMap[8]; // Track which stack slot contains each register
+ unsigned StackTop; // The current top of the FP stack.
+
+ void dumpStack() const {
+ cerr << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ cerr << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ cerr << "\n";
+ }
+ private:
+ // getSlot - Return the stack slot number a particular register number is
+ // in...
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < 8 && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ // getStackEntry - Return the X86::FP<n> register in register ST(i)
+ unsigned getStackEntry(unsigned STi) const {
+ assert(STi < StackTop && "Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ // getSTReg - Return the X86::ST(i) register which contains the specified
+ // FP<RegNo> register
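+    // For example, with StackTop == 3 and RegNo sitting in slot 0 (the bottom
+    // of the stack), this returns X86::ST0 + 2, i.e. ST(2).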
+ unsigned getSTReg(unsigned RegNo) const {
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack
+ void pushReg(unsigned Reg) {
+ assert(Reg < 8 && "Register number out of range!");
+ assert(StackTop < 8 && "Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
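+    // moveToTop - Bring FP<RegNo> to ST(0) by swapping it with whatever is on
+    // top and emitting an fxch. For example, if the stack is [FP3, FP1] (FP1
+    // on top), moveToTop(3, I) emits "fxch %st(1)" and leaves [FP1, FP3].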
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator &I) {
+ if (!isAtTop(RegNo)) {
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents
+ assert(RegMap[RegOnTop] < StackTop);
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+        // Emit an fxch to update the runtime processor's version of the state
+ BuildMI(*MBB, I, TII->get(X86::XCH_F)).addReg(STReg);
+ NumFXCH++;
+ }
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ // popStackAfter - Pop the current value off of the top of the FP stack
+ // after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ // freeStackSlotAfter - Free the specified register from the register stack,
+ // so that it is no longer in a register. If the register is currently at
+ // the top of the stack, we just pop the current instruction, otherwise we
+ // store the current top-of-stack into the specified slot, then pop the top
+ // of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ TII = MF.getTarget().getInstrInfo();
+ LV = &getAnalysis<LiveVariables>();
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ std::set<MachineBasicBlock*> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, std::set<MachineBasicBlock*> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ unsigned Flags = MI->getInstrDescriptor()->TSFlags;
+ if ((Flags & X86II::FPTypeMask) == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DOUT << "\nFPInst:\t" << *MI;
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (Flags & X86II::FPTypeMask) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: assert(0 && "Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+    // Print out all of the instructions expanded to, if -debug is specified.
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ cerr << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ cerr << "Inserted instructions:\n\t";
+ Start->print(*cerr.stream(), &MF.getTarget());
+ while (++Start != next(I));
+ }
+ dumpStack();
+ );
+
+ Changed = true;
+ }
+
+ assert(StackTop == 0 && "Stack not empty at end of basic block?");
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool operator<(unsigned V, const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#define ARRAY_SIZE(TABLE) \
+ (sizeof(TABLE)/sizeof(TABLE[0]))
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is a register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
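+// For example, the register-file pseudo ADD_Fp32m lowers to the concrete
+// ADD_F32m, which adds a 32-bit memory operand into ST(0).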
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+};
+
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, ARRAY_SIZE(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
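+/// For example, an ST_F64m whose value is dead afterwards becomes the popping
+/// ST_FP64m; when no popping form exists, an explicit "fstp %st(0)" (ST_FPrr
+/// of ST0) is inserted after the instruction instead.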
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(PopTable);
+ assert(StackTop > 0 && "Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setInstrDescriptor(TII->get(Opcode));
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
+ I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop the current instruction, otherwise we store the
+/// current top-of-stack into the specified slot, then pop the top of stack.
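+/// For example, freeing FP2 when the stack is [FP2, FP0] (FP0 on top) emits
+/// "fstp %st(1)", overwriting FP2's slot with FP0 and shrinking the stack.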
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isRegister() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setInstrDescriptor(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getInstrDescriptor()->numOperands;
+ assert((NumOps == 5 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = LV->KillsRegister(MI, X86::FP0+Reg);
+
+  // FISTP64m is strange because there isn't a non-popping version.
+  // If we have one _and_ we don't want to pop the operand, duplicate the value
+  // on the stack instead of moving it. This ensures that popping the value is
+  // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64)) {
+ duplicateToTop(Reg, 7 /*temp register*/, I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setInstrDescriptor(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m) {
+ assert(StackTop > 0 && "Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getInstrDescriptor()->numOperands;
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = LV->KillsRegister(MI, X86::FP0+Reg);
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ assert(StackTop > 0 && "Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setInstrDescriptor(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends, which are
+/// virtual instructions that need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
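+/// For example, "FP1 = fsub FP1, FP3" with FP1 on top of the stack, FP1
+/// killed and FP3 still live selects the ForwardST0Table form: ST(0) is
+/// updated in place on top of the dead operand and FP3 stays where it is.
+///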
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getInstrDescriptor()->numOperands;
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = LV->KillsRegister(MI, X86::FP0+Op0);
+ bool KillsOp1 = LV->KillsRegister(MI, X86::FP0+Op1);
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
+ int Opcode = Lookup(InstTable, ARRAY_SIZE(ForwardST0Table), MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ delete MI; // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getInstrDescriptor()->numOperands;
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = LV->KillsRegister(MI, X86::FP0+Op0);
+ bool KillsOp1 = LV->KillsRegister(MI, X86::FP0+Op1);
+
+ // Make sure the first operand is on the top of stack, the other one can be
+ // anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setInstrDescriptor(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = LV->KillsRegister(MI, X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setInstrDescriptor(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SpecialFP instruction!");
+ case X86::FpGETRESULT32: // Appears immediately after a call returning FP type!
+ case X86::FpGETRESULT64: // Appears immediately after a call returning FP type!
+ assert(StackTop == 0 && "Stack should be empty after a call!");
+ pushReg(getFPReg(MI->getOperand(0)));
+ break;
+ case X86::FpSETRESULT32:
+ case X86::FpSETRESULT64:
+ assert(StackTop == 1 && "Stack should have one element on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ case X86::MOV_Fp3232:
+ case X86::MOV_Fp3264:
+ case X86::MOV_Fp6432:
+ case X86::MOV_Fp6464: {
+ unsigned SrcReg = getFPReg(MI->getOperand(1));
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ if (LV->KillsRegister(MI, X86::FP0+SrcReg)) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcReg);
+ assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
+ Stack[Slot] = DestReg;
+ RegMap[DestReg] = Slot;
+
+ } else {
+ // For FMOV we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcReg, DestReg, I);
+ }
+ break;
+ }
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+ --I;
+}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 0000000..8b1690c
--- /dev/null
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,1342 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized DAG to an X86 DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+  /// SDOperands instead of register numbers for the leaves of the matched
+ /// tree.
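+  /// The matched address has the general x86 form
+  ///   Base + IndexReg * Scale + Disp,
+  /// where Base is either a register or a frame index, and the displacement
+  /// may instead be a GlobalValue, ConstantPool entry, ExternalSymbol or
+  /// JumpTable reference.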
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDOperand Reg;
+ int FrameIndex;
+ } Base;
+
+ bool isRIPRel; // RIP relative?
+ unsigned Scale;
+ SDOperand IndexReg;
+ unsigned Disp;
+ GlobalValue *GV;
+ Constant *CP;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+ GV(0), CP(0), ES(0), JT(-1), Align(0) {
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
+    /// ContainsFPCode - Every instruction we select that uses or defines an FP
+ /// register should set this to true.
+ bool ContainsFPCode;
+
+ /// FastISel - Enable fast(er) instruction selection.
+ ///
+ bool FastISel;
+
+ /// TM - Keep a reference to X86TargetMachine.
+ ///
+ X86TargetMachine &TM;
+
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ X86TargetLowering X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+    /// GlobalBaseReg - Keeps track of the virtual register mapped onto the
+    /// global base register.
+ unsigned GlobalBaseReg;
+
+ public:
+ X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
+ : SelectionDAGISel(X86Lowering),
+ ContainsFPCode(false), FastISel(fast), TM(tm),
+ X86Lowering(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ return SelectionDAGISel::runOnFunction(Fn);
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ /// InstructionSelectBasicBlock - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+
+ virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+
+ virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root);
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDOperand N);
+
+ bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
+ bool isRoot = true, unsigned Depth = 0);
+ bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
+ bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
+ bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
+ SDOperand N, SDOperand &Base, SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp,
+ SDOperand &InChain, SDOperand &OutChain);
+ bool TryFoldLoad(SDOperand P, SDOperand N,
+ SDOperand &Base, SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp);
+ void InstructionSelectPreprocess(SelectionDAG &DAG);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
+ char ConstraintCode,
+ std::vector<SDOperand> &OutOps,
+ SelectionDAG &DAG);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
+ SDOperand &Scale, SDOperand &Index,
+ SDOperand &Disp) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+ else
+ Disp = getI32Imm(AM.Disp);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDOperand getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDOperand getI16Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDOperand getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+    /// getGlobalBaseReg - Insert code into the entry MBB to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+#ifndef NDEBUG
+ unsigned Indent;
+#endif
+ };
+}
+
+static SDNode *findFlagUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDOperand Op = User->getOperand(i);
+ if (Op.Val == N && Op.ResNo == FlagResNo)
+ return User;
+ }
+ }
+ return NULL;
+}
+
+static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SDNode *Skip, bool &found,
+ std::set<SDNode *> &Visited) {
+ if (found ||
+ Use->getNodeId() > Def->getNodeId() ||
+ !Visited.insert(Use).second)
+ return;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
+ SDNode *N = Use->getOperand(i).Val;
+ if (N == Skip)
+ continue;
+ if (N == Def) {
+ if (Use == ImmedUse)
+ continue; // Immediate use is ok.
+ if (Use == Root) {
+ assert(Use->getOpcode() == ISD::STORE ||
+ Use->getOpcode() == X86ISD::CMP);
+ continue;
+ }
+ found = true;
+ break;
+ }
+ findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
+ }
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// CanBeFoldedBy) and by Root (which can happen in the store case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically, since we only fold loads which
+/// have one non-chain use, we only need to watch out for load/op/store
+/// and load/op/cmp case where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
+ SDNode *Skip = NULL) {
+ std::set<SDNode *> Visited;
+ bool found = false;
+ findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
+ return found;
+}
+
+
+bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) {
+ if (FastISel) return false;
+
+  // If U can somehow reach N through another path, then U can't fold N or
+  // it will create a cycle. e.g. In the following diagram, U can reach N
+  // through X. If N is folded into U, then X is both a predecessor and
+ // a successor of U.
+ //
+ // [ N ]
+ // ^ ^
+ // | |
+ // / \---
+ // / [X]
+ // | ^
+ // [U]--------|
+
+ if (isNonImmUse(Root, N, U))
+ return false;
+
+ // If U produces a flag, then it gets (even more) interesting. Since it
+ // would have been "glued" together with its flag use, we need to check if
+ // it might reach N:
+ //
+ // [ N ]
+ // ^ ^
+ // | |
+ // [U] \--
+ // ^ [TF]
+ // | ^
+ // | |
+ // \ /
+ // [FU]
+ //
+  // If FU (flag use) indirectly reaches N (the load), and U folds N (call it
+  // NU), then TF is a predecessor of FU and a successor of NU. But since
+ // NU and FU are flagged together, this effectively creates a cycle.
+ bool HasFlagUse = false;
+ MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
+ while ((VT == MVT::Flag && !Root->use_empty())) {
+ SDNode *FU = findFlagUse(Root);
+ if (FU == NULL)
+ break;
+ else {
+ Root = FU;
+ HasFlagUse = true;
+ }
+ VT = Root->getValueType(Root->getNumValues()-1);
+ }
+
+ if (HasFlagUse)
+ return !isNonImmUse(Root, N, Root, U);
+ return true;
+}
+
+/// MoveBelowTokenFactor - Replace the TokenFactor operand with the load's chain
+/// operand and move the load below the TokenFactor. Replace the store's chain
+/// operand with the load's chain result.
+static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
+ SDOperand Store, SDOperand TF) {
+ std::vector<SDOperand> Ops;
+ for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
+ if (Load.Val == TF.Val->getOperand(i).Val)
+ Ops.push_back(Load.Val->getOperand(0));
+ else
+ Ops.push_back(TF.Val->getOperand(i));
+ DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
+ DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
+ DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+ Store.getOperand(2), Store.getOperand(3));
+}
+
+/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
+/// selector to pick more load-modify-store instructions. This is a common
+/// case:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// / \-
+/// / |
+/// [TokenFactor] [Op]
+/// ^ ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+///
+/// The fact that the store's chain operand != the load's chain will prevent the
+/// (store (op (load))) instruction from being selected. We can transform it to:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [TokenFactor]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// | \-
+/// | |
+/// | [Op]
+/// | ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
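+///
+/// A typical payoff is a sequence such as "t = load [p]; t2 = add t, 1;
+/// store t2, [p]", which can then be matched as a single read-modify-write
+/// instruction (e.g. an add with a memory operand).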
+void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (!ISD::isNON_TRUNCStore(I))
+ continue;
+ SDOperand Chain = I->getOperand(0);
+ if (Chain.Val->getOpcode() != ISD::TokenFactor)
+ continue;
+
+ SDOperand N1 = I->getOperand(1);
+ SDOperand N2 = I->getOperand(2);
+ if (MVT::isFloatingPoint(N1.getValueType()) ||
+ MVT::isVector(N1.getValueType()) ||
+ !N1.hasOneUse())
+ continue;
+
+ bool RModW = false;
+ SDOperand Load;
+ unsigned Opcode = N1.Val->getOpcode();
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADDC:
+ case ISD::ADDE: {
+ SDOperand N10 = N1.getOperand(0);
+ SDOperand N11 = N1.getOperand(1);
+ if (ISD::isNON_EXTLoad(N10.Val))
+ RModW = true;
+ else if (ISD::isNON_EXTLoad(N11.Val)) {
+ RModW = true;
+ std::swap(N10, N11);
+ }
+ RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+ (N10.getOperand(1) == N2) &&
+ (N10.Val->getValueType(0) == N1.getValueType());
+ if (RModW)
+ Load = N10;
+ break;
+ }
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ case X86ISD::SHLD:
+ case X86ISD::SHRD: {
+ SDOperand N10 = N1.getOperand(0);
+ if (ISD::isNON_EXTLoad(N10.Val))
+ RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+ (N10.getOperand(1) == N2) &&
+ (N10.Val->getValueType(0) == N1.getValueType());
+ if (RModW)
+ Load = N10;
+ break;
+ }
+ }
+
+ if (RModW) {
+ MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
+ ++NumLoadMoved;
+ }
+ }
+}
+
+/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
+/// when it has created a SelectionDAG for us to codegen.
+void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
+ MachineFunction::iterator FirstMBB = BB;
+
+ if (!FastISel)
+ InstructionSelectPreprocess(DAG);
+
+ // Codegen the basic block.
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+#endif
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+#endif
+
+ DAG.RemoveDeadNodes();
+
+ // Emit machine code to BB.
+ ScheduleAndEmitDAG(DAG);
+
+ // If we are emitting FP stack code, scan the basic block to determine if this
+ // block defines any FP values. If so, put an FP_REG_KILL instruction before
+ // the terminator of the block.
+ if (!Subtarget->hasSSE2()) {
+ // Note that FP stack instructions *are* used in SSE code when returning
+ // values, but these are not live out of the basic block, so we don't need
+ // an FP_REG_KILL in this case either.
+ bool ContainsFPCode = false;
+
+ // Scan all of the machine instructions in these MBBs, checking for FP
+ // stores.
+ MachineFunction::iterator MBBI = FirstMBB;
+ do {
+ for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
+ !ContainsFPCode && I != E; ++I) {
+ if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
+ const TargetRegisterClass *clas;
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
+ MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+ ((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
+ X86::RFP32RegisterClass ||
+ clas == X86::RFP64RegisterClass)) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+ } while (!ContainsFPCode && &*(MBBI++) != BB);
+
+    // Check PHI nodes in successor blocks. These PHIs will be lowered to have
+ // a copy of the input value in this block.
+ if (!ContainsFPCode) {
+ // Final check, check LLVM BB's that are successors to the LLVM BB
+ // corresponding to BB for FP PHI nodes.
+ const BasicBlock *LLVMBB = BB->getBasicBlock();
+ const PHINode *PN;
+ for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
+ !ContainsFPCode && SI != E; ++SI) {
+ for (BasicBlock::const_iterator II = SI->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ if (PN->getType()->isFloatingPoint()) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
+ if (ContainsFPCode) {
+ BuildMI(*BB, BB->getFirstTerminator(),
+ TM.getInstrInfo()->get(X86::FP_REG_KILL));
+ ++NumFPKill;
+ }
+ }
+}
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing())
+ BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+
+ // Switch the FPU to 64-bit precision mode for better compatibility and speed.
+ int CWFrameIdx = MFI->CreateStackObject(2, 2);
+ addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Set the high part to be 64-bit precision.
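+  // (Writing 2 into byte 1 of the stored control word sets the
+  //  precision-control field to 10b, i.e. 53-bit / double precision.)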
+ addFrameReference(BuildMI(BB, TII->get(X86::MOV8mi)),
+ CWFrameIdx, 1).addImm(2);
+
+ // Reload the modified control word now.
+ addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+ // If this is main, emit special code for main.
+ MachineBasicBlock *BB = MF.begin();
+ if (Fn.hasExternalLinkage() && Fn.getName() == "main")
+ EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
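+/// For example, matching (add (shl X, 2), 20) typically yields IndexReg = X,
+/// Scale = 4 and Disp = 20, leaving the base register free.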
+bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
+ bool isRoot, unsigned Depth) {
+ if (Depth > 5) {
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+ }
+
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRel) {
+ if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+ int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ }
+ return true;
+ }
+
+ int id = N.Val->getNodeId();
+ bool Available = isSelected(id);
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ break;
+ }
+
+ case X86ISD::Wrapper: {
+ bool is64Bit = Subtarget->is64Bit();
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits.
+ if (is64Bit && TM.getCodeModel() != CodeModel::Small)
+ break;
+ if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
+ break;
+      // If the value is already available in a register but both the base and
+      // index components have been picked, there is no register slot left for
+      // it in the addressing mode. Duplicate the GlobalAddress or ConstantPool
+      // as the displacement instead.
+ if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
+ bool isStatic = TM.getRelocationModel() == Reloc::Static;
+ SDOperand N0 = N.getOperand(0);
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ GlobalValue *GV = G->getGlobal();
+ bool isAbs32 = !is64Bit || isStatic;
+ if (isAbs32 || isRoot) {
+ AM.GV = GV;
+ AM.Disp += G->getOffset();
+ AM.isRIPRel = !isAbs32;
+ return false;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ if (!is64Bit || isStatic || isRoot) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.isRIPRel = !isStatic;
+ return false;
+ }
+ } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
+ if (isStatic || isRoot) {
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = !isStatic;
+ return false;
+ }
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ if (isStatic || isRoot) {
+ AM.JT = J->getIndex();
+ AM.isRIPRel = !isStatic;
+ return false;
+ }
+ }
+ }
+ break;
+ }
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
+ unsigned Val = CN->getValue();
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDOperand ShVal = N.Val->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+ isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
+ AM.IndexReg = ShVal.Val->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.Val->getOperand(1));
+ uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ AM.IndexReg = ShVal;
+ } else {
+ AM.IndexReg = ShVal;
+ }
+ return false;
+ }
+ }
+ break;
+
+ case ISD::MUL:
+ // X*[3,5,9] -> X+X*[2,4,8]
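+    // e.g. "x*5" becomes Base = x, IndexReg = x, Scale = 4, i.e. "lea (x,x,4)".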
+ if (!Available &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.Val == 0 &&
+ AM.IndexReg.Val == 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
+ if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
+ AM.Scale = unsigned(CN->getValue())-1;
+
+ SDOperand MulVal = N.Val->getOperand(0);
+ SDOperand Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
+ Reg = MulVal.Val->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.Val->getOperand(1));
+ uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ Reg = N.Val->getOperand(0);
+ } else {
+ Reg = N.Val->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base.Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::ADD:
+ if (!Available) {
+ X86ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
+ !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
+ !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
+ return false;
+ AM = Backup;
+ }
+ break;
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (!Available) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ X86ISelAddressMode Backup = AM;
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM, false) &&
+        // The LHS address mode must not have picked a GV for the displacement.
+ AM.GV == NULL &&
+ // On x86-64, the resultant disp must fit in 32-bits.
+ isInt32(AM.Disp + CN->getSignExtended()) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
+ AM.Disp += CN->getValue();
+ return false;
+ }
+ AM = Backup;
+ }
+ }
+ break;
+ }
+
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.Val == 0) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+/// SelectAddr - Returns true if it is able to pattern match an addressing mode.
+/// It returns by reference the operands which make up the maximal addressing
+/// mode it can match.
+bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Scale, SDOperand &Index,
+ SDOperand &Disp) {
+ X86ISelAddressMode AM;
+ if (MatchAddress(N, AM))
+ return false;
+
+ MVT::ValueType VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.Val)
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.Val)
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp);
+ return true;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDOperand Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+}
+
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
+ SDOperand N, SDOperand &Base,
+ SDOperand &Scale, SDOperand &Index,
+ SDOperand &Disp, SDOperand &InChain,
+ SDOperand &OutChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ InChain = N.getOperand(0).getValue(1);
+ if (ISD::isNON_EXTLoad(InChain.Val) &&
+ InChain.getValue(0).hasOneUse() &&
+ N.hasOneUse() &&
+ CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
+ LoadSDNode *LD = cast<LoadSDNode>(InChain);
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
+ return false;
+ OutChain = LD->getChain();
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(1).Val->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
+ N.getOperand(1).getOperand(0).hasOneUse()) {
+ // Check to see if the BUILD_VECTOR is building a zero vector.
+ SDOperand BV = N.getOperand(0);
+ for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
+ if (!isZeroNode(BV.getOperand(i)) &&
+ BV.getOperand(i).getOpcode() != ISD::UNDEF)
+ return false; // Not a zero/undef vector.
+ // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
+ // from the LHS.
+ unsigned VecWidth = BV.getNumOperands();
+ SDOperand ShufMask = N.getOperand(2);
+ assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
+ if (C->getValue() == VecWidth) {
+ for (unsigned i = 1; i != VecWidth; ++i) {
+ if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
+ // ok.
+ } else {
+ ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
+ if (C->getValue() >= VecWidth) return false;
+ }
+ }
+ }
+
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
+ return false;
+ OutChain = LD->getChain();
+ InChain = SDOperand(LD, 1);
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/// SelectLEAAddr - Like SelectAddr, but determines whether the maximal
+/// addressing mode it matches can be cost-effectively emitted as an LEA
+/// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp) {
+ X86ISelAddressMode AM;
+ if (MatchAddress(N, AM))
+ return false;
+
+ MVT::ValueType VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base.Reg.Val)
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.Val)
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or to
+  // use a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+  // into a LEA. This was determined with some experimentation but is by no
+  // means optimal (especially for code size considerations). LEA is nice
+  // because of its three-address nature. Tweak the cost function again when
+  // we can run convertToThreeAddress() at register allocation time.
+ if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
+ Complexity++;
+
+ if (Complexity > 2) {
+ getAddressOperands(AM, Base, Scale, Index, Disp);
+ return true;
+ }
+ return false;
+}
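+
+// A worked example of the heuristic above (operands are illustrative): an
+// address of the form EBX + EAX*4 + 12 scores 1 (base) + 1 (index) +
+// 1 (scale > 1) + 1 (disp with base/index) = 4 > 2, so it is emitted as
+//   leal 12(%ebx,%eax,4), %ecx
+// while a bare EAX*2 only scores 2 and is left to the cheaper add/shift
+// mentioned in the comment above.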
+
+bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
+ SDOperand &Base, SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp) {
+ if (ISD::isNON_EXTLoad(N.Val) &&
+ N.hasOneUse() &&
+ CanBeFoldedBy(N.Val, P.Val, P.Val))
+ return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
+ return false;
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
+ if (!GlobalBaseReg) {
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ SSARegMap *RegMap = BB->getParent()->getSSARegMap();
+ unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
+ BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
+
+    // If we're using vanilla 'GOT' PIC style, we should use relative
+    // addressing not to the PC, but to the _GLOBAL_OFFSET_TABLE_ external
+    // symbol.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
+ BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
+ addReg(PC).
+ addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ } else {
+ GlobalBaseReg = PC;
+ }
+
+ }
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
+}
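+
+// Roughly, the sequence inserted above materializes the PIC base as
+//   call .Lpc          # MovePCtoStack: pushes the address of .Lpc
+// .Lpc:
+//   popl %reg          # POP32r: the PC now lives in a virtual register
+//   addl $_GLOBAL_OFFSET_TABLE_, %reg   # GOT-style PIC only
+// (the label and register names here are illustrative).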
+
+static SDNode *FindCallStartFromCall(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCall(Node->getOperand(0).Val);
+}
+
+SDNode *X86DAGToDAGISel::Select(SDOperand N) {
+ SDNode *Node = N.Val;
+ MVT::ValueType NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+#endif
+
+ if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::ADD: {
+    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
+    // code and is matched first so as to prevent it from being turned into
+    // LEA32r X+c.
+ // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SDOperand N0 = N.getOperand(0);
+ SDOperand N1 = N.getOperand(1);
+ if (N.Val->getValueType(0) == PtrVT &&
+ N0.getOpcode() == X86ISD::Wrapper &&
+ N1.getOpcode() == ISD::Constant) {
+ unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
+ SDOperand C(0, 0);
+ // TODO: handle ExternalSymbolSDNode.
+ if (GlobalAddressSDNode *G =
+ dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
+ C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
+ G->getOffset() + Offset);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
+ C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(),
+ CP->getOffset()+Offset);
+ }
+
+ if (C.Val) {
+ if (Subtarget->is64Bit()) {
+ SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
+ CurDAG->getRegister(0, PtrVT), C };
+ return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
+ } else
+ return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
+ }
+ }
+
+ // Other cases are handled by auto-generated code.
+ break;
+ }
+
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ if (Opcode == ISD::MULHU)
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ else
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+
+ unsigned LoReg, HiReg;
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
+ SDOperand N0 = Node->getOperand(0);
+ SDOperand N1 = Node->getOperand(1);
+
+ bool foldedLoad = false;
+ SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
+ foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
+    // MULHU and MULHS are commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
+ if (foldedLoad) {
+ N0 = Node->getOperand(1);
+ N1 = Node->getOperand(0);
+ }
+ }
+
+ SDOperand Chain;
+ if (foldedLoad) {
+ Chain = N1.getOperand(0);
+ AddToISelQueue(Chain);
+ } else
+ Chain = CurDAG->getEntryNode();
+
+ SDOperand InFlag(0, 0);
+ AddToISelQueue(N0);
+ Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
+ N0, InFlag);
+ InFlag = Chain.getValue(1);
+
+ if (foldedLoad) {
+ AddToISelQueue(Tmp0);
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+ AddToISelQueue(Tmp3);
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
+ Chain = SDOperand(CNode, 0);
+ InFlag = SDOperand(CNode, 1);
+ } else {
+ AddToISelQueue(N1);
+ InFlag =
+ SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
+ }
+
+ SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
+ ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad)
+ ReplaceUses(N1.getValue(1), Result.getValue(1));
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return NULL;
+ }
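+
+  // As an illustration of the MULHU/MULHS lowering above (registers are
+  // hypothetical), an i32 MULHU of %a and %b becomes roughly:
+  //   movl %a, %eax
+  //   mull %b            # EDX:EAX = EAX * %b
+  //   movl %edx, %dst    # the high 32 bits are the MULHU result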
+
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
+ bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+ if (!isSigned)
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ else
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+
+ unsigned LoReg, HiReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDOperand N0 = Node->getOperand(0);
+ SDOperand N1 = Node->getOperand(1);
+ SDOperand InFlag(0, 0);
+ if (NVT == MVT::i8 && !isSigned) {
+ // Special case for div8, just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
+ if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
+ AddToISelQueue(N0.getOperand(0));
+ AddToISelQueue(Tmp0);
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+ AddToISelQueue(Tmp3);
+ Move =
+ SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
+ Ops, 5), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ AddToISelQueue(N0);
+ Move =
+ SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AddToISelQueue(N0);
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg, N0,
+ InFlag).getValue(1);
+ if (isSigned) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, ClrNode,
+ InFlag).getValue(1);
+ }
+ }
+
+ SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Chain;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
+ if (foldedLoad) {
+ AddToISelQueue(N1.getOperand(0));
+ AddToISelQueue(Tmp0);
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+ AddToISelQueue(Tmp3);
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
+ Chain = SDOperand(CNode, 0);
+ InFlag = SDOperand(CNode, 1);
+ } else {
+ AddToISelQueue(N1);
+ Chain = CurDAG->getEntryNode();
+ InFlag =
+ SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
+ }
+
+ SDOperand Result =
+ CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, NVT, InFlag);
+ ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad)
+ ReplaceUses(N1.getValue(1), Result.getValue(1));
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
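+
+  // Similarly (illustrative registers), a signed i32 division %a / %b from
+  // the case above is lowered to roughly:
+  //   movl %a, %eax
+  //   cdq                # sign-extend EAX into EDX
+  //   idivl %b           # quotient in EAX, remainder in EDX
+  // SDIV copies the result from EAX, SREM from EDX.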
+
+ case ISD::TRUNCATE: {
+ if (!Subtarget->is64Bit() && NVT == MVT::i8) {
+ unsigned Opc2;
+ MVT::ValueType VT;
+ switch (Node->getOperand(0).getValueType()) {
+ default: assert(0 && "Unknown truncate!");
+ case MVT::i16:
+ Opc = X86::MOV16to16_;
+ VT = MVT::i16;
+ Opc2 = X86::TRUNC_16_to8;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32to32_;
+ VT = MVT::i32;
+ Opc2 = X86::TRUNC_32_to8;
+ break;
+ }
+
+ AddToISelQueue(Node->getOperand(0));
+ SDOperand Tmp =
+ SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
+ SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return ResNode;
+ }
+
+ break;
+ }
+ }
+
+ SDNode *ResNode = SelectCode(N);
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.Val)
+ DEBUG(N.Val->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+
+ return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
+ std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
+ SDOperand Op0, Op1, Op2, Op3;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ AddToISelQueue(Op0);
+ AddToISelQueue(Op1);
+ AddToISelQueue(Op2);
+ AddToISelQueue(Op3);
+ return false;
+}
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
+ return new X86DAGToDAGISel(TM, Fast);
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 0000000..37dea79
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,5094 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+X86TargetLowering::X86TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSE = Subtarget->hasSSE2();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+
+ // Set up the TargetLowering object.
+
+ // X86 is weird, it always uses i8 for shift amounts and setcc results.
+ setShiftAmountType(MVT::i8);
+ setSetCCResultType(MVT::i8);
+ setSetCCResultContents(ZeroOrOneSetCCResult);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+ // MS runtime is weird: it exports _setjmp, but longjmp!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
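+  // (Promoting to a wider signed conversion is safe here: the zero-extended
+  // i1/i8/i16 value is non-negative, so SINT_TO_FP produces the same result.)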
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ } else {
+ if (X86ScalarSSE)
+ // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
+ else
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSE)
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ }
+
+  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
+  // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSE) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else {
+ if (X86ScalarSSE && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ }
+
+  // TODO: when we have SSE, these could be more efficient by using movd/movq.
+ if (!X86ScalarSSE) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
+ setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ }
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ setOperationAction(ISD::SELECT , MVT::i8 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ // X86 ret instruction may pop stack.
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+ if (!Subtarget->is64Bit())
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ }
+  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ // X86 wants to expand memset / memcpy itself.
+ setOperationAction(ISD::MEMSET , MVT::Other, Custom);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing())
+ setOperationAction(ISD::LABEL, MVT::Other, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ // FIXME: Verify
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ if (Subtarget->isTargetCygMing())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ if (X86ScalarSSE) {
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ addLegalFPImmediate(+0.0); // xorps / xorpd
+ } else {
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ addLegalFPImmediate(+0.0); // FLD0
+ addLegalFPImmediate(+1.0); // FLD1
+ addLegalFPImmediate(-0.0); // FLD0/FCHS
+ addLegalFPImmediate(-1.0); // FLD1/FCHS
+ }
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
+ }
+
+ if (Subtarget->hasMMX()) {
+ addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
+
+    // FIXME: add MMX packed arithmetic
+
+ setOperationAction(ISD::ADD, MVT::v8i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v8i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+
+ setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i16, Legal);
+
+ setOperationAction(ISD::AND, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::OR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::XOR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
+ }
+
+ if (Subtarget->hasSSE1()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ }
+
+ if (Subtarget->hasSSE2()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
+ }
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::SELECT);
+
+ computeRegisterProperties();
+
+  // FIXME: These should be based on subtarget info. Plus, the values should
+  // be smaller when we are optimizing for size.
+ maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
+ maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
+ maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
+ allowUnalignedMemoryAccesses = true; // x86 supports it!
+}
+
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+/// LowerRET - Lower an ISD::RET node.
+SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
+ assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
+
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Flag;
+
+ // Copy the result values into the output registers.
+ if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
+ RVLocs[0].getLocReg() != X86::ST0) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
+ Flag);
+ Flag = Chain.getValue(1);
+ }
+ } else {
+ // We need to handle a destination of ST0 specially, because it isn't really
+ // a register.
+ SDOperand Value = Op.getOperand(1);
+
+ // If this is an FP return with ScalarSSE, we need to move the value from
+ // an XMM register onto the fp-stack.
+ if (X86ScalarSSE) {
+ SDOperand MemLoc;
+
+      // If this is a load into a scalar-SSE value, don't store the loaded
+      // value back to the stack only to reload it: just use the scalar-SSE
+      // load directly.
+ if (ISD::isNON_EXTLoad(Value.Val) &&
+ (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
+ Chain = Value.getOperand(0);
+ MemLoc = Value.getOperand(1);
+ } else {
+ // Spill the value to memory and reload it into top of stack.
+ unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
+ Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
+ }
+ SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
+ SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
+ Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
+ Chain = Value.getValue(1);
+ }
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops[] = { Chain, Value };
+ Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
+ Flag = Chain.getValue(1);
+ }
+
+ SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
+ if (Flag.Val)
+ return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
+ else
+ return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
+}
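+
+// (Illustration of the ST0 path above: for an SSE f64 return value, LowerRET
+// spills the value to a stack slot, unless it can reuse an existing load, and
+// reloads it with X86ISD::FLD so that it reaches the caller on the x87 stack
+// in ST(0), which is where the 32-bit ABI expects floating point returns.)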
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. It returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *X86TargetLowering::
+LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
+
+
+ SmallVector<SDOperand, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+ } else {
+ // Copies from the FP stack are special, as ST0 isn't a valid register
+ // before the fp stackifier runs.
+
+ // Copy ST0 into an RFP register with FP_GET_RESULT.
+ SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
+ SDOperand GROps[] = { Chain, InFlag };
+ SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2);
+
+ // If we are using ScalarSSE, store ST(0) to the stack and reload it into
+ // an XMM register.
+ if (X86ScalarSSE) {
+ // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDOperand Ops[] = {
+ Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
+ };
+ Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
+ RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
+ Chain = RetVal.getValue(1);
+ }
+ ResultVals.push_back(RetVal);
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).Val;
+}
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is the standard for many Windows API
+// routines. It differs from the C calling convention just a little: the
+// callee cleans up the stack, not the caller. Symbols are also decorated in
+// some fancy way :) It doesn't support any vector arguments.
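+//
+// For instance (assuming MSVC-style decoration), a function declared as
+//   int __stdcall Add(int a, int b);
+// has its 8 bytes of arguments popped with 'ret 8' in the callee and is
+// typically exported under the decorated name _Add@8.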
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ const TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
+ MF.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
+ bool isStdCall) {
+ unsigned NumArgs = Op.Val->getNumValues() - 1;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDOperand Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
+ getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
+
+ SmallVector<SDOperand, 8> ArgValues;
+ unsigned LastVal = ~0U;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ MVT::ValueType RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else {
+ assert(MVT::isVector(RegVT));
+ RC = X86::VR128RegisterClass;
+ }
+
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ VA.getLocMemOffset());
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
+ }
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+
+ ArgValues.push_back(Root);
+
+  // If the function takes a variable number of arguments, make a frame index
+  // for the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg)
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+
+ if (isStdCall && !isVarArg) {
+    BytesToPopOnReturn  = StackSize;    // Callee pops everything.
+ BytesCallerReserves = 0;
+ } else {
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (NumArgs &&
+ (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
+ ISD::ParamFlags::StructReturn))
+ BytesToPopOnReturn = 4;
+
+ BytesCallerReserves = StackSize;
+ }
+
+ RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
+ ReturnAddrIndex = 0; // No return address slot generated yet.
+
+ MF.getInfo<X86MachineFunctionInfo>()
+ ->setBytesToPopOnReturn(BytesToPopOnReturn);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+}
+
+SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
+ SDOperand Chain = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+ SDOperand Callee = Op.getOperand(4);
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
+ SmallVector<SDOperand, 8> MemOpChains;
+
+ SDOperand StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
+ SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+ }
+
+ // If the first argument is an sret pointer, remember it.
+ bool isSRet = NumOps &&
+ (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
+ ISD::ParamFlags::StructReturn);
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+  // ELF / PIC requires the GOT pointer to be in the EBX register before
+  // function calls made via the PLT.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ Chain = DAG.getCopyToReg(Chain, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    // We should use an extra load for direct calls to dllimported functions
+    // in non-JIT mode.
+ if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+ getTargetMachine(), true))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+  // Add an implicit use of the GOT pointer in EBX.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush = 0;
+
+ if (CC == CallingConv::X86_StdCall) {
+ if (isVarArg)
+ NumBytesForCalleeToPush = isSRet ? 4 : 0;
+ else
+ NumBytesForCalleeToPush = NumBytes;
+ } else {
+    // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = isSRet ? 4 : 0;
+ }
+
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// FastCall Calling Convention implementation
+//===----------------------------------------------------------------------===//
+//
+// The X86 'fastcall' calling convention passes up to two integer arguments in
+// registers (an appropriate portion of ECX/EDX), passes the remaining
+// arguments in C order, requires that the callee pop its arguments off the
+// stack (allowing proper tail calls), and has the same return value
+// conventions as the C calling convention.
+//
+// This calling convention always arranges for the callee pop value to be 8n+4
+// bytes, which is needed for tail recursion elimination and stack alignment
+// reasons.
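+//
+// For example (hypothetical prototype), in
+//   int __fastcall Sum(int a, int b, int c);
+// 'a' is passed in ECX, 'b' in EDX, 'c' on the stack, and the callee pops the
+// stack arguments on return.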
+SDOperand
+X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDOperand Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
+ getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
+
+ SmallVector<SDOperand, 8> ArgValues;
+ unsigned LastVal = ~0U;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ MVT::ValueType RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else {
+ assert(MVT::isVector(RegVT));
+ RC = X86::VR128RegisterClass;
+ }
+
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ VA.getLocMemOffset());
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+
+ if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
+    // Make sure the argument area occupies 8n+4 bytes so that the arguments
+    // stay aligned both before and after the return address is pushed.
+ if ((StackSize & 7) == 0)
+ StackSize += 4;
+ }
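+  // E.g. (illustrative numbers) 16 bytes of stack arguments become 20, so
+  // together with the 4-byte return address the total is 24, a multiple of 8.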
+
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
+ ReturnAddrIndex = 0; // No return address slot generated yet.
+ BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
+ BytesCallerReserves = 0;
+
+ MF.getInfo<X86MachineFunctionInfo>()
+ ->setBytesToPopOnReturn(BytesToPopOnReturn);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+}
+
+SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
+ SDOperand Chain = Op.getOperand(0);
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ SDOperand Callee = Op.getOperand(4);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
+    // Make sure the argument area occupies 8n+4 bytes so that the arguments
+    // stay aligned both before and after the return address is pushed.
+ if ((NumBytes & 7) == 0)
+ NumBytes += 4;
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
+ SmallVector<SDOperand, 8> MemOpChains;
+
+ SDOperand StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
+ SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // We should use an extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+ getTargetMachine(), true))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // ELF / PIC requires the GOT pointer to be in the EBX register before making
+ // function calls via the PLT.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ Chain = DAG.getCopyToReg(Chain, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add an implicit use of the GOT pointer in EBX.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+
+ // FIXME: Do not generate X86ISD::TAILCALL for now.
+ Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Returns a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// X86-64 C Calling Convention implementation
+//===----------------------------------------------------------------------===//
+
+SDOperand
+X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDOperand Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
+ getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
+
+ SmallVector<SDOperand, 8> ArgValues;
+ unsigned LastVal = ~0U;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ MVT::ValueType RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else {
+ assert(MVT::isVector(RegVT));
+ if (MVT::getSizeInBits(RegVT) == 64) {
+ RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
+ RegVT = MVT::i64;
+ } else
+ RC = X86::VR128RegisterClass;
+ }
+
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
+
+ // Handle MMX values passed in GPRs.
+ if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
+ MVT::getSizeInBits(RegVT) == 64)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc());
+
+ // Create the nodes corresponding to a load from this parameter slot.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ VA.getLocMemOffset());
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
+ }
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+
+ // If the function takes a variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by dereferencing the result of va_next.
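+ // The register save area holds the six GP argument registers first
+ // (6 * 8 = 48 bytes) followed by the eight XMM argument registers
+ // (8 * 16 = 128 bytes), 176 bytes in total, aligned to 16 bytes.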
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDOperand, 8> MemOps;
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getConstant(VarArgsGPOffset, getPointerTy()));
+ for (; NumIntRegs != 6; ++NumIntRegs) {
+ unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getConstant(VarArgsFPOffset, getPointerTy()));
+ for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ ReturnAddrIndex = 0; // No return address slot generated yet.
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ BytesCallerReserves = StackSize;
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+}
+
+SDOperand
+X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
+ SDOperand Chain = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+ SDOperand Callee = Op.getOperand(4);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
+ SmallVector<SDOperand, 8> MemOpChains;
+
+ SDOperand StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
+ SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isVarArg) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration) %al is used as a hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an upper bound on the number of SSE
+ // registers used and must be in the range 0 - 8 inclusive.
+
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+
+ Chain = DAG.getCopyToReg(Chain, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // We should use an extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ if (getTargetMachine().getCodeModel() != CodeModel::Large
+ && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+ getTargetMachine(), true))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ if (getTargetMachine().getCodeModel() != CodeModel::Large)
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+
+ // FIXME: Do not generate X86ISD::TAILCALL for now.
+ Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Returns a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(DAG.getConstant(0, getPointerTy()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+
+SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (Subtarget->is64Bit())
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
+ else
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+
+/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
+/// X86-specific condition code. It returns false if it cannot do a direct
+/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
+/// needed.
+static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
+ SelectionDAG &DAG) {
+ X86CC = X86::COND_INVALID;
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ X86CC = X86::COND_NS;
+ return true;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ X86CC = X86::COND_S;
+ return true;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETEQ: X86CC = X86::COND_E; break;
+ case ISD::SETGT: X86CC = X86::COND_G; break;
+ case ISD::SETGE: X86CC = X86::COND_GE; break;
+ case ISD::SETLT: X86CC = X86::COND_L; break;
+ case ISD::SETLE: X86CC = X86::COND_LE; break;
+ case ISD::SETNE: X86CC = X86::COND_NE; break;
+ case ISD::SETULT: X86CC = X86::COND_B; break;
+ case ISD::SETUGT: X86CC = X86::COND_A; break;
+ case ISD::SETULE: X86CC = X86::COND_BE; break;
+ case ISD::SETUGE: X86CC = X86::COND_AE; break;
+ }
+ } else {
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
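+ // Conditions with no direct flag test (e.g. SETOLT) are mapped to the
+ // mirrored condition and the operands are swapped below (Flip).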
+ bool Flip = false;
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETUEQ:
+ case ISD::SETEQ: X86CC = X86::COND_E; break;
+ case ISD::SETOLT: Flip = true; // Fallthrough
+ case ISD::SETOGT:
+ case ISD::SETGT: X86CC = X86::COND_A; break;
+ case ISD::SETOLE: Flip = true; // Fallthrough
+ case ISD::SETOGE:
+ case ISD::SETGE: X86CC = X86::COND_AE; break;
+ case ISD::SETUGT: Flip = true; // Fallthrough
+ case ISD::SETULT:
+ case ISD::SETLT: X86CC = X86::COND_B; break;
+ case ISD::SETUGE: Flip = true; // Fallthrough
+ case ISD::SETULE:
+ case ISD::SETLE: X86CC = X86::COND_BE; break;
+ case ISD::SETONE:
+ case ISD::SETNE: X86CC = X86::COND_NE; break;
+ case ISD::SETUO: X86CC = X86::COND_P; break;
+ case ISD::SETO: X86CC = X86::COND_NP; break;
+ }
+ if (Flip)
+ std::swap(LHS, RHS);
+ }
+
+ return X86CC != X86::COND_INVALID;
+}
+
+/// hasFPCMov - return true if there is a floating point cmov for the specified
+/// X86 condition code. The current x86 ISA includes the following FP cmov
+/// instructions: fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne,
+/// fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if its value falls within the half-open range
+/// [Low, Hi).
+static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ return true;
+
+ unsigned Val = cast<ConstantSDNode>(Op)->getValue();
+ return (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if its value is equal to the specified value.
+static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ return true;
+ return cast<ConstantSDNode>(Op)->getValue() == Val;
+}
+
+/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFD.
+bool X86::isPSHUFDMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Check that the mask doesn't reference the second vector.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
+ return false;
+ }
+
+ return true;
+}
+
+/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+bool X86::isPSHUFHWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
+ return false;
+
+ // Lower quadword copied in order.
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Arg)->getValue() != i)
+ return false;
+ }
+
+ // Upper quadword shuffled.
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < 4 || Val > 7)
+ return false;
+ }
+
+ return true;
+}
+
+/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+bool X86::isPSHUFLWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
+ return false;
+
+ // Upper quadword copied in order.
+ for (unsigned i = 4; i != 8; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 0; i != 4; ++i)
+ if (!isUndefOrInRange(N->getOperand(i), 0, 4))
+ return false;
+
+ return true;
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
+ if (NumElems != 2 && NumElems != 4) return false;
+
+ unsigned Half = NumElems / 2;
+ for (unsigned i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Elems[i], 0, NumElems))
+ return false;
+ for (unsigned i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
+/// half elements to come from vector 1 (which would equal the dest.) and
+/// the upper half to come from vector 2.
+static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
+ if (NumOps != 2 && NumOps != 4) return false;
+
+ unsigned Half = NumOps / 2;
+ for (unsigned i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
+ return false;
+ for (unsigned i = Half; i < NumOps; ++i)
+ if (!isUndefOrInRange(Ops[i], 0, NumOps))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getOperand(0), 6) &&
+ isUndefOrEqual(N->getOperand(1), 7) &&
+ isUndefOrEqual(N->getOperand(2), 2) &&
+ isUndefOrEqual(N->getOperand(3), 3);
+}
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
+ return isUndefOrEqual(N->getOperand(0), 2) &&
+ isUndefOrEqual(N->getOperand(1), 3) &&
+ isUndefOrEqual(N->getOperand(2), 2) &&
+ isUndefOrEqual(N->getOperand(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
+/// and MOVLHPS.
+bool X86::isMOVHPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i) {
+ SDOperand Arg = N->getOperand(i + NumElems/2);
+ if (!isUndefOrEqual(Arg, i + NumElems))
+ return false;
+ }
+
+ return true;
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
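+/// For a 4-element shuffle the expected mask is <0, 4, 1, 5>; undef entries
+/// are allowed.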
+bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
+ bool V2IsSplat = false) {
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ SDOperand BitI = Elts[i];
+ SDOperand BitI1 = Elts[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
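+/// For a 4-element shuffle the expected mask is <2, 6, 3, 7>; undef entries
+/// are allowed.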
+bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
+ bool V2IsSplat = false) {
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ SDOperand BitI = Elts[i];
+ SDOperand BitI1 = Elts[i+1];
+ if (!isUndefOrEqual(BitI, j + NumElts/2))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
+ SDOperand BitI = N->getOperand(i);
+ SDOperand BitI1 = N->getOperand(i+1);
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+
+ return true;
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ SDOperand BitI = N->getOperand(i);
+ SDOperand BitI1 = N->getOperand(i + 1);
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+
+ return true;
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
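+/// For a 4-element shuffle the expected mask is <4, 1, 2, 3>: element 0 comes
+/// from V2 and the remaining elements come from V1 in order.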
+static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ if (!isUndefOrEqual(Elts[0], NumElts))
+ return false;
+
+ for (unsigned i = 1; i < NumElts; ++i) {
+ if (!isUndefOrEqual(Elts[i], i))
+ return false;
+ }
+
+ return true;
+}
+
+bool X86::isMOVLMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return ::isMOVLMask(N->op_begin(), N->getNumOperands());
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
+/// of what x86 movss wants: x86 movss requires the lowest element to be the
+/// lowest element of vector 2 and the other elements to come from vector 1
+/// in order.
+static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
+ bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Ops[0], 0))
+ return false;
+
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDOperand Arg = Ops[i];
+ if (!(isUndefOrEqual(Arg, i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
+ return false;
+ }
+
+ return true;
+}
+
+static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
+ V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Expect 1, 1, 3, 3
+ for (unsigned i = 0; i < 2; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val != 1) return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val != 3) return false;
+ HasHi = true;
+ }
+
+ // Don't use movshdup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+bool X86::isMOVSLDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 4)
+ return false;
+
+ // Expect 0, 0, 2, 2
+ for (unsigned i = 0; i < 2; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val != 0) return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val != 2) return false;
+ HasHi = true;
+ }
+
+ // Don't use movsldup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies an identity operation on the LHS or RHS.
+static bool isIdentityMask(SDNode *N, bool RHS = false) {
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
+ return false;
+ return true;
+}
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
+/// a splat of a single element.
+static bool isSplatMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the splatted value doesn't reference the second vector.
+ unsigned NumElems = N->getNumOperands();
+ SDOperand ElementBase;
+ unsigned i = 0;
+ for (; i != NumElems; ++i) {
+ SDOperand Elt = N->getOperand(i);
+ if (isa<ConstantSDNode>(Elt)) {
+ ElementBase = Elt;
+ break;
+ }
+ }
+
+ if (!ElementBase.Val)
+ return false;
+
+ for (; i != NumElems; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ if (Arg != ElementBase) return false;
+ }
+
+ // Make sure it is a splat of the first vector operand.
+ return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
+}
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
+/// a splat of a single element and it's a 2 or 4 element mask.
+bool X86::isSplatMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ // We can only splat 64-bit, and 32-bit quantities with a single instruction.
+ if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
+ return false;
+ return ::isSplatMask(N);
+}
+
+/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of element zero.
+bool X86::isSplatLoMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), 0))
+ return false;
+ return true;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+/// instructions.
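+/// Each element index is packed into two bits (one bit for 2-element masks),
+/// with element 0 in the low bits; e.g. the identity mask <0, 1, 2, 3>
+/// encodes as 0xE4.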
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ unsigned NumOperands = N->getNumOperands();
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ unsigned Val = 0;
+ SDOperand Arg = N->getOperand(NumOperands-i-1);
+ if (Arg.getOpcode() != ISD::UNDEF)
+ Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+
+ return Mask;
+}
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+/// instructions.
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ unsigned Val = 0;
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF)
+ Val = cast<ConstantSDNode>(Arg)->getValue();
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+/// instructions.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ unsigned Val = 0;
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF)
+ Val = cast<ConstantSDNode>(Arg)->getValue();
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+
+ return Mask;
+}
+
+/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
+/// specifies an 8 element shuffle that can be broken into a pair of
+/// PSHUFHW and PSHUFLW.
+static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val >= 4)
+ return false;
+ }
+
+ // Upper quadword shuffled.
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Arg = N->getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < 4 || Val > 7)
+ return false;
+ }
+
+ return true;
+}
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
+/// values in their permute mask.
+static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
+ SDOperand &V2, SDOperand &Mask,
+ SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType MaskVT = Mask.getValueType();
+ MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
+ else
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
+ }
+
+ std::swap(V1, V2);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from the upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(SDNode *Mask) {
+ unsigned NumElems = Mask->getNumOperands();
+ if (NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector.
+static inline bool isScalarLoadToVector(SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ N = N->getOperand(0).Val;
+ return ISD::isNON_EXTLoad(N);
+ }
+ return false;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from the lower half
+/// of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // If V2 is a vector load, don't do this transformation. We will try to use
+ // a load-folding shufps op instead.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = Mask->getNumOperands();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDOperand SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to an undef.
+static bool isUndefShuffle(SDNode *N) {
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ SDOperand V1 = N->getOperand(0);
+ SDOperand V2 = N->getOperand(1);
+ SDOperand Mask = N->getOperand(2);
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
+ return false;
+ else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDOperand Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
+static bool isZeroShuffle(SDNode *N) {
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return false;
+
+ SDOperand V1 = N->getOperand(0);
+ SDOperand V2 = N->getOperand(1);
+ SDOperand Mask = N->getOperand(2);
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.Val->getOpcode();
+ if (Opc == ISD::UNDEF)
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.Val->getOperand(Idx)))
+ return false;
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.Val->getOpcode();
+ if (Opc == ISD::UNDEF)
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+ unsigned NumElems = MVT::getVectorNumElements(VT);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ bool isFP = MVT::isFloatingPoint(EVT);
+ SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
+ SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+}
+
+/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
+/// that point to V2 point to its first element.
+static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
+ assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
+
+ bool Changed = false;
+ SmallVector<SDOperand, 8> MaskVec;
+ unsigned NumElems = Mask.getNumOperands();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() != ISD::UNDEF) {
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val > NumElems) {
+ Arg = DAG.getConstant(NumElems, Arg.getValueType());
+ Changed = true;
+ }
+ }
+ MaskVec.push_back(Arg);
+ }
+
+ if (Changed)
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
+ &MaskVec[0], MaskVec.size());
+ return Mask;
+}
+
+/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
+/// operation of the specified width.
+static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+
+ SmallVector<SDOperand, 8> MaskVec;
+ MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
+ for (unsigned i = 1; i != NumElems; ++i)
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
+
+/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
+/// of specified width.
+static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
+
+/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
+/// of specified width.
+static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ unsigned Half = NumElems/2;
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != Half; ++i) {
+ MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
+ MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
+
+/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
+///
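+/// The source vector is repeatedly unpacked with itself until its low 32 bits
+/// hold copies of the low element, then bitcast to v4i32 and splatted with an
+/// all-zero shuffle mask.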
+static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand Mask = Op.getOperand(2);
+ MVT::ValueType VT = Op.getValueType();
+ unsigned NumElems = Mask.getNumOperands();
+ Mask = getUnpacklMask(NumElems, DAG);
+ while (NumElems != 4) {
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+ NumElems >>= 1;
+ }
+ V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
+
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ Mask = getZeroVector(MaskVT, DAG);
+ SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
+ DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
+}
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector of zero or undef vector.
+static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
+ unsigned NumElems, unsigned Idx,
+ bool isZero, SelectionDAG &DAG) {
+ SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
+ SDOperand Zero = DAG.getConstant(0, EVT);
+ SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
+ MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
+}
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
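+/// Adjacent pairs of byte elements are zero-extended to i16, merged with a
+/// shift and an OR, inserted into a v8i16 vector, and the result is bitcast
+/// back to v16i8.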
+static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDOperand();
+
+ SDOperand V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, DAG);
+ else
+ V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDOperand ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.Val)
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
+ DAG.getConstant(i/2, TLI.getPointerTy()));
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDOperand();
+
+ SDOperand V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, DAG);
+ else
+ V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
+ DAG.getConstant(i, TLI.getPointerTy()));
+ }
+ }
+
+ return V;
+}
+
+SDOperand
+X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+ // All zeros are handled with pxor.
+ if (ISD::isBuildVectorAllZeros(Op.Val))
+ return Op;
+
+ // All ones are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllOnes(Op.Val))
+ return Op;
+
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ unsigned EVTBits = MVT::getSizeInBits(EVT);
+
+ unsigned NumElems = Op.getNumOperands();
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ std::set<SDOperand> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDOperand Elt = Op.getOperand(i);
+ if (Elt.getOpcode() != ISD::UNDEF) {
+ Values.insert(Elt);
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+ }
+
+ if (NumNonZero == 0) {
+ if (NumZero == 0)
+ // All undef vector. Return an UNDEF.
+ return DAG.getNode(ISD::UNDEF, VT);
+ else
+ // A mix of zero and undef. Return a zero vector.
+ return getZeroVector(VT, DAG);
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDOperand();
+
+ // Special case for single non-zero element.
+ if (NumNonZero == 1) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDOperand Item = Op.getOperand(Idx);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
+ if (Idx == 0)
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
+ NumZero > 0, DAG);
+
+ if (EVTBits == 32) {
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
+ DAG);
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
+ DAG.getNode(ISD::UNDEF, VT), Mask);
+ }
+ }
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64)
+ return SDOperand();
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.Val) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.Val) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDOperand, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, DAG);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
+ getMOVLMask(NumElems, DAG));
+ break;
+ case 2:
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
+ getMOVLMask(NumElems, DAG));
+ break;
+ case 3:
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
+ getUnpacklMask(NumElems, DAG));
+ break;
+ }
+ }
+
+ // Take advantage of the fact that a GR32-to-VR128 scalar_to_vector (i.e.
+ // movd) clears the upper bits.
+ // FIXME: we can do the same for the v4f32 case when we know both parts of
+ // the lower half come from scalar_to_vector (loadf32). We should do
+ // that in the post-legalizer dag combiner with target-specific hooks.
+ if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
+ return V[0];
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ if (Reverse)
+ MaskVec.push_back(DAG.getConstant(1-i, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ if (Reverse)
+ MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
+ SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
+ }
+
+ if (Values.size() > 2) {
+ // Expand into a number of unpckl*.
+ // e.g. for v4f32
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
+ NumElems >>= 1;
+ while (NumElems != 0) {
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
+ UnpckMask);
+ NumElems >>= 1;
+ }
+ return V[0];
+ }
+
+ return SDOperand();
+}
+
+SDOperand
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand V2 = Op.getOperand(1);
+ SDOperand PermMask = Op.getOperand(2);
+ MVT::ValueType VT = Op.getValueType();
+ unsigned NumElems = PermMask.getNumOperands();
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+
+ if (isUndefShuffle(Op.Val))
+ return DAG.getNode(ISD::UNDEF, VT);
+
+ if (isZeroShuffle(Op.Val))
+ return getZeroVector(VT, DAG);
+
+ if (isIdentityMask(PermMask.Val))
+ return V1;
+ else if (isIdentityMask(PermMask.Val, true))
+ return V2;
+
+ if (isSplatMask(PermMask.Val)) {
+ if (NumElems <= 4) return Op;
+ // Promote it to a v4i32 splat.
+ return PromoteSplat(Op, DAG);
+ }
+
+ if (X86::isMOVLMask(PermMask.Val))
+ return (V1IsUndef) ? V2 : Op;
+
+ if (X86::isMOVSHDUPMask(PermMask.Val) ||
+ X86::isMOVSLDUPMask(PermMask.Val) ||
+ X86::isMOVHLPSMask(PermMask.Val) ||
+ X86::isMOVHPMask(PermMask.Val) ||
+ X86::isMOVLPMask(PermMask.Val))
+ return Op;
+
+ if (ShouldXformToMOVHLPS(PermMask.Val) ||
+ ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
+ return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+
+ bool Commuted = false;
+ V1IsSplat = isSplatVector(V1.Val);
+ V2IsSplat = isSplatVector(V2.Val);
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
+ if (V2IsUndef) return V1;
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (V2IsSplat) {
+ // V2 is a splat, so the mask may be malformed. That is, it may point
+ // to any V2 element. The instruction selector won't like this. Get
+ // a corrected mask and commute to form a proper MOVS{S|D}.
+ SDOperand NewMask = getMOVLMask(NumElems, DAG);
+ if (NewMask.Val != PermMask.Val)
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
+ }
+ return Op;
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
+ X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
+ X86::isUNPCKLMask(PermMask.Val) ||
+ X86::isUNPCKHMask(PermMask.Val))
+ return Op;
+
+ if (V2IsSplat) {
+ // Normalize the mask so all entries that point to V2 point to its first
+ // element, then try to match unpck{h|l} again. If there is a match, return
+ // a new vector_shuffle with the corrected mask.
+ SDOperand NewMask = NormalizeMask(PermMask, DAG);
+ if (NewMask.Val != PermMask.Val) {
+ if (X86::isUNPCKLMask(PermMask.Val, true)) {
+ SDOperand NewMask = getUnpacklMask(NumElems, DAG);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
+ } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
+ SDOperand NewMask = getUnpackhMask(NumElems, DAG);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
+ }
+ }
+ }
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+
+ if (Commuted) {
+    // Commute it back and try unpck* again.
+ Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
+ X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
+ X86::isUNPCKLMask(PermMask.Val) ||
+ X86::isUNPCKHMask(PermMask.Val))
+ return Op;
+ }
+
+ // If VT is integer, try PSHUF* first, then SHUFP*.
+ if (MVT::isInteger(VT)) {
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return Op;
+ }
+
+ if (X86::isSHUFPMask(PermMask.Val) &&
+ MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
+ return Op;
+
+    // Handle v8i16 shuffles that can be split into a shuffle-low / shuffle-high node pair.
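+    // For example, mask <2, 0, 3, 1, 6, 7, 4, 5> is emitted as a shuffle with
+    // mask <2, 0, 3, 1, 4, 5, 6, 7> (a pshuflw) followed by one with mask
+    // <0, 1, 2, 3, 6, 7, 4, 5> (a pshufhw).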
+ if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(PermMask.getOperand(i));
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size());
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(PermMask.getOperand(i));
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
+ }
+ } else {
+ // Floating point cases in the other order.
+ if (X86::isSHUFPMask(PermMask.Val))
+ return Op;
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return Op;
+ }
+ }
+
+ if (NumElems == 4 &&
+ // Don't do this for MMX.
+ MVT::getSizeInBits(VT) != 64) {
+ MVT::ValueType MaskVT = PermMask.getValueType();
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.reserve(NumElems);
+ SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+    // If no more than two elements come from either vector, this can be
+    // implemented with two shuffles. The first shuffle gathers the elements.
+    // The second shuffle, which takes the first shuffle as both of its
+    // vector operands, puts the elements into the right order.
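+    // For example, with mask <2, 6, 3, 7> the first shuffle (mask <2, 3, 6, 7>)
+    // produces <V1[2], V1[3], V2[2], V2[3]>, and the second shuffle reorders
+    // that result with mask <0, 2, 5, 7>.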
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Elt = PermMask.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
+ if (Val < NumElems) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Elt;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < NumElems)
+ Mask1[2+NumHi] = Elt;
+ NumHi++;
+ }
+ }
+ }
+ if (NumLo <= 2 && NumHi <= 2) {
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &Mask1[0], Mask1.size()));
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
+ Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
+ Mask2[i] = DAG.getConstant(Idx, MaskEVT);
+ }
+ }
+
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &Mask2[0], Mask2.size()));
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
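+    // The low half of the result is produced by one shuffle of V1/V2 (LoMask)
+    // and the high half by another (HiMask); a final shuffle of the two
+    // intermediate results moves each element into its destination slot.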
+ Locs.clear();
+ SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = NumElems/2;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (i == NumElems/2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = NumElems/2;
+ }
+ SDOperand Elt = PermMask.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Elt;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Elt;
+ HiIdx++;
+ }
+ }
+
+ SDOperand LoShuffle =
+ DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &LoMask[0], LoMask.size()));
+ SDOperand HiShuffle =
+ DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &HiMask[0], HiMask.size()));
+ SmallVector<SDOperand, 8> MaskOps;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ } else {
+ unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
+ MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
+ }
+ }
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskOps[0], MaskOps.size()));
+ }
+
+ return SDOperand();
+}
+
+SDOperand
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDOperand();
+
+ MVT::ValueType VT = Op.getValueType();
+ // TODO: handle v16i8.
+ if (MVT::getSizeInBits(VT) == 16) {
+    // Transform it so it matches pextrw, which produces a 32-bit result.
+ MVT::ValueType EVT = (MVT::ValueType)(VT+1);
+ SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (MVT::getSizeInBits(VT) == 32) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return Op;
+ // SHUFPS the element to the lowest double word, then movss.
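+    // E.g. extracting element 2 uses the shuffle mask <2, u, u, u> and then
+    // extracts element 0 of the shuffled vector.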
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ SmallVector<SDOperand, 8> IdxVec;
+ IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &IdxVec[0], IdxVec.size());
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+ Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+ DAG.getConstant(0, getPointerTy()));
+ } else if (MVT::getSizeInBits(VT) == 64) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
+ // to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ SmallVector<SDOperand, 8> IdxVec;
+ IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &IdxVec[0], IdxVec.size());
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+ Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+ DAG.getConstant(0, getPointerTy()));
+ }
+
+ return SDOperand();
+}
+
+SDOperand
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
+  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
+  // as its second argument.
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+ if (MVT::getSizeInBits(BaseVT) == 16) {
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
+ return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
+ } else if (MVT::getSizeInBits(BaseVT) == 32) {
+ unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
+ if (Idx == 0) {
+ // Use a movss.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ MaskVec.push_back(DAG.getConstant(4, BaseVT));
+ for (unsigned i = 1; i <= 3; ++i)
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
+ } else {
+      // Use two pinsrw instructions to insert a 32-bit value.
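+      // A 32-bit insert at dword index Idx becomes two 16-bit inserts at word
+      // indices 2*Idx and 2*Idx+1; the upper halfword is obtained by shifting
+      // the value right by 16.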
+ Idx <<= 1;
+ if (MVT::isFloatingPoint(N1.getValueType())) {
+ if (ISD::isNON_EXTLoad(N1.Val)) {
+ // Just load directly from f32mem to GR32.
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset());
+ } else {
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
+ N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
+ N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
+ DAG.getConstant(0, getPointerTy()));
+ }
+ }
+ N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
+ N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
+ DAG.getConstant(Idx, getPointerTy()));
+ N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
+ N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
+ DAG.getConstant(Idx+1, getPointerTy()));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
+ }
+ }
+
+ return SDOperand();
+}
+
+SDOperand
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
+ return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOV32ri.
+SDOperand
+X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
+ getPointerTy(),
+ CP->getAlignment());
+ Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDOperand
+X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ Result);
+ }
+
+  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
+  // load the value at address GV, not the value of GV itself. This means that
+  // the GlobalAddress must be in the base or index register of the address, not
+  // the GV offset field. The platform check is inside the GVRequiresExtraLoad()
+  // call. The same applies to external symbols during PIC codegen.
+ if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
+ Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
+
+ return Result;
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+static SDOperand
+LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT::ValueType PtrVT) {
+ SDOperand InFlag;
+ SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // emit leal symbol@TLSGD(,%ebx,1), %eax
+ SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
+ SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ SDOperand Ops[] = { Chain, TGA, InFlag };
+ SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
+ InFlag = Result.getValue(2);
+ Chain = Result.getValue(1);
+
+ // call ___tls_get_addr. This function receives its argument in
+ // the register EAX.
+ Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
+ InFlag = Chain.getValue(1);
+
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops1[] = { Chain,
+ DAG.getTargetExternalSymbol("___tls_get_addr",
+ PtrVT),
+ DAG.getRegister(X86::EAX, PtrVT),
+ DAG.getRegister(X86::EBX, PtrVT),
+ InFlag };
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
+ InFlag = Chain.getValue(1);
+
+ return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDOperand
+LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT::ValueType PtrVT) {
+ // Get the Thread Pointer
+ SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
+
+ if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
+ Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
+
+  // The address of the thread-local variable is the sum of the thread
+  // pointer and the offset of the variable.
+ return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
+}
+
+SDOperand
+X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+  // TODO: implement the "initial exec" model for PIC executables
+ assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF and 64-bit targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  // If the relocation model is PIC, use the "general dynamic" TLS model;
+  // otherwise use the "local exec" TLS model.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
+ else
+ return LowerToTLSExecModel(GA, DAG, getPointerTy());
+}
+
+SDOperand
+X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ "Not an i64 shift!");
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDOperand ShOpLo = Op.getOperand(0);
+ SDOperand ShOpHi = Op.getOperand(1);
+ SDOperand ShAmt = Op.getOperand(2);
+ SDOperand Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
+ DAG.getConstant(0, MVT::i32);
+
+ SDOperand Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ }
+
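+  // The SHLD/SHL (or SHRD/SRL/SRA) pair is only correct for shift amounts
+  // below 32, so test bit 5 of the shift amount and, when it is set, CMOV
+  // the corrected halves into place: for SHL the high part becomes Tmp3 and
+  // the low part 0; for SRL/SRA the low part becomes Tmp3 and the high part
+  // Tmp1 (zero or the replicated sign bit).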
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+ SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
+ DAG.getConstant(32, MVT::i8));
+ SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
+ SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
+
+ SDOperand Hi, Lo;
+ SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
+ SmallVector<SDOperand, 4> Ops;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(InFlag);
+ Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
+ InFlag = Hi.getValue(1);
+
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(InFlag);
+ Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
+ } else {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(InFlag);
+ Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
+ InFlag = Lo.getValue(1);
+
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(InFlag);
+ Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
+ }
+
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
+ Ops.clear();
+ Ops.push_back(Lo);
+ Ops.push_back(Hi);
+ return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+}
+
+SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
+ Op.getOperand(0).getValueType() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ SDOperand Result;
+ MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
+ unsigned Size = MVT::getSizeInBits(SrcVT)/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
+ StackSlot, NULL, 0);
+
+ // Build the FILD
+ SDVTList Tys;
+ if (X86ScalarSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(SrcVT));
+ Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
+ Tys, &Ops[0], Ops.size());
+
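+  // With SSE scalar FP the FILD result lives on the x87 stack, so store it
+  // back to a stack slot and reload it to get the value into an SSE register.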
+ if (X86ScalarSSE) {
+ Chain = Result.getValue(1);
+ SDOperand InFlag = Result.getValue(2);
+
+    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+    // shouldn't be necessary except that RFP cannot be live across
+    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Result);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(Op.getValueType()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
+ Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
+ }
+
+ return Result;
+}
+
+SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ switch (Op.getValueType()) {
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDOperand Chain = DAG.getEntryNode();
+ SDOperand Value = Op.getOperand(0);
+ if (X86ScalarSSE) {
+ assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
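+    // FISTP operates on the x87 stack, so spill the SSE value and reload it
+    // with an FLD before emitting the FP_TO_INT*_IN_MEM node.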
+ Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDOperand Ops[] = {
+ Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ };
+ Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDOperand Ops[] = { Chain, Value, StackSlot };
+ SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
+}
+
+SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EltVT = VT;
+ if (MVT::isVector(VT))
+ EltVT = MVT::getVectorElementType(VT);
+ const Type *OpNTy = MVT::getTypeForValueType(EltVT);
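+  // FABS is implemented as a bitwise AND with a constant-pool vector whose
+  // elements have every bit set except the sign bit.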
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *CS = ConstantStruct::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SmallVector<SDOperand, 3> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+ Ops.push_back(DAG.getSrcValue(NULL));
+ SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
+}
+
+SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EltVT = VT;
+ if (MVT::isVector(VT))
+ EltVT = MVT::getVectorElementType(VT);
+ const Type *OpNTy = MVT::getTypeForValueType(EltVT);
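+  // FNEG is implemented as a bitwise XOR with a constant-pool vector whose
+  // elements contain only the sign bit.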
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *CS = ConstantStruct::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SmallVector<SDOperand, 3> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+ Ops.push_back(DAG.getSrcValue(NULL));
+ SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+}
+
+SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Op0 = Op.getOperand(0);
+ SDOperand Op1 = Op.getOperand(1);
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType SrcVT = Op1.getValueType();
+ const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
+
+ // If second operand is smaller, extend it first.
+ if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
+ SrcVT = VT;
+ }
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ } else {
+ CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ }
+ Constant *CS = ConstantStruct::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
+ SmallVector<SDOperand, 3> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+ Ops.push_back(DAG.getSrcValue(NULL));
+ SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+ SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
+ DAG.getConstant(0, getPointerTy()));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ } else {
+ CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ }
+ CS = ConstantStruct::get(CV);
+ CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+ Tys = DAG.getVTList(VT, MVT::Other);
+ Ops.clear();
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+ Ops.push_back(DAG.getSrcValue(NULL));
+ SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+ SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
+}
+
+SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
+ SDOperand Chain) {
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDOperand Cond;
+ SDOperand Op0 = Op.getOperand(0);
+ SDOperand Op1 = Op.getOperand(1);
+ SDOperand CC = Op.getOperand(2);
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+ const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
+ bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
+ unsigned X86CC;
+
+ if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
+ Op0, Op1, DAG)) {
+ SDOperand Ops1[] = { Chain, Op0, Op1 };
+ Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
+ SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ }
+
+ assert(isFP && "Illegal integer SetCC!");
+
+ SDOperand COps[] = { Chain, Op0, Op1 };
+ Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
+
+ switch (SetCCOpcode) {
+ default: assert(false && "Illegal floating point SetCC!");
+ case ISD::SETOEQ: { // !PF & ZF
+ SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
+ SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
+ SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
+ Tmp1.getValue(1) };
+ SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
+ }
+ case ISD::SETUNE: { // PF | !ZF
+ SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
+ SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
+ SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
+ Tmp1.getValue(1) };
+ SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
+ }
+ }
+}
+
+SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDOperand Chain = DAG.getEntryNode();
+ SDOperand Cond = Op.getOperand(0);
+ SDOperand CC;
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG, Chain);
+
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+    // If the condition flag is set by an X86ISD::CMP, make a copy of it
+    // (since the flag operand cannot be shared) and use it as the
+    // condition-setting operand in place of the X86ISD::SETCC.
+    // If the X86ISD::SETCC has more than one use, perhaps it would be better
+    // to use a test instead of duplicating the X86ISD::CMP (for register
+    // pressure reasons)?
+ SDOperand Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ bool IllegalFPCMov = !X86ScalarSSE &&
+ MVT::isFloatingPoint(Op.getValueType()) &&
+ !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+ if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
+ !IllegalFPCMov) {
+ SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
+ Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
+ addTest = false;
+ }
+ }
+
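+  // If no reusable flag-producing node was found, materialize the flags by
+  // comparing the i8 condition value against zero.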
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
+ Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
+ }
+
+ VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
+ SmallVector<SDOperand, 4> Ops;
+  // X86ISD::CMOV's operands are (false value, true value, CC, cond flag); it
+  // yields the true value (operand 1) when the condition holds, so push the
+  // select's false operand first.
+ Ops.push_back(Op.getOperand(2));
+ Ops.push_back(Op.getOperand(1));
+ Ops.push_back(CC);
+ Ops.push_back(Cond.getValue(1));
+ return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
+}
+
+SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Cond = Op.getOperand(1);
+ SDOperand Dest = Op.getOperand(2);
+ SDOperand CC;
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG, Chain);
+
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+    // If the condition flag is set by an X86ISD::CMP, make a copy of it
+    // (since the flag operand cannot be shared) and use it as the
+    // condition-setting operand in place of the X86ISD::SETCC.
+    // If the X86ISD::SETCC has more than one use, perhaps it would be better
+    // to use a test instead of duplicating the X86ISD::CMP (for register
+    // pressure reasons)?
+ SDOperand Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
+ SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
+ Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
+ Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
+ }
+  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
+                     Cond, Dest, CC, Cond.getValue(1));
+}
+
+SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+ unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+
+ if (Subtarget->is64Bit())
+ return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
+ else
+ switch (CallingConv) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ // TODO: Implement fastcc
+ // Falls through
+ case CallingConv::C:
+ case CallingConv::X86_StdCall:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ case CallingConv::X86_FastCall:
+ return LowerFastCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+
+// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
+// A call to _alloca is needed to probe the stack when allocating more than 4K
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+SDOperand
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
+ SelectionDAG &DAG) {
+ assert(Subtarget->isTargetCygMing() &&
+ "This should be used only on Cygwin/Mingw targets");
+
+ // Get the inputs.
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
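+  // The allocation size is passed to _alloca in EAX; after the call the
+  // adjusted stack pointer is read back as the result.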
+
+ SDOperand Flag;
+
+ MVT::ValueType IntPtr = getPointerTy();
+ MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+
+ Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops[] = { Chain,
+ DAG.getTargetExternalSymbol("_alloca", IntPtr),
+ DAG.getRegister(X86::EAX, IntPtr),
+ Flag };
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
+
+ std::vector<MVT::ValueType> Tys;
+ Tys.push_back(SPTy);
+ Tys.push_back(MVT::Other);
+ SDOperand Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
+}
+
+SDOperand
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
+
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Subtarget->is64Bit())
+ return LowerX86_64CCCArguments(Op, DAG);
+ else
+ switch(CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ // TODO: implement fastcc.
+
+ // Falls through
+ case CallingConv::C:
+ return LowerCCCArguments(Op, DAG);
+ case CallingConv::X86_StdCall:
+ MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
+ return LowerCCCArguments(Op, DAG, true);
+ case CallingConv::X86_FastCall:
+ MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
+ return LowerFastCCArguments(Op, DAG);
+ }
+}
+
+SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand InFlag(0, 0);
+ SDOperand Chain = Op.getOperand(0);
+ unsigned Align =
+ (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+ if (Align == 0) Align = 1;
+
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memset if size is less than the threshold.
+ // It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 ||
+ (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Op.getOperand(1);
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend the unsigned i8 argument to be an int value for the call.
+ Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Op.getOperand(3);
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
+ MVT::ValueType AVT;
+ SDOperand Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = Op.getOperand(3);
+ break;
+ }
+
+ if (AVT > MVT::i8) {
+ if (I) {
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ BytesLeft = I->getValue() % UBytes;
+ } else {
+ assert(AVT >= MVT::i32 &&
+ "Do not use rep;stos if not at least DWORD aligned");
+ Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
+ Op.getOperand(3), DAG.getConstant(2, MVT::i8));
+ TwoRepStos = true;
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = Op.getOperand(3);
+ Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+ Op.getOperand(1), InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
+
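+  // When the size is not a compile-time constant, a second rep;stosb pass
+  // writes the remaining (size mod element-size) bytes; for a constant size
+  // the trailing 1-7 bytes are emitted as explicit stores below.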
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Op.getOperand(3);
+ MVT::ValueType CVT = Count.getValueType();
+ SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(MVT::i8));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
+ } else if (BytesLeft) {
+ // Issue stores for the last 1 - 7 bytes.
+ SDOperand Value;
+ unsigned Val = ValC->getValue() & 255;
+ unsigned Offset = I->getValue() - BytesLeft;
+ SDOperand DstAddr = Op.getOperand(1);
+ MVT::ValueType AddrVT = DstAddr.getValueType();
+ if (BytesLeft >= 4) {
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ Value = DAG.getConstant(Val, MVT::i32);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, AddrVT, DstAddr,
+ DAG.getConstant(Offset, AddrVT)),
+ NULL, 0);
+ BytesLeft -= 4;
+ Offset += 4;
+ }
+ if (BytesLeft >= 2) {
+ Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, AddrVT, DstAddr,
+ DAG.getConstant(Offset, AddrVT)),
+ NULL, 0);
+ BytesLeft -= 2;
+ Offset += 2;
+ }
+ if (BytesLeft == 1) {
+ Value = DAG.getConstant(Val, MVT::i8);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, AddrVT, DstAddr,
+ DAG.getConstant(Offset, AddrVT)),
+ NULL, 0);
+ }
+ }
+
+ return Chain;
+}
+
+SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ unsigned Align =
+ (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+ if (Align == 0) Align = 1;
+
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memcpy if size is less than the threshold.
+ // It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 ||
+ (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getTargetData()->getIntPtrType();
+ Entry.Node = Op.getOperand(1); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(2); Args.push_back(Entry);
+ Entry.Node = Op.getOperand(3); Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
+ MVT::ValueType AVT;
+ SDOperand Count;
+ unsigned BytesLeft = 0;
+ bool TwoRepMovs = false;
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ Count = Op.getOperand(3);
+ break;
+ }
+
+ if (AVT > MVT::i8) {
+ if (I) {
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ BytesLeft = I->getValue() % UBytes;
+ } else {
+ assert(AVT >= MVT::i32 &&
+ "Do not use rep;movs if not at least DWORD aligned");
+ Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
+ Op.getOperand(3), DAG.getConstant(2, MVT::i8));
+ TwoRepMovs = true;
+ }
+ }
+
+ SDOperand InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+ Op.getOperand(1), InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
+ Op.getOperand(2), InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+
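+  // As in LowerMEMSET, the leftover bytes are handled either by a second
+  // rep;movsb pass (non-constant size) or by explicit loads and stores below.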
+ if (TwoRepMovs) {
+ InFlag = Chain.getValue(1);
+ Count = Op.getOperand(3);
+ MVT::ValueType CVT = Count.getValueType();
+ SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(MVT::i8));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+ } else if (BytesLeft) {
+ // Issue loads and stores for the last 1 - 7 bytes.
+ unsigned Offset = I->getValue() - BytesLeft;
+ SDOperand DstAddr = Op.getOperand(1);
+ MVT::ValueType DstVT = DstAddr.getValueType();
+ SDOperand SrcAddr = Op.getOperand(2);
+ MVT::ValueType SrcVT = SrcAddr.getValueType();
+ SDOperand Value;
+ if (BytesLeft >= 4) {
+ Value = DAG.getLoad(MVT::i32, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ NULL, 0);
+ Chain = Value.getValue(1);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ NULL, 0);
+ BytesLeft -= 4;
+ Offset += 4;
+ }
+ if (BytesLeft >= 2) {
+ Value = DAG.getLoad(MVT::i16, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ NULL, 0);
+ Chain = Value.getValue(1);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ NULL, 0);
+ BytesLeft -= 2;
+ Offset += 2;
+ }
+
+ if (BytesLeft == 1) {
+ Value = DAG.getLoad(MVT::i8, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ NULL, 0);
+ Chain = Value.getValue(1);
+ Chain = DAG.getStore(Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ NULL, 0);
+ }
+ }
+
+ return Chain;
+}
+
+SDOperand
+X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand TheOp = Op.getOperand(0);
+ SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
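+  // RDTSC leaves the counter in EDX:EAX. On x86-64 the two halves are
+  // combined into a single i64 result; otherwise both i32 halves are
+  // returned separately.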
+ if (Subtarget->is64Bit()) {
+ SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
+ SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
+ MVT::i64, Copy1.getValue(2));
+ SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
+ DAG.getConstant(32, MVT::i8));
+ SDOperand Ops[] = {
+ DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
+ };
+
+ Tys = DAG.getVTList(MVT::i64, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ }
+
+ SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
+ SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
+ MVT::i32, Copy1.getValue(2));
+ SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
+ Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
+}
+
+SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+
+ if (!Subtarget->is64Bit()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
+ SV->getOffset());
+ }
+
+ // __va_list_tag:
+  //   gp_offset         (0 .. 6 * 8 = 48; byte offset of the next GPR in reg_save_area)
+  //   fp_offset         (48 .. 48 + 8 * 16 = 176; byte offset of the next XMM register)
+  //   overflow_arg_area (points to parameters passed in memory)
+  //   reg_save_area
+ SmallVector<SDOperand, 8> MemOps;
+ SDOperand FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDOperand Store = DAG.getStore(Op.getOperand(0),
+ DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ FIN, SV->getValue(), SV->getOffset());
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ Store = DAG.getStore(Op.getOperand(0),
+ DAG.getConstant(VarArgsFPOffset, MVT::i32),
+ FIN, SV->getValue(), SV->getOffset());
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
+ SV->getOffset());
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
+ SV->getOffset());
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
+}
+
+SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
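+  // The 24-byte structure is copied as three i64 load/store pairs.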
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand DstPtr = Op.getOperand(1);
+ SDOperand SrcPtr = Op.getOperand(2);
+ SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
+ SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
+
+ SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
+ SrcSV->getValue(), SrcSV->getOffset());
+ Chain = SrcPtr.getValue(1);
+ for (unsigned i = 0; i < 3; ++i) {
+ SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
+ SrcSV->getValue(), SrcSV->getOffset());
+ Chain = Val.getValue(1);
+ Chain = DAG.getStore(Chain, Val, DstPtr,
+ DstSV->getValue(), DstSV->getOffset());
+ if (i == 2)
+ break;
+ SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
+ DAG.getConstant(8, getPointerTy()));
+ DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ return Chain;
+}
+
+SDOperand
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
+ switch (IntNo) {
+ default: return SDOperand(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
+ unsigned X86CC;
+ SDOperand LHS = Op.getOperand(1);
+ SDOperand RHS = Op.getOperand(2);
+ translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
+
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+ SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
+ SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
+ VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
+ SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
+ SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
+ return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
+ }
+ }
+}
+
+SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
+ return SDOperand();
+
+ // Just load the return address
+ SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
+}
+
+SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
+ return SDOperand();
+
+ SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
+ DAG.getConstant(4, getPointerTy()));
+}
+
+SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
+ SelectionDAG &DAG) {
+  // Not yet supported on x86-64.
+ if (Subtarget->is64Bit())
+ return SDOperand();
+
+ return DAG.getConstant(8, getPointerTy());
+}
+
+SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
+{
+ assert(!Subtarget->is64Bit() &&
+ "Lowering of eh_return builtin is not supported yet on x86-64");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Offset = Op.getOperand(1);
+ SDOperand Handler = Op.getOperand(2);
+
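+  // Write the handler address into the return-address slot (frame pointer
+  // + 4, adjusted by Offset), and pass that slot's address to EH_RETURN in
+  // ECX so the stack pointer can be restored to it before returning.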
+ SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
+ getPointerTy());
+
+ SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
+ DAG.getConstant(-4UL, getPointerTy()));
+ StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
+ Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
+ MF.addLiveOut(X86::ECX);
+
+ return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
+ Chain, DAG.getRegister(X86::ECX, getPointerTy()));
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::MEMSET: return LowerMEMSET(Op, DAG);
+ case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ }
+ return SDOperand();
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
+ case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
+ case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::S2VEC: return "X86ISD::S2VEC";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
+ return false;
+
+ if (AM.BaseGV) {
+ // X86-64 only supports addr of globals in small code model.
+ if (Subtarget->is64Bit() &&
+ getTargetMachine().getCodeModel() != CodeModel::Small)
+ return false;
+
+ // We can only fold this if we don't need a load either.
+ if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+ return false;
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
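+    // For example, an address of the form "reg*9 + disp" can still be
+    // matched as "disp(%reg,%reg,8)", using the same register as both the
+    // base and the index, which is why a pre-existing base register rules
+    // these scales out.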
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
+ // Only do shuffles on 128-bit vector types for now.
+ if (MVT::getSizeInBits(VT) == 64) return false;
+ return (Mask.Val->getNumOperands() <= 4 ||
+ isIdentityMask(Mask.Val) ||
+ isIdentityMask(Mask.Val, true) ||
+ isSplatMask(Mask.Val) ||
+ isPSHUFHW_PSHUFLWMask(Mask.Val) ||
+ X86::isUNPCKLMask(Mask.Val) ||
+ X86::isUNPCKHMask(Mask.Val) ||
+ X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
+ X86::isUNPCKH_v_undef_Mask(Mask.Val));
+}
+
+bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
+ MVT::ValueType EVT,
+ SelectionDAG &DAG) const {
+ unsigned NumElts = BVOps.size();
+ // Only do shuffles on 128-bit vector types for now.
+ if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
+ if (NumElts == 2) return true;
+ if (NumElts == 4) {
+ return (isMOVLMask(&BVOps[0], 4) ||
+ isCommutedMOVL(&BVOps[0], 4, true) ||
+ isSHUFPMask(&BVOps[0], 4) ||
+ isCommutedSHUFP(&BVOps[0], 4));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM: {
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+ // Load the old value of the high byte of the control word...
+ unsigned OldCW =
+ F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
+
+ // Set the high part to be round to zero...
+ addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
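+    // (0xC7F sets the rounding-control field, bits 11:10, to 11b = round
+    // toward zero / truncate, while keeping all x87 exceptions masked.)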
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of control word to original value
+ addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isRegister()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getFrameIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImmediate())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImmediate())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobalAddress()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(4).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = 0; // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::SETCC:
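+    // X86 SETCC nodes produce 0 or 1 in their result type, so every bit
+    // other than bit 0 is known to be zero; getIntVTBitMask(VT) ^ 1 is
+    // exactly that set of bits.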
+ KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+ break;
+ }
+}
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
+ MVT::ValueType VT = N->getValueType(0);
+ SDOperand PermMask = N->getOperand(2);
+ unsigned NumElems = PermMask.getNumOperands();
+ SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ i %= NumElems;
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return (i == 0)
+ ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
+ }
+ return SDOperand();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + an offset.
+static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
+ unsigned Opc = N->getOpcode();
+ if (Opc == X86ISD::Wrapper) {
+ if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ SDOperand N1 = N->getOperand(0);
+ SDOperand N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.Val, GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSignExtended();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSignExtended();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isConsecutiveLoad - Returns true if N is loading from an address of Base
+/// + Dist * Size.
+static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
+ MachineFrameInfo *MFI) {
+ if (N->getOperand(0).Val != Base->getOperand(0).Val)
+ return false;
+
+ SDOperand Loc = N->getOperand(1);
+ SDOperand BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != Size) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
+ } else {
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
+ bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Size);
+ }
+
+ return false;
+}
+
+static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
+ const X86Subtarget *Subtarget) {
+ GlobalValue *GV;
+ int64_t Offset;
+ if (isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
+ else {
+ assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
+ int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex();
+ if (BFI < 0)
+      // Fixed objects do not specify alignment; however, the offsets are known.
+ return ((Subtarget->getStackAlignment() % 16) == 0 &&
+ (MFI->getObjectOffset(BFI) % 16) == 0);
+ else
+ return MFI->getObjectAlignment(BFI) >= 16;
+ }
+ return false;
+}
+
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order.
+static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ SDOperand PermMask = N->getOperand(2);
+ int NumElems = (int)PermMask.getNumOperands();
+ SDNode *Base = NULL;
+ for (int i = 0; i < NumElems; ++i) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ if (!Base) return SDOperand();
+ } else {
+ SDOperand Arg =
+ getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
+ if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
+ return SDOperand();
+ if (!Base)
+ Base = Arg.Val;
+ else if (!isConsecutiveLoad(Arg.Val, Base,
+ i, MVT::getSizeInBits(EVT)/8,MFI))
+ return SDOperand();
+ }
+ }
+
+ bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
+ if (isAlign16) {
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+ return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset());
+ } else {
+ // Just use movups, it's shorter.
+ SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+ SmallVector<SDOperand, 3> Ops;
+ Ops.push_back(Base->getOperand(0));
+ Ops.push_back(Base->getOperand(1));
+ Ops.push_back(Base->getOperand(2));
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
+ }
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDOperand Cond = N->getOperand(0);
+
+ // If we have SSE[12] support, try to form min/max nodes.
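+  // The goal is to recognize select(setcc(X, Y), X, Y) (and the operand
+  // swapped form) and turn it into X86ISD::FMIN/FMAX, which map onto the SSE
+  // MINSS/MAXSS/MINSD/MAXSD instructions. The "or equal" comparisons are
+  // only converted under -enable-unsafe-fp-math, since SSE min/max do not
+  // match the select exactly for NaN and signed-zero operands.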
+ if (Subtarget->hasSSE2() &&
+ (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
+ if (Cond.getOpcode() == ISD::SETCC) {
+ // Get the LHS/RHS of the select.
+ SDOperand LHS = N->getOperand(1);
+ SDOperand RHS = N->getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+ case ISD::SETLT:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGT: // (X > Y) ? Y : X -> min
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOLE: // (X <= Y) ? Y : X -> max
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+ case ISD::SETLT:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
+ }
+
+ }
+
+ return SDOperand();
+}
+
+
+SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::VECTOR_SHUFFLE:
+ return PerformShuffleCombine(N, DAG, Subtarget);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DAG, Subtarget);
+ }
+
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'A':
+ case 'r':
+ case 'R':
+ case 'l':
+ case 'q':
+ case 'Q':
+ case 'x':
+ case 'Y':
+ return C_RegisterClass;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// isOperandValidForConstraint - Return the specified operand (possibly
+/// modified) if the specified SDOperand is valid for the specified target
+/// constraint letter, otherwise return null.
+SDOperand X86TargetLowering::
+isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
+ switch (Constraint) {
+ default: break;
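+  // GCC x86 immediate constraints handled here:
+  //   'I' - an integer constant in [0, 31], e.g. a 32-bit shift count.
+  //   'N' - an integer constant in [0, 255], e.g. an "in"/"out" port number.
+  //   'i' - any literal immediate, or (in non-PIC code) a global address
+  //         plus an optional constant displacement.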
+ case 'I':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getValue() <= 31)
+ return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+ }
+ return SDOperand(0,0);
+ case 'N':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getValue() <= 255)
+ return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+ }
+ return SDOperand(0,0);
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
+ return DAG.getTargetConstant(CST->getValue(), Op.getValueType());
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ int64_t Offset = 0;
+
+ // Match either (GA) or (GA+C)
+ if (GA) {
+ Offset = GA->getOffset();
+ } else if (Op.getOpcode() == ISD::ADD) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C && GA) {
+ Offset = GA->getOffset()+C->getValue();
+ } else {
+        // Try the operands the other way around: (C + GA).
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ if (C && GA)
+ Offset = GA->getOffset()+C->getValue();
+ else
+ C = 0, GA = 0;
+ }
+ }
+
+ if (GA) {
+ // If addressing this global requires a load (e.g. in PIC mode), we can't
+ // match.
+ if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
+ false))
+ return SDOperand(0, 0);
+
+ Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ Offset);
+ return Op;
+ }
+
+ // Otherwise, not valid for this mode.
+ return SDOperand(0, 0);
+ }
+ }
+ return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
+}
+
+std::vector<unsigned> X86TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() == 1) {
+ // FIXME: not handling fp-stack yet!
+ switch (Constraint[0]) { // GCC X86 Constraint Letters
+ default: break; // Unknown constraint letter
+ case 'A': // EAX/EDX
+ if (VT == MVT::i32 || VT == MVT::i64)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
+ break;
+ case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
+ else if (VT == MVT::i16)
+ return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
+ else if (VT == MVT::i8)
+        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
+ break;
+ }
+ }
+
+ return std::vector<unsigned>();
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ case 'R': // LEGACY_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i64 && Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ if (VT == MVT::i32)
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ else if (VT == MVT::i8)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ break;
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ break;
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasSSE2()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasSSE1()) break;
+
+ switch (VT) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // GCC calls "st(0)" just plain "st".
+ if (StringsEqualNoCase("{st}", Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RSTRegisterClass;
+ }
+
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second != X86::GR16RegisterClass)
+ return Res;
+
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+      Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+      Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+      Res.second = X86::GR64RegisterClass;
+ }
+ }
+
+ return Res;
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 0000000..07a96d3
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,437 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+X86::INSTRUCTION_LIST_END,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+    /// FSRL - Bitwise logical right shift of floating point values. This
+    /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+    /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// FP_GET_RESULT - This corresponds to FpGETRESULT pseudo instruction
+ /// which copies from ST(0) to the destination. It takes a chain and
+ /// writes a RFP result and a chain.
+ FP_GET_RESULT,
+
+ /// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction
+ /// which copies the source operand to ST(0). It takes a chain+value and
+ /// returns a chain and a flag.
+ FP_SET_RESULT,
+
+ /// CALL/TAILCALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these node are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ /// The CALL vs TAILCALL distinction boils down to whether the callee is
+ /// known not to modify the caller's stack frame, as is standard with
+ /// LLVM.
+ CALL,
+ TAILCALL,
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, TEST, COMI, UCOMI,
+
+ /// X86 SetCC. Operand 1 is condition code, and operand 2 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// X86 conditional moves. Operand 1 and operand 2 are the two values
+ /// to select from (operand 1 is a R/W operand). Operand 3 is the
+ /// condition code, and operand 4 is the flag operand produced by a CMP
+ /// or TEST instruction. It also writes a flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 1 is the chain operand, operand 2
+ /// is the block to branch if condition is true, operand 3 is the
+ /// condition code, and operand 4 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 1 is the chain operand, operand
+ /// 2 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+    /// LOAD_PACK - Load a 128-bit packed float / double value. It has the same
+ /// operands as a normal load.
+ LOAD_PACK,
+
+    /// LOAD_UA - Load an unaligned 128-bit value. It has the same operands as
+ /// a normal load.
+ LOAD_UA,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// S2VEC - X86 version of SCALAR_TO_VECTOR. The destination base does not
+ /// have to match the operand type.
+ S2VEC,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // Thread Local Storage
+ TLSADDR, THREAD_POINTER,
+
+ // Exception Handling helpers
+ EH_RETURN
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(SDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(SDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(SDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(SDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(SDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(SDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+ bool isMOVLPMask(SDNode *N);
+
+ /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
+ /// as well as MOVLHPS.
+ bool isMOVHPMask(SDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(SDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(SDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(SDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(SDNode *N);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(SDNode *N);
+
+ /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element.
+ bool isSplatMask(SDNode *N);
+
+ /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of zero element.
+ bool isSplatLoMask(SDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+ /// instructions.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+    /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+ /// instructions.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int RegSaveFrameIndex; // X86-64 vararg func register save area.
+ unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
+ unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ int BytesToPopOnReturn; // Number of arg bytes ret should pop.
+ int BytesCallerReserves; // Number of arg bytes caller makes.
+ public:
+ X86TargetLowering(TargetMachine &TM);
+
+ // Return the number of bytes that a function should pop when it returns (in
+ // addition to the space used by the return address).
+ //
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+
+ // Return the number of bytes that the caller reserves for arguments passed
+ // to this function.
+ unsigned getBytesCallerReserves() const { return BytesCallerReserves; }
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+ virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ /// isOperandValidForConstraint - Return the specified operand (possibly
+ /// modified) if the specified SDOperand is valid for the specified target
+ /// constraint letter, otherwise return null.
+ SDOperand isOperandValidForConstraint(SDOperand Op, char ConstraintLetter,
+ SelectionDAG &DAG);
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const;
+
+    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can
+    /// use this to indicate if there is a suitable VECTOR_SHUFFLE that can
+    /// be used to replace a VAND with a constant pool entry.
+ virtual bool isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
+ MVT::ValueType EVT,
+ SelectionDAG &DAG) const;
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const MRegisterInfo *RegInfo;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+    /// X86ScalarSSE - Select between SSE2 and x87 floating point ops.
+ bool X86ScalarSSE;
+
+ SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+
+ // C and StdCall Calling Convention implementation.
+ SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
+ bool isStdCall = false);
+ SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC);
+
+ // X86-64 C Calling Convention implementation.
+ SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,unsigned CC);
+
+ // Fast and FastCall Calling Convention implementation.
+ SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC);
+
+ SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG, SDOperand Chain);
+ SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerVACOPY(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 0000000..c0fa58d
--- /dev/null
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,125 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier to use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP, with Disp adjusted accordingly.  The displacement may
+/// also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ unsigned Disp;
+ GlobalValue *GV;
+
+ X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) {
+ Base.Reg = 0;
+ }
+};
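+
+// For illustration, the address "[ESI + 4*ECX + 12]" would be described as:
+//   X86AddressMode AM;           // BaseType defaults to RegBase
+//   AM.Base.Reg = X86::ESI;
+//   AM.Scale    = 4;
+//   AM.IndexReg = X86::ECX;
+//   AM.Disp     = 12;
+// and attached to an instruction with addFullAddress() below.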
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB,
+ unsigned Reg) {
+ // Because memory references are always represented with four
+ // values, this adds: Reg, [1, NoReg, 0] to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
+}
+
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, int Offset) {
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset);
+}
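+
+// For illustration, storing EAX to [EBP + 4] might be emitted as:
+//   addRegOffset(BuildMI(BB, TII->get(X86::MOV32mr)), X86::EBP, 4)
+//     .addReg(X86::EAX);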
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, unsigned Reg2) {
+ return MIB.addReg(Reg1).addImm(1).addReg(Reg2).addImm(0);
+}
+
+inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+ assert (0);
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ return MIB.addGlobalAddress(AM.GV, AM.Disp);
+ else
+ return MIB.addImm(AM.Disp);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  The
+/// reference uses the FrameIndex as its base register until it is resolved,
+/// and an optional constant offset may be added as well.
+///
+inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ return MIB.addFrameIndex(FI).addImm(1).addReg(0).addImm(Offset);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the ConstantPoolIndex as its base, which is retained until
+/// either machine code emission or assembly output.  An optional constant
+/// offset may be added as well.
+///
+inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ int Offset = 0) {
+ return MIB.addConstantPoolIndex(CPI).addImm(1).addReg(0).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 0000000..11aeb07
--- /dev/null
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,456 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
+def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def X86fpget : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInFlag]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG",SDTX86Fild,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
+ (ops i16mem:$dst, RFP32:$src),
+ "#FP32_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
+ (ops i32mem:$dst, RFP32:$src),
+ "#FP32_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
+ (ops i64mem:$dst, RFP32:$src),
+ "#FP32_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
+ (ops i16mem:$dst, RFP64:$src),
+ "#FP64_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
+ (ops i32mem:$dst, RFP64:$src),
+ "#FP64_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
+ (ops i64mem:$dst, RFP64:$src),
+ "#FP64_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in
+ let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+ def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
+
+// All FP Stack operations are represented with three instructions here. The
+// first two instructions, generated by the instruction selector, use "RFP32"
+// or "RFP64" registers: traditional register files to reference 32-bit or
+// 64-bit floating point values. These sizes apply to the values, not the
+// registers, which are always 64 bits; RFP32 and RFP64 can be copied to
+// each other without losing information. These instructions are all pseudo
+// instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of 32-bit and
+// 64-bit registers.
+// The third instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 64-bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
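+//
+// For example, "defm ADD : FPBinary_rr<fadd>" below produces the pseudo
+// instructions ADD_Fp32 and ADD_Fp64, which the FP stackifier later rewrites
+// into real stack forms such as ADD_FST0r, ADD_FrST0 or ADD_F32m.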
+
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag ops, string asm> : I<o, F, ops, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag ops, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, ops, ""> {
+ let FPForm = fp; let FPFormBits = FPForm.Value;
+ let Pattern = pattern;
+}
+
+// Random Pseudo Instructions.
+def FpGETRESULT32 : FpI_<(ops RFP32:$dst), SpecialFP,
+ [(set RFP32:$dst, X86fpget)]>; // FPR = ST(0)
+
+def FpGETRESULT64 : FpI_<(ops RFP64:$dst), SpecialFP,
+ [(set RFP64:$dst, X86fpget)]>; // FPR = ST(0)
+
+let noResults = 1 in {
+ def FpSETRESULT32 : FpI_<(ops RFP32:$src), SpecialFP,
+ [(X86fpset RFP32:$src)]>, Imp<[], [ST0]>;// ST(0) = FPR
+
+ def FpSETRESULT64 : FpI_<(ops RFP64:$src), SpecialFP,
+ [(X86fpset RFP64:$src)]>, Imp<[], [ST0]>;// ST(0) = FPR
+}
+// FpI - Floating Point Pseudo Instruction template. Predicated on FPStack.
+class FpI<dag ops, FPFormat fp, list<dag> pattern> :
+ FpI_<ops, fp, pattern>, Requires<[FPStack]>;
+
+// Register copies. Just copies, the 64->32 version does not truncate.
+def MOV_Fp3232 : FpI<(ops RFP32:$dst, RFP32:$src), SpecialFP, []>;
+def MOV_Fp3264 : FpI<(ops RFP64:$dst, RFP32:$src), SpecialFP, []>;
+def MOV_Fp6432 : FpI<(ops RFP32:$dst, RFP64:$src), SpecialFP, []>;
+def MOV_Fp6464 : FpI<(ops RFP64:$dst, RFP64:$src), SpecialFP, []>;
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpI<(ops RFP64:$dst, RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (extloadf32 addr:$src2)))]>;
+def _F32m : FPI<0xD8, fp, (ops f32mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{s} $src"))>;
+def _F64m : FPI<0xDC, fp, (ops f64mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{l} $src"))>;
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (ops i16mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{s} $src"))>;
+def _FI32m : FPI<0xDA, fp, (ops i32mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{l} $src"))>;
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub, MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
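+// Naming note: each defm above prefixes the multiclass member names with its
+// own name, so 'defm ADD : FPBinary_rr<fadd>' defines ADD_Fp32 and ADD_Fp64,
+// and 'defm ADD : FPBinary<fadd, MRM0m, "add">' defines ADD_Fp32m, ADD_F32m,
+// ADD_FpI16m32, and so on.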
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (ops RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
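+// In these asm strings, "{att|intel}" selects between the AT&T and Intel
+// spellings; e.g. "fsub{|r}p $op" prints as "fsubp" for AT&T output and
+// "fsubrp" for Intel output.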
+def ADD_FST0r : FPST0rInst <0xC0, "fadd $op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp $op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr $op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p $op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub $op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul $op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp $op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr $op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p $op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv $op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _F : FPI<opcode, RawFrm, (ops), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt, 0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
+
+def TST_Fp32 : FpI<(ops RFP32:$src), OneArgFP,
+ []>;
+def TST_Fp64 : FpI<(ops RFP64:$src), OneArgFP,
+ []>;
+def TST_F : FPI<0xE4, RawFrm, (ops), "ftst">, D9;
+
+// Floating point cmovs.
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc))]>;
+ def _Fp64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc))]>;
+}
+let isTwoAddress = 1 in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+}
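+// These CondMovFP pseudos are turned into the real fcmov forms below
+// (CMOVB_F, CMOVNE_F, etc.) by the FP stackifier; roughly speaking, $src2
+// replaces $src1 when the named condition holds, mirroring the integer cmovs.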
+
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (ops RST:$op),
+ "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (ops RST:$op),
+ "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (ops RST:$op),
+ "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (ops RST:$op),
+ "fcmovu {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (ops RST:$op),
+ "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (ops RST:$op),
+ "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (ops RST:$op),
+ "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (ops RST:$op),
+ "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
+
+// Floating point loads & stores.
+def LD_Fp32m : FpI<(ops RFP32:$dst, f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+def LD_Fp64m : FpI<(ops RFP64:$dst, f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def ILD_Fp16m32: FpI<(ops RFP32:$dst, i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpI<(ops RFP32:$dst, i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpI<(ops RFP32:$dst, i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpI<(ops RFP64:$dst, i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpI<(ops RFP64:$dst, i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpI<(ops RFP64:$dst, i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+
+def ST_FpP32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m32 : FpI<(ops i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpI<(ops i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpI<(ops i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpI<(ops i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpI<(ops i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpI<(ops i64mem:$op, RFP64:$src), OneArgFP, []>;
+
+def LD_F32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
+def LD_F64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">;
+def ST_F32m : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
+
+// FISTTP requires SSE3 even though it's an FPStack op.
+def ISTT_Fp16m32 : FpI_<(ops i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(ops i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(ops i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(ops i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(ops i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(ops i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+def ISTT_FP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">;
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpI<(ops RFP32:$dst), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpI<(ops RFP32:$dst), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpI<(ops RFP64:$dst), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpI<(ops RFP64:$dst), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;
+
+
+// Floating point compares.
+def UCOM_Fpr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_FpIr32: FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = ST(0) cmp ST(i)
+def UCOM_Fpr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_FpIr64: FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = ST(0) cmp ST(i)
+
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (ops RST:$reg),
+ "fucom $reg">, DD, Imp<[ST0],[]>;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (ops RST:$reg),
+ "fucomp $reg">, DD, Imp<[ST0],[]>;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (ops),
+ "fucompp">, DA, Imp<[ST0],[]>;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (ops RST:$reg),
+ "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (ops RST:$reg),
+ "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
+
+// Floating point flag ops.
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (ops), "fnstsw", []>, DF, Imp<[],[AX]>;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
+ (ops i16mem:$dst), "fnstcw $dst", []>;
+def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
+ (ops i16mem:$dst), "fldcw $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+
+// Required for CALLs which return f32 / f64 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStack]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStack]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStack]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStack]>;
+
+// Used to convert i64 to f64 since there isn't an SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+def : Pat<(extloadf32 addr:$src),
+ (MOV_Fp3264 (LD_Fp32m addr:$src))>, Requires<[FPStack]>;
+def : Pat<(fextend RFP32:$src), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStack]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 0000000..06b14fe
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,567 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86GenInstrInfo.inc"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LiveVariables.h"
+using namespace llvm;
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])),
+ TM(tm), RI(tm, *this) {
+}
+
+bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg) const {
+ MachineOpCode oc = MI.getOpcode();
+ if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
+ oc == X86::MOV32rr || oc == X86::MOV64rr ||
+ oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
+ oc == X86::MOV_Fp3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
+ oc == X86::MOV_Fp3264 || oc == X86::MOV_Fp6432 || oc == X86::MOV_Fp6464 ||
+ oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
+ oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
+ oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
+ oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr ||
+ oc == X86::MMX_MOVD64rr || oc == X86::MMX_MOVQ64rr) {
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ "invalid register-register move instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV16_rm:
+ case X86::MOV32rm:
+ case X86::MOV32_rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ if (MI->getOperand(1).isFrameIndex() && MI->getOperand(2).isImmediate() &&
+ MI->getOperand(3).isRegister() && MI->getOperand(4).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 1 &&
+ MI->getOperand(3).getReg() == 0 &&
+ MI->getOperand(4).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV16_mr:
+ case X86::MOV32mr:
+ case X86::MOV32_mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ if (MI->getOperand(0).isFrameIndex() && MI->getOperand(1).isImmediate() &&
+ MI->getOperand(2).isRegister() && MI->getOperand(3).isImmediate() &&
+ MI->getOperand(1).getImmedValue() == 1 &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(0).getFrameIndex();
+ return MI->getOperand(4).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+bool X86InstrInfo::isReallyTriviallyReMaterializable(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV16_rm:
+ case X86::MOV32rm:
+ case X86::MOV32_rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ // Loads from constant pools are trivially rematerializable.
+ return MI->getOperand(1).isRegister() && MI->getOperand(2).isImmediate() &&
+ MI->getOperand(3).isRegister() && MI->getOperand(4).isConstantPoolIndex() &&
+ MI->getOperand(1).getReg() == 0 &&
+ MI->getOperand(2).getImmedValue() == 1 &&
+ MI->getOperand(3).getReg() == 0;
+ }
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
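+/// For example (illustrative), a tied '$dst = SHL32ri $src, 2' can be turned
+/// into '$dst = LEA32r 0, 4, $src, 0', i.e. 'leal (,$src,4), $dst', removing
+/// the two-address constraint between $dst and $src.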
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const {
+ MachineInstr *MI = MBBI;
+ // All instructions input to this method are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ bool DisableLEA16 = true;
+
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ if (B != C) return 0;
+ NewMI = BuildMI(get(X86::PSHUFDri), A).addReg(B).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't
+ // currently use the flags produced by a shift, so this is safe.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ NewMI = BuildMI(get(X86::LEA64r), Dest)
+ .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't
+ // currently use the flags produced by a shift, so this is safe.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(get(Opc), Dest)
+ .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
+ if (DisableLEA16) return 0;
+
+ // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't
+ // currently use the flags produced by a shift, so this is safe.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ NewMI = BuildMI(get(X86::LEA16r), Dest)
+ .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+ break;
+ }
+ }
+
+ // FIXME: None of these instructions are promotable to LEAs without
+ // additional information. In particular, LEA doesn't set the flags that
+ // add and inc do. :(
+ if (0)
+ switch (MI->getOpcode()) {
+ case X86::INC32r:
+ case X86::INC64_32r:
+ assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(get(X86::LEA32r), Dest), Src, 1);
+ break;
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src, 1);
+ break;
+ case X86::DEC32r:
+ case X86::DEC64_32r:
+ assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(get(X86::LEA32r), Dest), Src, -1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src, -1);
+ break;
+ case X86::ADD32rr:
+ assert(MI->getNumOperands() == 3 && "Unknown add instruction!");
+ NewMI = addRegReg(BuildMI(get(X86::LEA32r), Dest), Src,
+ MI->getOperand(2).getReg());
+ break;
+ case X86::ADD16rr:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() == 3 && "Unknown add instruction!");
+ NewMI = addRegReg(BuildMI(get(X86::LEA16r), Dest), Src,
+ MI->getOperand(2).getReg());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ assert(MI->getNumOperands() == 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImmediate())
+ NewMI = addRegOffset(BuildMI(get(X86::LEA32r), Dest), Src,
+ MI->getOperand(2).getImmedValue());
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() == 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImmediate())
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src,
+ MI->getOperand(2).getImmedValue());
+ break;
+ case X86::SHL16ri:
+ if (DisableLEA16) return 0;
+ case X86::SHL32ri:
+ assert(MI->getNumOperands() == 3 && MI->getOperand(2).isImmediate() &&
+ "Unknown shl instruction!");
+ unsigned ShAmt = MI->getOperand(2).getImmedValue();
+ if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
+ X86AddressMode AM;
+ AM.Scale = 1 << ShAmt;
+ AM.IndexReg = Src;
+ unsigned Opc = MI->getOpcode() == X86::SHL32ri ? X86::LEA32r :X86::LEA16r;
+ NewMI = addFullAddress(BuildMI(get(Opc), Dest), AM);
+ }
+ break;
+ }
+
+ if (NewMI) {
+ NewMI->copyKillDeadInfo(MI);
+ LV.instructionChanged(MI, NewMI); // Update live variables
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ }
+ return NewMI;
+}
+
+/// commuteInstruction - We have a few instructions that must be hacked on to
+/// commute them.
+///
+MachineInstr *X86InstrInfo::commuteInstruction(MachineInstr *MI) const {
+ // FIXME: Can commute cmoves by changing the condition!
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8:{// A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImmedValue();
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ bool BisKill = MI->getOperand(1).isKill();
+ bool CisKill = MI->getOperand(2).isKill();
+ return BuildMI(get(Opc), A).addReg(C, false, false, CisKill)
+ .addReg(B, false, false, BisKill).addImm(Size-Amt);
+ }
+ default:
+ return TargetInstrInfo::commuteInstruction(MI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE: return X86::COND_E;
+ case X86::JNE: return X86::COND_NE;
+ case X86::JL: return X86::COND_L;
+ case X86::JLE: return X86::COND_LE;
+ case X86::JG: return X86::COND_G;
+ case X86::JGE: return X86::COND_GE;
+ case X86::JB: return X86::COND_B;
+ case X86::JBE: return X86::COND_BE;
+ case X86::JA: return X86::COND_A;
+ case X86::JAE: return X86::COND_AE;
+ case X86::JS: return X86::COND_S;
+ case X86::JNS: return X86::COND_NS;
+ case X86::JP: return X86::COND_P;
+ case X86::JNP: return X86::COND_NP;
+ case X86::JO: return X86::COND_O;
+ case X86::JNO: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::JE;
+ case X86::COND_NE: return X86::JNE;
+ case X86::COND_L: return X86::JL;
+ case X86::COND_LE: return X86::JLE;
+ case X86::COND_G: return X86::JG;
+ case X86::COND_GE: return X86::JGE;
+ case X86::COND_B: return X86::JB;
+ case X86::COND_BE: return X86::JBE;
+ case X86::COND_A: return X86::JA;
+ case X86::COND_AE: return X86::JAE;
+ case X86::COND_S: return X86::JS;
+ case X86::COND_NS: return X86::JNS;
+ case X86::COND_P: return X86::JP;
+ case X86::COND_NP: return X86::JNP;
+ case X86::COND_O: return X86::JO;
+ case X86::COND_NO: return X86::JNO;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (MI->getOpcode() == X86::FP_REG_KILL)
+ return false;
+
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ if (TID->Flags & M_TERMINATOR_FLAG) {
+ // Conditional branch is a special case.
+ if ((TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0)
+ return true;
+ if ((TID->Flags & M_PREDICABLE) == 0)
+ return true;
+ return !isPredicated(MI);
+ }
+ return false;
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (!isBranch(LastInst->getOpcode()))
+ return true;
+
+ // If the block ends with a branch there are 3 possibilities:
+ // it's an unconditional, conditional, or indirect branch.
+
+ if (LastInst->getOpcode() == X86::JMP) {
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+ X86::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Otherwise, block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with X86::JMP and a conditional branch, handle it.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(SecondLastInst->getOpcode());
+ if (BranchCode != X86::COND_INVALID && LastInst->getOpcode() == X86::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ FBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+
+ // If the block ends with two X86::JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == X86::JMP &&
+ LastInst->getOpcode() == X86::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ I = LastInst;
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
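+// Note: per the branch-analysis contract used above, returning false means
+// the block was understood (TBB/FBB/Cond filled in as described), while
+// returning true means the terminator sequence was too complex to analyze.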
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != X86::JMP &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch?
+ BuildMI(&MBB, get(X86::JMP)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((X86::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, get(Opc)).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((X86::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, get(Opc)).addMBB(TBB);
+ BuildMI(&MBB, get(X86::JMP)).addMBB(FBB);
+ return 2;
+}
+
+bool X86InstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case X86::RET: // Return.
+ case X86::RETI:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm:
+ case X86::JMP: // Uncond branch.
+ case X86::JMP32r: // Indirect branch.
+ case X86::JMP32m: // Indirect branch through mem.
+ return true;
+ default: return false;
+ }
+}
+
+bool X86InstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((X86::CondCode)Cond[0].getImm()));
+ return false;
+}
+
+const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->is64Bit())
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 0000000..ec30cc7
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,287 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86RegisterInfo.h"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // X86 specific condition code. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in synch.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction types. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AdSize - Set if this instruction requires an address size prefix (0x67),
+ // which most often indicates that the instruction uses 16 bit addresses
+ // instead of 32 bit addresses (or 32 bit addresses in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0xF << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only care about the REX.W and REX.R bits, and only the former is
+ // statically determined.
+ //
+ REXShift = 12,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = 13,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm16 = 2 << ImmShift,
+ Imm32 = 3 << ImmShift,
+ Imm64 = 4 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = 16,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), e.g. fldz
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the explicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Bits 19 -> 23 are unused
+ OpcodeShift = 24,
+ OpcodeMask = 0xFF << OpcodeShift
+ };
+}
+
+class X86InstrInfo : public TargetInstrInfo {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+public:
+ X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+ // Return true if the instruction is a register to register move and
+ // leave the source and dest operands in the passed parameters.
+ //
+ bool isMoveInstr(const MachineInstr& MI, unsigned& sourceReg,
+ unsigned& destReg) const;
+ unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ bool isReallyTriviallyReMaterializable(MachineInstr *MI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const;
+ virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+ virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
+
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified instruction descriptor.
+ //
+ unsigned char getBaseOpcodeFor(const TargetInstrDescriptor *TID) const {
+ return TID->TSFlags >> X86II::OpcodeShift;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 0000000..b24f644
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,2674 @@
+//===- X86InstrInfo.td - Describe the X86 Instruction Set -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+
+def SDTX86BrCond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 1,
+ [SDTCisVT<0, i8>, SDTCisVT<1, i8>]>;
+
+def SDTX86Ret : SDTypeProfile<0, 1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+def SDT_X86CallSeqEnd : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
+def SDT_X86TLSTP : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov,
+ [SDNPInFlag, SDNPOutFlag]>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain, SDNPInFlag]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC,
+ [SDNPInFlag, SDNPOutFlag]>;
+
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def X86TLStp : SDNode<"X86ISD::THREAD_POINTER", SDT_X86TLSTP, []>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
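+// Each *mem operand therefore carries a full x86 address computation as
+// (base, scale, index, disp); e.g. "movl 8(%ebx,%esi,4), %eax" uses base=EBX,
+// scale=4, index=ESI, disp=8 (illustrative).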
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+
+def lea32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+}
+
+def piclabel: Operand<i32> {
+ let PrintMethod = "printPICLabel";
+}
+
+// A couple of more descriptive operand definitions.
+// 16-bits but only 8 bits are significant.
+def i16i8imm : Operand<i16>;
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 4, "SelectAddr", [], []>;
+def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+ [add, mul, shl, or, frameindex], []>;
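+// lea32addr lets the selector fold simple arithmetic into a single address;
+// e.g. (add (shl GR32:$i, 2), 8) can typically be matched as scale=4,
+// index=$i, disp=8, so one LEA computes the whole expression (illustrative).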
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def FPStack : Predicate<"!Subtarget->hasSSE2()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+
+//===----------------------------------------------------------------------===//
+// X86 specific pattern fragments.
+//
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm16 : ImmType<2>;
+def Imm32 : ImmType<3>;
+def Imm64 : ImmType<4>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+ bits<3> ImmTypeBits = ImmT.Value;
+
+ dag OperandList = ops;
+ string AsmString = AsmStr;
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<4> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ FPFormat FPForm; // What flavor of FP instruction is this?
+ bits<3> FPFormBits = 0;
+}
+
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class TB { bits<4> Prefix = 1; }
+class REP { bits<4> Prefix = 2; }
+class D8 { bits<4> Prefix = 3; }
+class D9 { bits<4> Prefix = 4; }
+class DA { bits<4> Prefix = 5; }
+class DB { bits<4> Prefix = 6; }
+class DC { bits<4> Prefix = 7; }
+class DD { bits<4> Prefix = 8; }
+class DE { bits<4> Prefix = 9; }
+class DF { bits<4> Prefix = 10; }
+class XD { bits<4> Prefix = 11; }
+class XS { bits<4> Prefix = 12; }
+class T8 { bits<4> Prefix = 13; }
+class TA { bits<4> Prefix = 14; }
+
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+// X86 specific condition code. These correspond to CondCode in
+// X86InstrInfo.h. They must be kept in synch.
+def X86_COND_A : PatLeaf<(i8 0)>;
+def X86_COND_AE : PatLeaf<(i8 1)>;
+def X86_COND_B : PatLeaf<(i8 2)>;
+def X86_COND_BE : PatLeaf<(i8 3)>;
+def X86_COND_E : PatLeaf<(i8 4)>;
+def X86_COND_G : PatLeaf<(i8 5)>;
+def X86_COND_GE : PatLeaf<(i8 6)>;
+def X86_COND_L : PatLeaf<(i8 7)>;
+def X86_COND_LE : PatLeaf<(i8 8)>;
+def X86_COND_NE : PatLeaf<(i8 9)>;
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>;
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>;
+def X86_COND_S : PatLeaf<(i8 15)>;
+
+def i16immSExt8 : PatLeaf<(i16 imm), [{
+ // i16immSExt8 predicate - True if the 16-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int16_t)N->getValue() == (int8_t)N->getValue();
+}]>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int32_t)N->getValue() == (int8_t)N->getValue();
+}]>;
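+// For example, an immediate of 100 satisfies i32immSExt8 (allowing the imm8
+// forms such as ADD32ri8), while 300 does not and requires the full 32-bit
+// immediate encoding.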
+
+// Helper fragments for loads.
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+
+def sextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (sextloadi1 node:$ptr))>;
+def sextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (sextloadi1 node:$ptr))>;
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction templates...
+//
+
+class I<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, NoImm, ops, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm8 , ops, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm16, ops, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm32, ops, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+def ADJCALLSTACKDOWN : I<0, Pseudo, (ops i32imm:$amt), "#ADJCALLSTACKDOWN",
+ [(X86callseq_start imm:$amt)]>, Imp<[ESP],[ESP]>;
+def ADJCALLSTACKUP : I<0, Pseudo, (ops i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end imm:$amt1, imm:$amt2)]>,
+ Imp<[ESP],[ESP]>;
+def IMPLICIT_USE : I<0, Pseudo, (ops variable_ops), "#IMPLICIT_USE", []>;
+def IMPLICIT_DEF : I<0, Pseudo, (ops variable_ops), "#IMPLICIT_DEF", []>;
+def IMPLICIT_DEF_GR8 : I<0, Pseudo, (ops GR8:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set GR8:$dst, (undef))]>;
+def IMPLICIT_DEF_GR16 : I<0, Pseudo, (ops GR16:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set GR16:$dst, (undef))]>;
+def IMPLICIT_DEF_GR32 : I<0, Pseudo, (ops GR32:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set GR32:$dst, (undef))]>;
+
+// Nop
+def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
+
+// Truncate
+def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
+def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
+def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
+ "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
+ [(set GR16:$dst, (trunc GR32:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, noResults = 1 in {
+ def RET : I<0xC3, RawFrm, (ops), "ret", [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (ops i16imm:$amt), "ret $amt",
+ [(X86retflag imm:$amt)]>;
+}
+
+// All branches are RawFrm, Void, Branch, and Terminators
+let isBranch = 1, isTerminator = 1, noResults = 1 in
+ class IBr<bits<8> opcode, dag ops, string asm, list<dag> pattern> :
+ I<opcode, RawFrm, ops, asm, pattern>;
+
+// Unconditional branch.
+let isBranch = 1, isBarrier = 1 in
+ def JMP : IBr<0xE9, (ops brtarget:$dst), "jmp $dst", [(br bb:$dst)]>;
+
+// Indirect branches.
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst",
+ [(brind GR32:$dst)]>;
+ def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst",
+ [(brind (loadi32 addr:$dst))]>;
+}
+
+// Conditional branches
+def JE : IBr<0x84, (ops brtarget:$dst), "je $dst",
+ [(X86brcond bb:$dst, X86_COND_E)]>, TB;
+def JNE : IBr<0x85, (ops brtarget:$dst), "jne $dst",
+ [(X86brcond bb:$dst, X86_COND_NE)]>, TB;
+def JL : IBr<0x8C, (ops brtarget:$dst), "jl $dst",
+ [(X86brcond bb:$dst, X86_COND_L)]>, TB;
+def JLE : IBr<0x8E, (ops brtarget:$dst), "jle $dst",
+ [(X86brcond bb:$dst, X86_COND_LE)]>, TB;
+def JG : IBr<0x8F, (ops brtarget:$dst), "jg $dst",
+ [(X86brcond bb:$dst, X86_COND_G)]>, TB;
+def JGE : IBr<0x8D, (ops brtarget:$dst), "jge $dst",
+ [(X86brcond bb:$dst, X86_COND_GE)]>, TB;
+
+def JB : IBr<0x82, (ops brtarget:$dst), "jb $dst",
+ [(X86brcond bb:$dst, X86_COND_B)]>, TB;
+def JBE : IBr<0x86, (ops brtarget:$dst), "jbe $dst",
+ [(X86brcond bb:$dst, X86_COND_BE)]>, TB;
+def JA : IBr<0x87, (ops brtarget:$dst), "ja $dst",
+ [(X86brcond bb:$dst, X86_COND_A)]>, TB;
+def JAE : IBr<0x83, (ops brtarget:$dst), "jae $dst",
+ [(X86brcond bb:$dst, X86_COND_AE)]>, TB;
+
+def JS : IBr<0x88, (ops brtarget:$dst), "js $dst",
+ [(X86brcond bb:$dst, X86_COND_S)]>, TB;
+def JNS : IBr<0x89, (ops brtarget:$dst), "jns $dst",
+ [(X86brcond bb:$dst, X86_COND_NS)]>, TB;
+def JP : IBr<0x8A, (ops brtarget:$dst), "jp $dst",
+ [(X86brcond bb:$dst, X86_COND_P)]>, TB;
+def JNP : IBr<0x8B, (ops brtarget:$dst), "jnp $dst",
+ [(X86brcond bb:$dst, X86_COND_NP)]>, TB;
+def JO : IBr<0x80, (ops brtarget:$dst), "jo $dst",
+ [(X86brcond bb:$dst, X86_COND_O)]>, TB;
+def JNO : IBr<0x81, (ops brtarget:$dst), "jno $dst",
+ [(X86brcond bb:$dst, X86_COND_NO)]>, TB;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1, noResults = 1 in
+ // All calls clobber the non-callee saved registers...
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in {
+ def CALLpcrel32 : I<0xE8, RawFrm, (ops i32imm:$dst, variable_ops),
+ "call ${dst:call}", []>;
+ def CALL32r : I<0xFF, MRM2r, (ops GR32:$dst, variable_ops),
+ "call {*}$dst", [(X86call GR32:$dst)]>;
+ def CALL32m : I<0xFF, MRM2m, (ops i32mem:$dst, variable_ops),
+ "call {*}$dst", []>;
+ }
+
+// Tail call stuff.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in
+ def TAILJMPd : IBr<0xE9, (ops i32imm:$dst), "jmp ${dst:call} # TAIL CALL",
+ []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in
+ def TAILJMPr : I<0xFF, MRM4r, (ops GR32:$dst), "jmp {*}$dst # TAIL CALL",
+ []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, noResults = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (ops i32mem:$dst),
+ "jmp {*}$dst # TAIL CALL", []>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+def LEAVE : I<0xC9, RawFrm,
+ (ops), "leave", []>, Imp<[EBP,ESP],[EBP,ESP]>;
+def POP32r : I<0x58, AddRegFrm,
+ (ops GR32:$reg), "pop{l} $reg", []>, Imp<[ESP],[ESP]>;
+
+def PUSH32r : I<0x50, AddRegFrm,
+ (ops GR32:$reg), "push{l} $reg", []>, Imp<[ESP],[ESP]>;
+
+def MovePCtoStack : I<0, Pseudo, (ops piclabel:$label),
+ "call $label", []>;
+
+let isTwoAddress = 1 in // GR32 = bswap GR32
+ def BSWAP32r : I<0xC8, AddRegFrm,
+ (ops GR32:$dst, GR32:$src),
+ "bswap{l} $dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def XCHG8rr : I<0x86, MRMDestReg, // xchg GR8, GR8
+ (ops GR8:$src1, GR8:$src2),
+ "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG16rr : I<0x87, MRMDestReg, // xchg GR16, GR16
+ (ops GR16:$src1, GR16:$src2),
+ "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMDestReg, // xchg GR32, GR32
+ (ops GR32:$src1, GR32:$src2),
+ "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+
+def XCHG8mr : I<0x86, MRMDestMem,
+ (ops i8mem:$src1, GR8:$src2),
+ "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG16mr : I<0x87, MRMDestMem,
+ (ops i16mem:$src1, GR16:$src2),
+ "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+def XCHG32mr : I<0x87, MRMDestMem,
+ (ops i32mem:$src1, GR32:$src2),
+ "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG8rm : I<0x86, MRMSrcMem,
+ (ops GR8:$src1, i8mem:$src2),
+ "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG16rm : I<0x87, MRMSrcMem,
+ (ops GR16:$src1, i16mem:$src2),
+ "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem,
+ (ops GR32:$src1, i32mem:$src2),
+ "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+
+def LEA16r : I<0x8D, MRMSrcMem,
+ (ops GR16:$dst, i32mem:$src),
+ "lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
+def LEA32r : I<0x8D, MRMSrcMem,
+ (ops GR32:$dst, lea32mem:$src),
+ "lea{l} {$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (ops), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (ops), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>,
+ Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
+
+def REP_STOSB : I<0xAA, RawFrm, (ops), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>,
+ Imp<[AL,ECX,EDI], [ECX,EDI]>, REP;
+def REP_STOSW : I<0xAB, RawFrm, (ops), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>,
+ Imp<[AX,ECX,EDI], [ECX,EDI]>, REP, OpSize;
+def REP_STOSD : I<0xAB, RawFrm, (ops), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>,
+ Imp<[EAX,ECX,EDI], [ECX,EDI]>, REP;
+
+def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
+ TB, Imp<[],[RAX,RDX]>;
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions...
+//
+def IN8rr : I<0xEC, RawFrm, (ops),
+ "in{b} {%dx, %al|%AL, %DX}",
+ []>, Imp<[DX], [AL]>;
+def IN16rr : I<0xED, RawFrm, (ops),
+ "in{w} {%dx, %ax|%AX, %DX}",
+ []>, Imp<[DX], [AX]>, OpSize;
+def IN32rr : I<0xED, RawFrm, (ops),
+ "in{l} {%dx, %eax|%EAX, %DX}",
+ []>, Imp<[DX],[EAX]>;
+
+def IN8ri : Ii8<0xE4, RawFrm, (ops i16i8imm:$port),
+ "in{b} {$port, %al|%AL, $port}",
+ []>,
+ Imp<[], [AL]>;
+def IN16ri : Ii8<0xE5, RawFrm, (ops i16i8imm:$port),
+ "in{w} {$port, %ax|%AX, $port}",
+ []>,
+ Imp<[], [AX]>, OpSize;
+def IN32ri : Ii8<0xE5, RawFrm, (ops i16i8imm:$port),
+ "in{l} {$port, %eax|%EAX, $port}",
+ []>,
+ Imp<[],[EAX]>;
+
+def OUT8rr : I<0xEE, RawFrm, (ops),
+ "out{b} {%al, %dx|%DX, %AL}",
+ []>, Imp<[DX, AL], []>;
+def OUT16rr : I<0xEF, RawFrm, (ops),
+ "out{w} {%ax, %dx|%DX, %AX}",
+ []>, Imp<[DX, AX], []>, OpSize;
+def OUT32rr : I<0xEF, RawFrm, (ops),
+ "out{l} {%eax, %dx|%DX, %EAX}",
+ []>, Imp<[DX, EAX], []>;
+
+def OUT8ir : Ii8<0xE6, RawFrm, (ops i16i8imm:$port),
+ "out{b} {%al, $port|$port, %AL}",
+ []>,
+ Imp<[AL], []>;
+def OUT16ir : Ii8<0xE7, RawFrm, (ops i16i8imm:$port),
+ "out{w} {%ax, $port|$port, %AX}",
+ []>,
+ Imp<[AX], []>, OpSize;
+def OUT32ir : Ii8<0xE7, RawFrm, (ops i16i8imm:$port),
+ "out{l} {%eax, $port|$port, %EAX}",
+ []>,
+ Imp<[EAX], []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+def MOV8rr : I<0x88, MRMDestReg, (ops GR8 :$dst, GR8 :$src),
+ "mov{b} {$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (ops GR16:$dst, GR16:$src),
+ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (ops GR32:$dst, GR32:$src),
+ "mov{l} {$src, $dst|$dst, $src}", []>;
+let isReMaterializable = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (ops GR8 :$dst, i8imm :$src),
+ "mov{b} {$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (ops GR16:$dst, i16imm:$src),
+ "mov{w} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (ops GR32:$dst, i32imm:$src),
+ "mov{l} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+}
+def MOV8mi : Ii8 <0xC6, MRM0m, (ops i8mem :$dst, i8imm :$src),
+ "mov{b} {$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (ops i16mem:$dst, i16imm:$src),
+ "mov{w} {$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (ops i32mem:$dst, i32imm:$src),
+ "mov{l} {$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+
+def MOV8rm : I<0x8A, MRMSrcMem, (ops GR8 :$dst, i8mem :$src),
+ "mov{b} {$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (ops GR16:$dst, i16mem:$src),
+ "mov{w} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (load addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (ops GR32:$dst, i32mem:$src),
+ "mov{l} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (load addr:$src))]>;
+
+def MOV8mr : I<0x88, MRMDestMem, (ops i8mem :$dst, GR8 :$src),
+ "mov{b} {$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (ops i16mem:$dst, GR16:$src),
+ "mov{w} {$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (ops i32mem:$dst, GR32:$src),
+ "mov{l} {$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions...
+//
+
+// Extra precision multiplication
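+// The 8-bit multiplies below carry their selection patterns directly on the
+// instructions; the FIXMEs there suggest moving them into separate patterns.
+// A rough, hypothetical sketch of that form (matching the physical register
+// AL in a source pattern is exactly the syntax that is not yet accepted):
+//   def : Pat<(mul AL, GR8:$src), (MUL8r GR8:$src)>;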
+def MUL8r : I<0xF6, MRM4r, (ops GR8:$src), "mul{b} $src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src))]>,
+ Imp<[AL],[AX]>; // AL,AH = AL*GR8
+def MUL16r : I<0xF7, MRM4r, (ops GR16:$src), "mul{w} $src", []>,
+ Imp<[AX],[AX,DX]>, OpSize; // AX,DX = AX*GR16
+def MUL32r : I<0xF7, MRM4r, (ops GR32:$src), "mul{l} $src", []>,
+ Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*GR32
+def MUL8m : I<0xF6, MRM4m, (ops i8mem :$src),
+ "mul{b} $src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src)))]>,
+ Imp<[AL],[AX]>; // AL,AH = AL*[mem8]
+def MUL16m : I<0xF7, MRM4m, (ops i16mem:$src),
+ "mul{w} $src", []>, Imp<[AX],[AX,DX]>,
+ OpSize; // AX,DX = AX*[mem16]
+def MUL32m : I<0xF7, MRM4m, (ops i32mem:$src),
+ "mul{l} $src", []>, Imp<[EAX],[EAX,EDX]>;// EAX,EDX = EAX*[mem32]
+
+def IMUL8r : I<0xF6, MRM5r, (ops GR8:$src), "imul{b} $src", []>,
+ Imp<[AL],[AX]>; // AL,AH = AL*GR8
+def IMUL16r : I<0xF7, MRM5r, (ops GR16:$src), "imul{w} $src", []>,
+ Imp<[AX],[AX,DX]>, OpSize; // AX,DX = AX*GR16
+def IMUL32r : I<0xF7, MRM5r, (ops GR32:$src), "imul{l} $src", []>,
+ Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*GR32
+def IMUL8m : I<0xF6, MRM5m, (ops i8mem :$src),
+ "imul{b} $src", []>, Imp<[AL],[AX]>; // AL,AH = AL*[mem8]
+def IMUL16m : I<0xF7, MRM5m, (ops i16mem:$src),
+ "imul{w} $src", []>, Imp<[AX],[AX,DX]>,
+ OpSize; // AX,DX = AX*[mem16]
+def IMUL32m : I<0xF7, MRM5m, (ops i32mem:$src),
+ "imul{l} $src", []>,
+ Imp<[EAX],[EAX,EDX]>; // EAX,EDX = EAX*[mem32]
+
+// unsigned division/remainder
+def DIV8r : I<0xF6, MRM6r, (ops GR8:$src), // AX/r8 = AL,AH
+ "div{b} $src", []>, Imp<[AX],[AX]>;
+def DIV16r : I<0xF7, MRM6r, (ops GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+def DIV32r : I<0xF7, MRM6r, (ops GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+def DIV8m : I<0xF6, MRM6m, (ops i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b} $src", []>, Imp<[AX],[AX]>;
+def DIV16m : I<0xF7, MRM6m, (ops i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+def DIV32m : I<0xF7, MRM6m, (ops i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "div{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+
+// Signed division/remainder.
+def IDIV8r : I<0xF6, MRM7r, (ops GR8:$src), // AX/r8 = AL,AH
+ "idiv{b} $src", []>, Imp<[AX],[AX]>;
+def IDIV16r: I<0xF7, MRM7r, (ops GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+def IDIV32r: I<0xF7, MRM7r, (ops GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+def IDIV8m : I<0xF6, MRM7m, (ops i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b} $src", []>, Imp<[AX],[AX]>;
+def IDIV16m: I<0xF7, MRM7m, (ops i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+def IDIV32m: I<0xF7, MRM7m, (ops i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "idiv{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions...
+//
+let isTwoAddress = 1 in {
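+// Everything in this block is two-address: the destination and the first
+// source operand are constrained to the same register, matching the x86
+// read-modify-write encodings, so that source is not encoded separately.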
+
+// Conditional moves
+def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_B))]>,
+ TB, OpSize;
+def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_B))]>,
+ TB, OpSize;
+def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_B))]>,
+ TB;
+def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_B))]>,
+ TB;
+
+def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_AE))]>,
+ TB, OpSize;
+def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_AE))]>,
+ TB, OpSize;
+def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_AE))]>,
+ TB;
+def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_AE))]>,
+ TB;
+
+def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_E))]>,
+ TB, OpSize;
+def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_E))]>,
+ TB, OpSize;
+def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_E))]>,
+ TB;
+def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_E))]>,
+ TB;
+
+def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NE))]>,
+ TB, OpSize;
+def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NE))]>,
+ TB, OpSize;
+def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NE))]>,
+ TB;
+def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NE))]>,
+ TB;
+
+def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_BE))]>,
+ TB, OpSize;
+def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_BE))]>,
+ TB, OpSize;
+def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_BE))]>,
+ TB;
+def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_BE))]>,
+ TB;
+
+def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_A))]>,
+ TB, OpSize;
+def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_A))]>,
+ TB, OpSize;
+def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_A))]>,
+ TB;
+def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_A))]>,
+ TB;
+
+def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_L))]>,
+ TB, OpSize;
+def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_L))]>,
+ TB, OpSize;
+def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_L))]>,
+ TB;
+def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_L))]>,
+ TB;
+
+def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_GE))]>,
+ TB, OpSize;
+def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_GE))]>,
+ TB, OpSize;
+def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_GE))]>,
+ TB;
+def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_GE))]>,
+ TB;
+
+def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_LE))]>,
+ TB, OpSize;
+def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_LE))]>,
+ TB, OpSize;
+def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_LE))]>,
+ TB;
+def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_LE))]>,
+ TB;
+
+def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_G))]>,
+ TB, OpSize;
+def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_G))]>,
+ TB, OpSize;
+def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_G))]>,
+ TB;
+def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_G))]>,
+ TB;
+
+def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_S))]>,
+ TB, OpSize;
+def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_S))]>,
+ TB, OpSize;
+def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_S))]>,
+ TB;
+def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_S))]>,
+ TB;
+
+def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NS))]>,
+ TB, OpSize;
+def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NS))]>,
+ TB, OpSize;
+def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NS))]>,
+ TB;
+def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NS))]>,
+ TB;
+
+def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_P))]>,
+ TB, OpSize;
+def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_P))]>,
+ TB, OpSize;
+def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_P))]>,
+ TB;
+def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_P))]>,
+ TB;
+
+def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NP))]>,
+ TB, OpSize;
+def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NP))]>,
+ TB, OpSize;
+def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NP))]>,
+ TB;
+def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NP))]>,
+ TB;
+
+
+// unary instructions
+let CodeSize = 2 in {
+def NEG8r : I<0xF6, MRM3r, (ops GR8 :$dst, GR8 :$src), "neg{b} $dst",
+ [(set GR8:$dst, (ineg GR8:$src))]>;
+def NEG16r : I<0xF7, MRM3r, (ops GR16:$dst, GR16:$src), "neg{w} $dst",
+ [(set GR16:$dst, (ineg GR16:$src))]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (ops GR32:$dst, GR32:$src), "neg{l} $dst",
+ [(set GR32:$dst, (ineg GR32:$src))]>;
+let isTwoAddress = 0 in {
+ def NEG8m : I<0xF6, MRM3m, (ops i8mem :$dst), "neg{b} $dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst)]>;
+ def NEG16m : I<0xF7, MRM3m, (ops i16mem:$dst), "neg{w} $dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ def NEG32m : I<0xF7, MRM3m, (ops i32mem:$dst), "neg{l} $dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst)]>;
+
+}
+
+def NOT8r : I<0xF6, MRM2r, (ops GR8 :$dst, GR8 :$src), "not{b} $dst",
+ [(set GR8:$dst, (not GR8:$src))]>;
+def NOT16r : I<0xF7, MRM2r, (ops GR16:$dst, GR16:$src), "not{w} $dst",
+ [(set GR16:$dst, (not GR16:$src))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (ops GR32:$dst, GR32:$src), "not{l} $dst",
+ [(set GR32:$dst, (not GR32:$src))]>;
+let isTwoAddress = 0 in {
+ def NOT8m : I<0xF6, MRM2m, (ops i8mem :$dst), "not{b} $dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+ def NOT16m : I<0xF7, MRM2m, (ops i16mem:$dst), "not{w} $dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ def NOT32m : I<0xF7, MRM2m, (ops i32mem:$dst), "not{l} $dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+}
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (ops GR8 :$dst, GR8 :$src), "inc{b} $dst",
+ [(set GR8:$dst, (add GR8:$src, 1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, 1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
+}
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst)]>;
+ def INC16m : I<0xFF, MRM0m, (ops i16mem:$dst), "inc{w} $dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, OpSize;
+ def INC32m : I<0xFF, MRM0m, (ops i32mem:$dst), "inc{l} $dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>;
+}
+
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (ops GR8 :$dst, GR8 :$src), "dec{b} $dst",
+ [(set GR8:$dst, (add GR8:$src, -1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, -1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
+}
+
+let isTwoAddress = 0, CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (ops i8mem :$dst), "dec{b} $dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst)]>;
+ def DEC16m : I<0xFF, MRM1m, (ops i16mem:$dst), "dec{w} $dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, OpSize;
+ def DEC32m : I<0xFF, MRM1m, (ops i32mem:$dst), "dec{l} $dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>;
+}
+
+// Logical operators...
+let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
+def AND8rr : I<0x20, MRMDestReg,
+ (ops GR8 :$dst, GR8 :$src1, GR8 :$src2),
+ "and{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, GR8:$src2))]>;
+def AND16rr : I<0x21, MRMDestReg,
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "and{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, GR16:$src2))]>, OpSize;
+def AND32rr : I<0x21, MRMDestReg,
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "and{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
+}
+
+def AND8rm : I<0x22, MRMSrcMem,
+ (ops GR8 :$dst, GR8 :$src1, i8mem :$src2),
+ "and{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, (load addr:$src2)))]>;
+def AND16rm : I<0x23, MRMSrcMem,
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "and{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, (load addr:$src2)))]>, OpSize;
+def AND32rm : I<0x23, MRMSrcMem,
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "and{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (load addr:$src2)))]>;
+
+def AND8ri : Ii8<0x80, MRM4r,
+ (ops GR8 :$dst, GR8 :$src1, i8imm :$src2),
+ "and{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, imm:$src2))]>;
+def AND16ri : Ii16<0x81, MRM4r,
+ (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "and{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, imm:$src2))]>, OpSize;
+def AND32ri : Ii32<0x81, MRM4r,
+ (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "and{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
+def AND16ri8 : Ii8<0x83, MRM4r,
+ (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "and{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def AND32ri8 : Ii8<0x83, MRM4r,
+ (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "and{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def AND8mr : I<0x20, MRMDestMem,
+ (ops i8mem :$dst, GR8 :$src),
+ "and{b} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst)]>;
+ def AND16mr : I<0x21, MRMDestMem,
+ (ops i16mem:$dst, GR16:$src),
+ "and{w} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst)]>,
+ OpSize;
+ def AND32mr : I<0x21, MRMDestMem,
+ (ops i32mem:$dst, GR32:$src),
+ "and{l} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR32:$src), addr:$dst)]>;
+ def AND8mi : Ii8<0x80, MRM4m,
+ (ops i8mem :$dst, i8imm :$src),
+ "and{b} {$src, $dst|$dst, $src}",
+ [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
+ def AND16mi : Ii16<0x81, MRM4m,
+ (ops i16mem:$dst, i16imm:$src),
+ "and{w} {$src, $dst|$dst, $src}",
+ [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
+ OpSize;
+ def AND32mi : Ii32<0x81, MRM4m,
+ (ops i32mem:$dst, i32imm:$src),
+ "and{l} {$src, $dst|$dst, $src}",
+ [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
+ def AND16mi8 : Ii8<0x83, MRM4m,
+ (ops i16mem:$dst, i16i8imm :$src),
+ "and{w} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
+ OpSize;
+ def AND32mi8 : Ii8<0x83, MRM4m,
+ (ops i32mem:$dst, i32i8imm :$src),
+ "and{l} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
+}
+
+
+let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
+def OR8rr : I<0x08, MRMDestReg, (ops GR8 :$dst, GR8 :$src1, GR8 :$src2),
+ "or{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+def OR16rr : I<0x09, MRMDestReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "or{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>, OpSize;
+def OR32rr : I<0x09, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "or{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
+}
+def OR8rm : I<0x0A, MRMSrcMem , (ops GR8 :$dst, GR8 :$src1, i8mem :$src2),
+ "or{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+def OR16rm : I<0x0B, MRMSrcMem , (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "or{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>, OpSize;
+def OR32rm : I<0x0B, MRMSrcMem , (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "or{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load addr:$src2)))]>;
+
+def OR8ri : Ii8 <0x80, MRM1r, (ops GR8 :$dst, GR8 :$src1, i8imm:$src2),
+ "or{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+def OR16ri : Ii16<0x81, MRM1r, (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "or{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>, OpSize;
+def OR32ri : Ii32<0x81, MRM1r, (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "or{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
+
+def OR16ri8 : Ii8<0x83, MRM1r, (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "or{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2))]>, OpSize;
+def OR32ri8 : Ii8<0x83, MRM1r, (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "or{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2))]>;
+let isTwoAddress = 0 in {
+ def OR8mr : I<0x08, MRMDestMem, (ops i8mem:$dst, GR8:$src),
+ "or{b} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+ def OR16mr : I<0x09, MRMDestMem, (ops i16mem:$dst, GR16:$src),
+ "or{w} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>, OpSize;
+ def OR32mr : I<0x09, MRMDestMem, (ops i32mem:$dst, GR32:$src),
+ "or{l} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR32:$src), addr:$dst)]>;
+ def OR8mi : Ii8<0x80, MRM1m, (ops i8mem :$dst, i8imm:$src),
+ "or{b} {$src, $dst|$dst, $src}",
+ [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
+ def OR16mi : Ii16<0x81, MRM1m, (ops i16mem:$dst, i16imm:$src),
+ "or{w} {$src, $dst|$dst, $src}",
+ [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
+ OpSize;
+ def OR32mi : Ii32<0x81, MRM1m, (ops i32mem:$dst, i32imm:$src),
+ "or{l} {$src, $dst|$dst, $src}",
+ [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
+ def OR16mi8 : Ii8<0x83, MRM1m, (ops i16mem:$dst, i16i8imm:$src),
+ "or{w} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
+ OpSize;
+ def OR32mi8 : Ii8<0x83, MRM1m, (ops i32mem:$dst, i32i8imm:$src),
+ "or{l} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
+}
+
+
+let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
+def XOR8rr : I<0x30, MRMDestReg,
+ (ops GR8 :$dst, GR8 :$src1, GR8 :$src2),
+ "xor{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
+def XOR16rr : I<0x31, MRMDestReg,
+ (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "xor{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
+def XOR32rr : I<0x31, MRMDestReg,
+ (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "xor{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+}
+
+def XOR8rm : I<0x32, MRMSrcMem ,
+ (ops GR8 :$dst, GR8:$src1, i8mem :$src2),
+ "xor{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2)))]>;
+def XOR16rm : I<0x33, MRMSrcMem ,
+ (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "xor{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>, OpSize;
+def XOR32rm : I<0x33, MRMSrcMem ,
+ (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "xor{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2)))]>;
+
+def XOR8ri : Ii8<0x80, MRM6r,
+ (ops GR8:$dst, GR8:$src1, i8imm:$src2),
+ "xor{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, imm:$src2))]>;
+def XOR16ri : Ii16<0x81, MRM6r,
+ (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "xor{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, imm:$src2))]>, OpSize;
+def XOR32ri : Ii32<0x81, MRM6r,
+ (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "xor{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
+def XOR16ri8 : Ii8<0x83, MRM6r,
+ (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "xor{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def XOR32ri8 : Ii8<0x83, MRM6r,
+ (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "xor{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2))]>;
+let isTwoAddress = 0 in {
+ def XOR8mr : I<0x30, MRMDestMem,
+ (ops i8mem :$dst, GR8 :$src),
+ "xor{b} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst)]>;
+ def XOR16mr : I<0x31, MRMDestMem,
+ (ops i16mem:$dst, GR16:$src),
+ "xor{w} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst)]>,
+ OpSize;
+ def XOR32mr : I<0x31, MRMDestMem,
+ (ops i32mem:$dst, GR32:$src),
+ "xor{l} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR32:$src), addr:$dst)]>;
+ def XOR8mi : Ii8<0x80, MRM6m,
+ (ops i8mem :$dst, i8imm :$src),
+ "xor{b} {$src, $dst|$dst, $src}",
+ [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
+ def XOR16mi : Ii16<0x81, MRM6m,
+ (ops i16mem:$dst, i16imm:$src),
+ "xor{w} {$src, $dst|$dst, $src}",
+ [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
+ OpSize;
+ def XOR32mi : Ii32<0x81, MRM6m,
+ (ops i32mem:$dst, i32imm:$src),
+ "xor{l} {$src, $dst|$dst, $src}",
+ [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
+ def XOR16mi8 : Ii8<0x83, MRM6m,
+ (ops i16mem:$dst, i16i8imm :$src),
+ "xor{w} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
+ OpSize;
+ def XOR32mi8 : Ii8<0x83, MRM6m,
+ (ops i32mem:$dst, i32i8imm :$src),
+ "xor{l} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
+}
+
+// Shift instructions
+def SHL8rCL : I<0xD2, MRM4r, (ops GR8 :$dst, GR8 :$src),
+ "shl{b} {%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (shl GR8:$src, CL))]>, Imp<[CL],[]>;
+def SHL16rCL : I<0xD3, MRM4r, (ops GR16:$dst, GR16:$src),
+ "shl{w} {%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (shl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (ops GR32:$dst, GR32:$src),
+ "shl{l} {%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (shl GR32:$src, CL))]>, Imp<[CL],[]>;
+
+def SHL8ri : Ii8<0xC0, MRM4r, (ops GR8 :$dst, GR8 :$src1, i8imm:$src2),
+ "shl{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (ops GR16:$dst, GR16:$src1, i8imm:$src2),
+ "shl{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (ops GR32:$dst, GR32:$src1, i8imm:$src2),
+ "shl{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+}
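+// For example, a left shift of a GR32 by 1, 2, or 3 can be re-expressed as a
+// three-address LEA with scale 2, 4, or 8 when the result must land in a new
+// register, e.g. "shll $2, %eax" becoming "leal (,%eax,4), %ecx".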
+
+// Shift left by one. Not used because (add x, x) is slightly cheaper.
+def SHL8r1 : I<0xD0, MRM4r, (ops GR8 :$dst, GR8 :$src1),
+ "shl{b} $dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (ops GR16:$dst, GR16:$src1),
+ "shl{w} $dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (ops GR32:$dst, GR32:$src1),
+ "shl{l} $dst", []>;
+
+let isTwoAddress = 0 in {
+ def SHL8mCL : I<0xD2, MRM4m, (ops i8mem :$dst),
+ "shl{b} {%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SHL16mCL : I<0xD3, MRM4m, (ops i16mem:$dst),
+ "shl{w} {%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>, OpSize;
+ def SHL32mCL : I<0xD3, MRM4m, (ops i32mem:$dst),
+ "shl{l} {%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SHL8mi : Ii8<0xC0, MRM4m, (ops i8mem :$dst, i8imm:$src),
+ "shl{b} {$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHL16mi : Ii8<0xC1, MRM4m, (ops i16mem:$dst, i8imm:$src),
+ "shl{w} {$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHL32mi : Ii8<0xC1, MRM4m, (ops i32mem:$dst, i8imm:$src),
+ "shl{l} {$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHL8m1 : I<0xD0, MRM4m, (ops i8mem :$dst),
+ "shl{b} $dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHL16m1 : I<0xD1, MRM4m, (ops i16mem:$dst),
+ "shl{w} $dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SHL32m1 : I<0xD1, MRM4m, (ops i32mem:$dst),
+ "shl{l} $dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+def SHR8rCL : I<0xD2, MRM5r, (ops GR8 :$dst, GR8 :$src),
+ "shr{b} {%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (srl GR8:$src, CL))]>, Imp<[CL],[]>;
+def SHR16rCL : I<0xD3, MRM5r, (ops GR16:$dst, GR16:$src),
+ "shr{w} {%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (srl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (ops GR32:$dst, GR32:$src),
+ "shr{l} {%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (srl GR32:$src, CL))]>, Imp<[CL],[]>;
+
+def SHR8ri : Ii8<0xC0, MRM5r, (ops GR8:$dst, GR8:$src1, i8imm:$src2),
+ "shr{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (ops GR16:$dst, GR16:$src1, i8imm:$src2),
+ "shr{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (ops GR32:$dst, GR32:$src1, i8imm:$src2),
+ "shr{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SHR8r1 : I<0xD0, MRM5r, (ops GR8:$dst, GR8:$src1),
+ "shr{b} $dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (ops GR16:$dst, GR16:$src1),
+ "shr{w} $dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (ops GR32:$dst, GR32:$src1),
+ "shr{l} $dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ def SHR8mCL : I<0xD2, MRM5m, (ops i8mem :$dst),
+ "shr{b} {%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SHR16mCL : I<0xD3, MRM5m, (ops i16mem:$dst),
+ "shr{w} {%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>, OpSize;
+ def SHR32mCL : I<0xD3, MRM5m, (ops i32mem:$dst),
+ "shr{l} {%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SHR8mi : Ii8<0xC0, MRM5m, (ops i8mem :$dst, i8imm:$src),
+ "shr{b} {$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHR16mi : Ii8<0xC1, MRM5m, (ops i16mem:$dst, i8imm:$src),
+ "shr{w} {$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHR32mi : Ii8<0xC1, MRM5m, (ops i32mem:$dst, i8imm:$src),
+ "shr{l} {$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHR8m1 : I<0xD0, MRM5m, (ops i8mem :$dst),
+ "shr{b} $dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHR16m1 : I<0xD1, MRM5m, (ops i16mem:$dst),
+ "shr{w} $dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+ def SHR32m1 : I<0xD1, MRM5m, (ops i32mem:$dst),
+ "shr{l} $dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+def SAR8rCL : I<0xD2, MRM7r, (ops GR8 :$dst, GR8 :$src),
+ "sar{b} {%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (sra GR8:$src, CL))]>, Imp<[CL],[]>;
+def SAR16rCL : I<0xD3, MRM7r, (ops GR16:$dst, GR16:$src),
+ "sar{w} {%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (sra GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (ops GR32:$dst, GR32:$src),
+ "sar{l} {%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (sra GR32:$src, CL))]>, Imp<[CL],[]>;
+
+def SAR8ri : Ii8<0xC0, MRM7r, (ops GR8 :$dst, GR8 :$src1, i8imm:$src2),
+ "sar{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (ops GR16:$dst, GR16:$src1, i8imm:$src2),
+ "sar{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (ops GR32:$dst, GR32:$src1, i8imm:$src2),
+ "sar{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (ops GR8 :$dst, GR8 :$src1),
+ "sar{b} $dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (ops GR16:$dst, GR16:$src1),
+ "sar{w} $dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (ops GR32:$dst, GR32:$src1),
+ "sar{l} $dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ def SAR8mCL : I<0xD2, MRM7m, (ops i8mem :$dst),
+ "sar{b} {%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SAR16mCL : I<0xD3, MRM7m, (ops i16mem:$dst),
+ "sar{w} {%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>, OpSize;
+ def SAR32mCL : I<0xD3, MRM7m, (ops i32mem:$dst),
+ "sar{l} {%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def SAR8mi : Ii8<0xC0, MRM7m, (ops i8mem :$dst, i8imm:$src),
+ "sar{b} {$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SAR16mi : Ii8<0xC1, MRM7m, (ops i16mem:$dst, i8imm:$src),
+ "sar{w} {$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SAR32mi : Ii8<0xC1, MRM7m, (ops i32mem:$dst, i8imm:$src),
+ "sar{l} {$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SAR8m1 : I<0xD0, MRM7m, (ops i8mem :$dst),
+ "sar{b} $dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SAR16m1 : I<0xD1, MRM7m, (ops i16mem:$dst),
+ "sar{w} $dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SAR32m1 : I<0xD1, MRM7m, (ops i32mem:$dst),
+ "sar{l} $dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+// Rotate instructions
+// FIXME: provide shorter instructions when imm8 == 1
+def ROL8rCL : I<0xD2, MRM0r, (ops GR8 :$dst, GR8 :$src),
+ "rol{b} {%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotl GR8:$src, CL))]>, Imp<[CL],[]>;
+def ROL16rCL : I<0xD3, MRM0r, (ops GR16:$dst, GR16:$src),
+ "rol{w} {%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (ops GR32:$dst, GR32:$src),
+ "rol{l} {%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotl GR32:$src, CL))]>, Imp<[CL],[]>;
+
+def ROL8ri : Ii8<0xC0, MRM0r, (ops GR8 :$dst, GR8 :$src1, i8imm:$src2),
+ "rol{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (ops GR16:$dst, GR16:$src1, i8imm:$src2),
+ "rol{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (ops GR32:$dst, GR32:$src1, i8imm:$src2),
+ "rol{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (ops GR8 :$dst, GR8 :$src1),
+ "rol{b} $dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (ops GR16:$dst, GR16:$src1),
+ "rol{w} $dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (ops GR32:$dst, GR32:$src1),
+ "rol{l} $dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ def ROL8mCL : I<0xD2, MRM0m, (ops i8mem :$dst),
+ "rol{b} {%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def ROL16mCL : I<0xD3, MRM0m, (ops i16mem:$dst),
+ "rol{w} {%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>, OpSize;
+ def ROL32mCL : I<0xD3, MRM0m, (ops i32mem:$dst),
+ "rol{l} {%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def ROL8mi : Ii8<0xC0, MRM0m, (ops i8mem :$dst, i8imm:$src),
+ "rol{b} {$src, $dst|$dst, $src}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROL16mi : Ii8<0xC1, MRM0m, (ops i16mem:$dst, i8imm:$src),
+ "rol{w} {$src, $dst|$dst, $src}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROL32mi : Ii8<0xC1, MRM0m, (ops i32mem:$dst, i8imm:$src),
+ "rol{l} {$src, $dst|$dst, $src}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROL8m1 : I<0xD0, MRM0m, (ops i8mem :$dst),
+ "rol{b} $dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROL16m1 : I<0xD1, MRM0m, (ops i16mem:$dst),
+ "rol{w} $dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROL32m1 : I<0xD1, MRM0m, (ops i32mem:$dst),
+ "rol{l} $dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+def ROR8rCL : I<0xD2, MRM1r, (ops GR8 :$dst, GR8 :$src),
+ "ror{b} {%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotr GR8:$src, CL))]>, Imp<[CL],[]>;
+def ROR16rCL : I<0xD3, MRM1r, (ops GR16:$dst, GR16:$src),
+ "ror{w} {%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotr GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (ops GR32:$dst, GR32:$src),
+ "ror{l} {%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotr GR32:$src, CL))]>, Imp<[CL],[]>;
+
+def ROR8ri : Ii8<0xC0, MRM1r, (ops GR8 :$dst, GR8 :$src1, i8imm:$src2),
+ "ror{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (ops GR16:$dst, GR16:$src1, i8imm:$src2),
+ "ror{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (ops GR32:$dst, GR32:$src1, i8imm:$src2),
+ "ror{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (ops GR8 :$dst, GR8 :$src1),
+ "ror{b} $dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (ops GR16:$dst, GR16:$src1),
+ "ror{w} $dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (ops GR32:$dst, GR32:$src1),
+ "ror{l} $dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ def ROR8mCL : I<0xD2, MRM1m, (ops i8mem :$dst),
+ "ror{b} {%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def ROR16mCL : I<0xD3, MRM1m, (ops i16mem:$dst),
+ "ror{w} {%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>, OpSize;
+ def ROR32mCL : I<0xD3, MRM1m, (ops i32mem:$dst),
+ "ror{l} {%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+ def ROR8mi : Ii8<0xC0, MRM1m, (ops i8mem :$dst, i8imm:$src),
+ "ror{b} {$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROR16mi : Ii8<0xC1, MRM1m, (ops i16mem:$dst, i8imm:$src),
+ "ror{w} {$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROR32mi : Ii8<0xC1, MRM1m, (ops i32mem:$dst, i8imm:$src),
+ "ror{l} {$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROR8m1 : I<0xD0, MRM1m, (ops i8mem :$dst),
+ "ror{b} $dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROR16m1 : I<0xD1, MRM1m, (ops i16mem:$dst),
+ "ror{w} $dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROR32m1 : I<0xD1, MRM1m, (ops i32mem:$dst),
+ "ror{l} $dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+
+
+// Double shift instructions (generalizations of rotate)
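+// shld shifts the destination left and fills the vacated low bits from the
+// high bits of the second source; shrd shifts the destination right and fills
+// the vacated high bits from the low bits of the second source.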
+def SHLD32rrCL : I<0xA5, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
+ Imp<[CL],[]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "shrd{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
+ Imp<[CL],[]>, TB;
+def SHLD16rrCL : I<0xA5, MRMDestReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "shld{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ Imp<[CL],[]>, TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "shrd{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ Imp<[CL],[]>, TB, OpSize;
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (ops GR32:$dst, GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (ops GR32:$dst, GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (ops GR16:$dst, GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (ops GR16:$dst, GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+}
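+// The commutation above rests on the identity shld(a, b, n) == shrd(b, a, 32-n)
+// (16-n for the 16-bit forms), so swapping the register operands maps one
+// opcode onto the other with an adjusted shift count.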
+
+let isTwoAddress = 0 in {
+ def SHLD32mrCL : I<0xA5, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>,
+ Imp<[CL],[]>, TB;
+ def SHRD32mrCL : I<0xAD, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "shrd{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>,
+ Imp<[CL],[]>, TB;
+ def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (ops i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+ def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (ops i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+ def SHLD16mrCL : I<0xA5, MRMDestMem, (ops i16mem:$dst, GR16:$src2),
+ "shld{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>,
+ Imp<[CL],[]>, TB, OpSize;
+ def SHRD16mrCL : I<0xAD, MRMDestMem, (ops i16mem:$dst, GR16:$src2),
+ "shrd{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>,
+ Imp<[CL],[]>, TB, OpSize;
+ def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (ops i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+ def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (ops i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+}
+
+
+// Arithmetic.
+let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
+def ADD8rr : I<0x00, MRMDestReg, (ops GR8 :$dst, GR8 :$src1, GR8 :$src2),
+ "add{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2))]>;
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def ADD16rr : I<0x01, MRMDestReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2))]>, OpSize;
+def ADD32rr : I<0x01, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
+} // end isConvertibleToThreeAddress
+} // end isCommutable
+def ADD8rm : I<0x02, MRMSrcMem, (ops GR8 :$dst, GR8 :$src1, i8mem :$src2),
+ "add{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2)))]>;
+def ADD16rm : I<0x03, MRMSrcMem, (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2)))]>, OpSize;
+def ADD32rm : I<0x03, MRMSrcMem, (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load addr:$src2)))]>;
+
+def ADD8ri : Ii8<0x80, MRM0r, (ops GR8:$dst, GR8:$src1, i8imm:$src2),
+ "add{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def ADD16ri : Ii16<0x81, MRM0r, (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2))]>, OpSize;
+def ADD32ri : Ii32<0x81, MRM0r, (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2))]>;
+def ADD16ri8 : Ii8<0x83, MRM0r, (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def ADD32ri8 : Ii8<0x83, MRM0r, (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2))]>;
+}
+
+let isTwoAddress = 0 in {
+ def ADD8mr : I<0x00, MRMDestMem, (ops i8mem :$dst, GR8 :$src2),
+ "add{b} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def ADD16mr : I<0x01, MRMDestMem, (ops i16mem:$dst, GR16:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def ADD32mr : I<0x01, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ def ADD8mi : Ii8<0x80, MRM0m, (ops i8mem :$dst, i8imm :$src2),
+ "add{b} {$src2, $dst|$dst, $src2}",
+ [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADD16mi : Ii16<0x81, MRM0m, (ops i16mem:$dst, i16imm:$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def ADD32mi : Ii32<0x81, MRM0m, (ops i32mem:$dst, i32imm:$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADD16mi8 : Ii8<0x83, MRM0m, (ops i16mem:$dst, i16i8imm :$src2),
+ "add{w} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def ADD32mi8 : Ii8<0x83, MRM0m, (ops i32mem:$dst, i32i8imm :$src2),
+ "add{l} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+
+let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
+def ADC32rr : I<0x11, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
+}
+def ADC32rm : I<0x13, MRMSrcMem , (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
+def ADC32ri : Ii32<0x81, MRM2r, (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
+def ADC32ri8 : Ii8<0x83, MRM2r, (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def ADC32mr : I<0x11, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ def ADC32mi : Ii32<0x81, MRM2m, (ops i32mem:$dst, i32imm:$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADC32mi8 : Ii8<0x83, MRM2m, (ops i32mem:$dst, i32i8imm :$src2),
+ "adc{l} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+
+def SUB8rr : I<0x28, MRMDestReg, (ops GR8 :$dst, GR8 :$src1, GR8 :$src2),
+ "sub{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>;
+def SUB16rr : I<0x29, MRMDestReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize;
+def SUB32rr : I<0x29, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+def SUB8rm : I<0x2A, MRMSrcMem, (ops GR8 :$dst, GR8 :$src1, i8mem :$src2),
+ "sub{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>;
+def SUB16rm : I<0x2B, MRMSrcMem, (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize;
+def SUB32rm : I<0x2B, MRMSrcMem, (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>;
+
+def SUB8ri : Ii8 <0x80, MRM5r, (ops GR8:$dst, GR8:$src1, i8imm:$src2),
+ "sub{b} {$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>;
+def SUB16ri : Ii16<0x81, MRM5r, (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize;
+def SUB32ri : Ii32<0x81, MRM5r, (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>;
+def SUB16ri8 : Ii8<0x83, MRM5r, (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def SUB32ri8 : Ii8<0x83, MRM5r, (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>;
+let isTwoAddress = 0 in {
+ def SUB8mr : I<0x28, MRMDestMem, (ops i8mem :$dst, GR8 :$src2),
+ "sub{b} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def SUB16mr : I<0x29, MRMDestMem, (ops i16mem:$dst, GR16:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def SUB32mr : I<0x29, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ def SUB8mi : Ii8<0x80, MRM5m, (ops i8mem :$dst, i8imm:$src2),
+ "sub{b} {$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SUB16mi : Ii16<0x81, MRM5m, (ops i16mem:$dst, i16imm:$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def SUB32mi : Ii32<0x81, MRM5m, (ops i32mem:$dst, i32imm:$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SUB16mi8 : Ii8<0x83, MRM5m, (ops i16mem:$dst, i16i8imm :$src2),
+ "sub{w} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def SUB32mi8 : Ii8<0x83, MRM5m, (ops i32mem:$dst, i32i8imm :$src2),
+ "sub{l} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+
+def SBB32rr : I<0x19, MRMDestReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def SBB32mr : I<0x19, MRMDestMem, (ops i32mem:$dst, GR32:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
+  def SBB8mi : Ii8 <0x80, MRM3m, (ops i8mem:$dst, i8imm:$src2),
+ "sbb{b} {$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB32mi : Ii32<0x81, MRM3m, (ops i32mem:$dst, i32imm:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB32mi8 : Ii8<0x83, MRM3m, (ops i32mem:$dst, i32i8imm :$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+def SBB32rm : I<0x1B, MRMSrcMem, (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
+def SBB32ri : Ii32<0x81, MRM3r, (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
+def SBB32ri8 : Ii8<0x83, MRM3r, (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "sbb{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+def IMUL16rr : I<0xAF, MRMSrcReg, (ops GR16:$dst, GR16:$src1, GR16:$src2),
+ "imul{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (ops GR32:$dst, GR32:$src1, GR32:$src2),
+ "imul{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB;
+}
+def IMUL16rm : I<0xAF, MRMSrcMem, (ops GR16:$dst, GR16:$src1, i16mem:$src2),
+ "imul{w} {$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (ops GR32:$dst, GR32:$src1, i32mem:$src2),
+ "imul{l} {$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB;
+
+} // end Two Address instructions
+
+// Surprisingly enough, these are not two-address instructions!
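+// The immediate forms encode a separate source operand, so the destination is
+// not tied to $src1: e.g. "imul{l} $5, %esi, %ecx" reads ESI and writes ECX.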
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (ops GR16:$dst, GR16:$src1, i16imm:$src2),
+ "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (ops GR32:$dst, GR32:$src1, i32imm:$src2),
+ "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (ops GR16:$dst, GR16:$src1, i16i8imm:$src2),
+ "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (ops GR32:$dst, GR32:$src1, i32i8imm:$src2),
+ "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>;
+
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (ops GR16:$dst, i16mem:$src1, i16imm:$src2),
+ "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (ops GR32:$dst, i32mem:$src1, i32imm:$src2),
+ "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (ops GR16:$dst, i16mem:$src1, i16i8imm :$src2),
+ "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (ops GR32:$dst, i32mem:$src1, i32i8imm: $src2),
+ "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Test instructions are just like AND, except they don't generate a result.
+//
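+// For example, "test{l} %eax, %eax" sets the flags from EAX & EAX without
+// writing a register, which is why the patterns below match
+// (X86cmp (and x, y), 0) instead of producing a value.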
+let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
+def TEST8rr : I<0x84, MRMDestReg, (ops GR8:$src1, GR8:$src2),
+ "test{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR8:$src1, GR8:$src2), 0)]>;
+def TEST16rr : I<0x85, MRMDestReg, (ops GR16:$src1, GR16:$src2),
+ "test{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR16:$src1, GR16:$src2), 0)]>, OpSize;
+def TEST32rr : I<0x85, MRMDestReg, (ops GR32:$src1, GR32:$src2),
+ "test{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR32:$src1, GR32:$src2), 0)]>;
+}
+
+def TEST8rm : I<0x84, MRMSrcMem, (ops GR8 :$src1, i8mem :$src2),
+ "test{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0)]>;
+def TEST16rm : I<0x85, MRMSrcMem, (ops GR16:$src1, i16mem:$src2),
+ "test{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0)]>,
+ OpSize;
+def TEST32rm : I<0x85, MRMSrcMem, (ops GR32:$src1, i32mem:$src2),
+ "test{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0)]>;
+
+def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
+ (ops GR8:$src1, i8imm:$src2),
+ "test{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR8:$src1, imm:$src2), 0)]>;
+def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
+ (ops GR16:$src1, i16imm:$src2),
+ "test{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR16:$src1, imm:$src2), 0)]>, OpSize;
+def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
+ (ops GR32:$src1, i32imm:$src2),
+ "test{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR32:$src1, imm:$src2), 0)]>;
+
+def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
+ (ops i8mem:$src1, i8imm:$src2),
+ "test{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0)]>;
+def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
+ (ops i16mem:$src1, i16imm:$src2),
+ "test{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0)]>,
+ OpSize;
+def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
+ (ops i32mem:$src1, i32imm:$src2),
+ "test{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0)]>;
+
+
+// Condition code ops, incl. set if equal/not equal/...
+def SAHF : I<0x9E, RawFrm, (ops), "sahf", []>, Imp<[AH],[]>; // flags = AH
+def LAHF : I<0x9F, RawFrm, (ops), "lahf", []>, Imp<[],[AH]>; // AH = flags
+
+def SETEr : I<0x94, MRM0r,
+ (ops GR8 :$dst),
+ "sete $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_E))]>,
+ TB; // GR8 = ==
+def SETEm : I<0x94, MRM0m,
+ (ops i8mem:$dst),
+ "sete $dst",
+ [(store (X86setcc X86_COND_E), addr:$dst)]>,
+ TB; // [mem8] = ==
+def SETNEr : I<0x95, MRM0r,
+ (ops GR8 :$dst),
+ "setne $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NE))]>,
+ TB; // GR8 = !=
+def SETNEm : I<0x95, MRM0m,
+ (ops i8mem:$dst),
+ "setne $dst",
+ [(store (X86setcc X86_COND_NE), addr:$dst)]>,
+ TB; // [mem8] = !=
+def SETLr : I<0x9C, MRM0r,
+ (ops GR8 :$dst),
+ "setl $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_L))]>,
+ TB; // GR8 = < signed
+def SETLm : I<0x9C, MRM0m,
+ (ops i8mem:$dst),
+ "setl $dst",
+ [(store (X86setcc X86_COND_L), addr:$dst)]>,
+ TB; // [mem8] = < signed
+def SETGEr : I<0x9D, MRM0r,
+ (ops GR8 :$dst),
+ "setge $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_GE))]>,
+ TB; // GR8 = >= signed
+def SETGEm : I<0x9D, MRM0m,
+ (ops i8mem:$dst),
+ "setge $dst",
+ [(store (X86setcc X86_COND_GE), addr:$dst)]>,
+ TB; // [mem8] = >= signed
+def SETLEr : I<0x9E, MRM0r,
+ (ops GR8 :$dst),
+ "setle $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_LE))]>,
+ TB; // GR8 = <= signed
+def SETLEm : I<0x9E, MRM0m,
+ (ops i8mem:$dst),
+ "setle $dst",
+ [(store (X86setcc X86_COND_LE), addr:$dst)]>,
+ TB; // [mem8] = <= signed
+def SETGr : I<0x9F, MRM0r,
+ (ops GR8 :$dst),
+ "setg $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_G))]>,
+ TB; // GR8 = > signed
+def SETGm : I<0x9F, MRM0m,
+ (ops i8mem:$dst),
+ "setg $dst",
+ [(store (X86setcc X86_COND_G), addr:$dst)]>,
+ TB; // [mem8] = > signed
+
+def SETBr : I<0x92, MRM0r,
+ (ops GR8 :$dst),
+ "setb $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_B))]>,
+ TB; // GR8 = < unsign
+def SETBm : I<0x92, MRM0m,
+ (ops i8mem:$dst),
+ "setb $dst",
+ [(store (X86setcc X86_COND_B), addr:$dst)]>,
+ TB; // [mem8] = < unsign
+def SETAEr : I<0x93, MRM0r,
+ (ops GR8 :$dst),
+ "setae $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_AE))]>,
+ TB; // GR8 = >= unsign
+def SETAEm : I<0x93, MRM0m,
+ (ops i8mem:$dst),
+ "setae $dst",
+ [(store (X86setcc X86_COND_AE), addr:$dst)]>,
+ TB; // [mem8] = >= unsign
+def SETBEr : I<0x96, MRM0r,
+ (ops GR8 :$dst),
+ "setbe $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_BE))]>,
+ TB; // GR8 = <= unsign
+def SETBEm : I<0x96, MRM0m,
+ (ops i8mem:$dst),
+ "setbe $dst",
+ [(store (X86setcc X86_COND_BE), addr:$dst)]>,
+ TB; // [mem8] = <= unsign
+def SETAr  : I<0x97, MRM0r,
+                (ops GR8 :$dst),
+                "seta $dst",
+                [(set GR8:$dst, (X86setcc X86_COND_A))]>,
+                TB; // GR8 = > unsign
+def SETAm  : I<0x97, MRM0m,
+                (ops i8mem:$dst),
+                "seta $dst",
+                [(store (X86setcc X86_COND_A), addr:$dst)]>,
+                TB; // [mem8] = > unsign
+
+def SETSr : I<0x98, MRM0r,
+ (ops GR8 :$dst),
+ "sets $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_S))]>,
+ TB; // GR8 = <sign bit>
+def SETSm : I<0x98, MRM0m,
+ (ops i8mem:$dst),
+ "sets $dst",
+ [(store (X86setcc X86_COND_S), addr:$dst)]>,
+ TB; // [mem8] = <sign bit>
+def SETNSr : I<0x99, MRM0r,
+ (ops GR8 :$dst),
+ "setns $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NS))]>,
+ TB; // GR8 = !<sign bit>
+def SETNSm : I<0x99, MRM0m,
+ (ops i8mem:$dst),
+ "setns $dst",
+ [(store (X86setcc X86_COND_NS), addr:$dst)]>,
+ TB; // [mem8] = !<sign bit>
+def SETPr : I<0x9A, MRM0r,
+ (ops GR8 :$dst),
+ "setp $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_P))]>,
+ TB; // GR8 = parity
+def SETPm : I<0x9A, MRM0m,
+ (ops i8mem:$dst),
+ "setp $dst",
+ [(store (X86setcc X86_COND_P), addr:$dst)]>,
+ TB; // [mem8] = parity
+def SETNPr : I<0x9B, MRM0r,
+ (ops GR8 :$dst),
+ "setnp $dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NP))]>,
+ TB; // GR8 = not parity
+def SETNPm : I<0x9B, MRM0m,
+ (ops i8mem:$dst),
+ "setnp $dst",
+ [(store (X86setcc X86_COND_NP), addr:$dst)]>,
+ TB; // [mem8] = not parity
+
+// Integer comparisons
+def CMP8rr : I<0x38, MRMDestReg,
+ (ops GR8 :$src1, GR8 :$src2),
+ "cmp{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, GR8:$src2)]>;
+def CMP16rr : I<0x39, MRMDestReg,
+ (ops GR16:$src1, GR16:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, GR16:$src2)]>, OpSize;
+def CMP32rr : I<0x39, MRMDestReg,
+ (ops GR32:$src1, GR32:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, GR32:$src2)]>;
+def CMP8mr : I<0x38, MRMDestMem,
+ (ops i8mem :$src1, GR8 :$src2),
+ "cmp{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), GR8:$src2)]>;
+def CMP16mr : I<0x39, MRMDestMem,
+ (ops i16mem:$src1, GR16:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), GR16:$src2)]>, OpSize;
+def CMP32mr : I<0x39, MRMDestMem,
+ (ops i32mem:$src1, GR32:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), GR32:$src2)]>;
+def CMP8rm : I<0x3A, MRMSrcMem,
+ (ops GR8 :$src1, i8mem :$src2),
+ "cmp{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, (loadi8 addr:$src2))]>;
+def CMP16rm : I<0x3B, MRMSrcMem,
+ (ops GR16:$src1, i16mem:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, (loadi16 addr:$src2))]>, OpSize;
+def CMP32rm : I<0x3B, MRMSrcMem,
+ (ops GR32:$src1, i32mem:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, (loadi32 addr:$src2))]>;
+def CMP8ri : Ii8<0x80, MRM7r,
+ (ops GR8:$src1, i8imm:$src2),
+ "cmp{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, imm:$src2)]>;
+def CMP16ri : Ii16<0x81, MRM7r,
+ (ops GR16:$src1, i16imm:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, imm:$src2)]>, OpSize;
+def CMP32ri : Ii32<0x81, MRM7r,
+ (ops GR32:$src1, i32imm:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, imm:$src2)]>;
+def CMP8mi : Ii8 <0x80, MRM7m,
+ (ops i8mem :$src1, i8imm :$src2),
+ "cmp{b} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), imm:$src2)]>;
+def CMP16mi : Ii16<0x81, MRM7m,
+ (ops i16mem:$src1, i16imm:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), imm:$src2)]>, OpSize;
+def CMP32mi : Ii32<0x81, MRM7m,
+ (ops i32mem:$src1, i32imm:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), imm:$src2)]>;
+def CMP16ri8 : Ii8<0x83, MRM7r,
+ (ops GR16:$src1, i16i8imm:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, i16immSExt8:$src2)]>, OpSize;
+def CMP16mi8 : Ii8<0x83, MRM7m,
+ (ops i16mem:$src1, i16i8imm:$src2),
+ "cmp{w} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2)]>, OpSize;
+def CMP32mi8 : Ii8<0x83, MRM7m,
+ (ops i32mem:$src1, i32i8imm:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2)]>;
+def CMP32ri8 : Ii8<0x83, MRM7r,
+ (ops GR32:$src1, i32i8imm:$src2),
+ "cmp{l} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, i32immSExt8:$src2)]>;
+
+// Sign/Zero extenders
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (ops GR16:$dst, GR8 :$src),
+ "movs{bw|x} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (ops GR16:$dst, i8mem :$src),
+ "movs{bw|x} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (ops GR32:$dst, GR8 :$src),
+ "movs{bl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (ops GR32:$dst, i8mem :$src),
+ "movs{bl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (ops GR32:$dst, GR16:$src),
+ "movs{wl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (ops GR32:$dst, i16mem:$src),
+ "movs{wl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (ops GR16:$dst, GR8 :$src),
+ "movz{bw|x} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (ops GR16:$dst, i8mem :$src),
+ "movz{bw|x} {$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB, OpSize;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (ops GR32:$dst, GR8 :$src),
+ "movz{bl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (ops GR32:$dst, i8mem :$src),
+ "movz{bl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (ops GR32:$dst, GR16:$src),
+ "movz{wl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (ops GR32:$dst, i16mem:$src),
+ "movz{wl|x} {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+def CBW : I<0x98, RawFrm, (ops),
+ "{cbtw|cbw}", []>, Imp<[AL],[AX]>, OpSize; // AX = signext(AL)
+def CWDE : I<0x98, RawFrm, (ops),
+ "{cwtl|cwde}", []>, Imp<[AX],[EAX]>; // EAX = signext(AX)
+
+def CWD : I<0x99, RawFrm, (ops),
+ "{cwtd|cwd}", []>, Imp<[AX],[AX,DX]>, OpSize; // DX:AX = signext(AX)
+def CDQ : I<0x99, RawFrm, (ops),
+ "{cltd|cdq}", []>, Imp<[EAX],[EAX,EDX]>; // EDX:EAX = signext(EAX)
+
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
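+// "xor{l} %reg, %reg" encodes in 2 bytes versus 5 for "mov{l} $0, %reg" (for a
+// 32-bit register). The MRMInitReg form hides the fact that the source and
+// destination are the same, otherwise-undefined, register, which is roughly
+// what the FIXME above refers to.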
+def MOV8r0 : I<0x30, MRMInitReg, (ops GR8 :$dst),
+ "xor{b} $dst, $dst",
+ [(set GR8:$dst, 0)]>;
+def MOV16r0 : I<0x31, MRMInitReg, (ops GR16:$dst),
+ "xor{w} $dst, $dst",
+ [(set GR16:$dst, 0)]>, OpSize;
+def MOV32r0 : I<0x31, MRMInitReg, (ops GR32:$dst),
+ "xor{l} $dst, $dst",
+ [(set GR32:$dst, 0)]>;
+
+// Basic operations on the GR16 / GR32 subclasses GR16_ and GR32_, which contain
+// only those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
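+// These are plain moves constrained to the "_" classes with empty patterns;
+// they presumably exist so the selector can force a value into a register that
+// is guaranteed to have an addressable 8-bit sub-register (e.g. for i8
+// truncates and extends).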
+def MOV16to16_ : I<0x89, MRMDestReg, (ops GR16_:$dst, GR16:$src),
+ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32to32_ : I<0x89, MRMDestReg, (ops GR32_:$dst, GR32:$src),
+ "mov{l} {$src, $dst|$dst, $src}", []>;
+
+def MOV16_rr : I<0x89, MRMDestReg, (ops GR16_:$dst, GR16_:$src),
+ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32_rr : I<0x89, MRMDestReg, (ops GR32_:$dst, GR32_:$src),
+ "mov{l} {$src, $dst|$dst, $src}", []>;
+def MOV16_rm : I<0x8B, MRMSrcMem, (ops GR16_:$dst, i16mem:$src),
+ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32_rm : I<0x8B, MRMSrcMem, (ops GR32_:$dst, i32mem:$src),
+ "mov{l} {$src, $dst|$dst, $src}", []>;
+def MOV16_mr : I<0x89, MRMDestMem, (ops i16mem:$dst, GR16_:$src),
+ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32_mr : I<0x89, MRMDestMem, (ops i32mem:$dst, GR32_:$src),
+ "mov{l} {$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
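+// On x86 ELF/Linux the thread pointer lives at %gs:0, so TLS_tp below reads it
+// directly, and TLS_gs_rr / TLS_gs_ri fold "thread pointer + offset" into a
+// single %gs-relative load for the access models where the offset is known.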
+
+def TLS_addr : I<0, Pseudo, (ops GR32:$dst, i32imm:$sym),
+ "leal ${sym:mem}(,%ebx,1), $dst",
+ [(set GR32:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>,
+ Imp<[EBX],[]>;
+
+let AddedComplexity = 10 in
+def TLS_gs_rr : I<0, Pseudo, (ops GR32:$dst, GR32:$src),
+ "movl %gs:($src), $dst",
+ [(set GR32:$dst, (load (add X86TLStp, GR32:$src)))]>;
+
+let AddedComplexity = 15 in
+def TLS_gs_ri : I<0, Pseudo, (ops GR32:$dst, i32imm:$src),
+ "movl %gs:${src:mem}, $dst",
+ [(set GR32:$dst,
+ (load (add X86TLStp, (X86Wrapper tglobaltlsaddr:$src))))]>;
+
+def TLS_tp : I<0, Pseudo, (ops GR32:$dst),
+ "movl %gs:0, $dst",
+ [(set GR32:$dst, X86TLStp)]>;
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : I<0, Pseudo, (ops i32imm:$line, i32imm:$col, i32imm:$file),
+ "; .loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, noResults = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (ops GR32:$addr),
+ "ret #eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
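+// X86Wrapper wraps one of these target constants so it can be materialized as
+// a relocatable 32-bit immediate, hence the MOV32ri / ADD32ri / MOV32mi
+// selections below.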
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)), (MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+
+// Calls
+def : Pat<(X86tailcall GR32:$dst),
+ (CALL32r GR32:$dst)>;
+
+def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+
+// X86 specific add which produces a flag.
+def : Pat<(addc GR32:$src1, GR32:$src2),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(addc GR32:$src1, (load addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+def : Pat<(addc GR32:$src1, imm:$src2),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+def : Pat<(subc GR32:$src1, GR32:$src2),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(subc GR32:$src1, (load addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+def : Pat<(subc GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+def : Pat<(truncstorei1 (i8 imm:$src), addr:$dst),
+ (MOV8mi addr:$dst, imm:$src)>;
+def : Pat<(truncstorei1 GR8:$src, addr:$dst),
+ (MOV8mr addr:$dst, GR8:$src)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
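+// "test{l} %reg, %reg" is a 2-byte encoding while "cmp{l} $0, %reg" takes at
+// least 3 bytes, and both set ZF/SF identically for a compare against zero.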
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+
+// {s|z}extload bool -> {s|z}extload byte
+def : Pat<(sextloadi16i1 addr:$src), (MOVSX16rm8 addr:$src)>;
+def : Pat<(sextloadi32i1 addr:$src), (MOVSX32rm8 addr:$src)>;
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+
+// extload bool -> extload byte
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+// anyext -> zext
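+// anyext leaves the high bits unspecified, so either extension would be legal;
+// movzx is used here, presumably because zero extension avoids the
+// partial-register update a plain byte/word move would incur.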
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
+def : Pat<(i16 (anyext (loadi8 addr:$src))), (MOVZX16rm8 addr:$src)>;
+def : Pat<(i32 (anyext (loadi8 addr:$src))), (MOVZX32rm8 addr:$src)>;
+def : Pat<(i32 (anyext (loadi16 addr:$src))), (MOVZX32rm16 addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// (shl x, 1) ==> (add x, x)
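+// add is commutable and (per ADD16rr/ADD32rr above) convertible to LEA by the
+// two-address pass, neither of which holds for shifts, so the add form is
+// preferred.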
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+
+// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
+def : Pat<(or (srl GR32:$src1, CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
+def : Pat<(or (shl GR32:$src1, CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
+def : Pat<(or (srl GR16:$src1, CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
+def : Pat<(or (shl GR16:$src1, CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"
+
+//===----------------------------------------------------------------------===//
+// X86-64 Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrX86-64.td"
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 0000000..c774460
--- /dev/null
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,645 @@
+//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction templates
+//===----------------------------------------------------------------------===//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXRI - MMX instructions with TB prefix and REX.W.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXRI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, XS, Requires<[HasMMX]>;
+
+// Some 'special' instructions
+def IMPLICIT_DEF_VR64 : I<0, Pseudo, (ops VR64:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set VR64:$dst, (v8i8 (undef)))]>,
+ Requires<[HasMMX]>;
+
+// 64-bit vector undef's.
+def : Pat<(v8i8 (undef)), (IMPLICIT_DEF_VR64)>;
+def : Pat<(v4i16 (undef)), (IMPLICIT_DEF_VR64)>;
+def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>;
+def : Pat<(v1i64 (undef)), (IMPLICIT_DEF_VR64)>;
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
+
+def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
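+// The bc_* fragments just match a bitconvert whose result has the named 64-bit
+// vector type; they let the v1i64-typed load_mmx above feed instructions whose
+// patterns operate on v8i8 / v4i16 / v2i32 lanes.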
+
+//===----------------------------------------------------------------------===//
+// MMX Masks
+//===----------------------------------------------------------------------===//
+
+// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
+// PSHUFW imm.
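+// Each 2-bit field of the immediate selects the source word for the
+// corresponding result word, so the identity shuffle <0, 1, 2, 3> encodes as
+// 0b11100100 = 0xE4.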
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
+def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKHMask(N);
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
+def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKLMask(N);
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKH_v_undef_Mask(N);
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKL_v_undef_Mask(N);
+}]>;
+
+// Patterns for shuffling.
+def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
+}], MMX_SHUFFLE_get_shuf_imm>;
+
+// Patterns for: vector_shuffle v1, v2, <4, 5, 2, 3>; etc.
+def MMX_MOVL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLMask(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+ // MMXI_binop_rm - Simple MMX binary operator.
+ multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
+ (bitconvert
+ (load_mmx addr:$src2)))))]>;
+ }
+
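+  // A defm of one of these multiclasses appends the "rr"/"rm" (and "ri")
+  // suffixes to the given name; e.g. "defm MMX_PADDB : MMXI_binop_rm<0xFC,
+  // "paddb", add, v8i8, 1>" below expands to MMX_PADDBrr and MMX_PADDBrm.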
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
+ //
+ // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
+ // to collapse (bitconvert VT to VT) into its operand.
+ //
+ multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId> {
+ def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS & FEMMS Instructions
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (ops), "femms", [(int_x86_mmx_femms)]>;
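+// The MMX registers alias the x87 floating-point register stack, so EMMS must
+// be executed to mark the FP tag word empty before subsequent x87 code; FEMMS
+// is AMD's faster 3DNow! variant of the same operation.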
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
+ "movd {$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}", []>;
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src),
+ "movd {$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (ops VR64:$dst, GR64:$src),
+ "movd {$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
+ "movq {$src, $dst|$dst, $src}", []>;
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(store (v1i64 VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : MMXID<0xD6, MRMDestMem, (ops VR64:$dst, VR128:$src),
+ "movdq2q {$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))]>;
+
+def MMX_MOVQ2DQrr : MMXIS<0xD6, MRMDestMem, (ops VR128:$dst, VR64:$src),
+ "movq2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (bitconvert (v1i64 VR64:$src)))]>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src),
+ "movntq {$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMX2I<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle immAllZerosV,
+ (v2i32 (scalar_to_vector GR32:$src)),
+ MMX_MOVL_shuffle_mask)))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMX2I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle immAllZerosV,
+ (v2i32 (scalar_to_vector
+ (loadi32 addr:$src))),
+ MMX_MOVL_shuffle_mask)))]>;
+
+// Arithmetic Instructions
+
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
+defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
+defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
+defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
+
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
+defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
+defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
+defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
+defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
+defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+
+let isTwoAddress = 1 in {
+ def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ VR64:$src2)))]>;
+ def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ (load addr:$src2))))]>;
+}
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// Conversion Instructions
+
+// -- Unpack Instructions
+let isTwoAddress = 1 in {
+ // Unpack High Packed Data Instructions
+ def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
+
+ def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
+
+ def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+ def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
+
+ // Unpack Low Packed Data Instructions
+ def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
+ def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
+
+ def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
+ def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
+
+ def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKL_shuffle_mask)))]>;
+ def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)),
+ MMX_UNPCKL_shuffle_mask)))]>;
+}
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, i8imm:$src2),
+ "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle
+ VR64:$src1, (undef),
+ MMX_PSHUFW_shuffle_mask:$src2)))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
+ "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle
+ (bc_v4i16 (load_mmx addr:$src1)),
+ (undef),
+ MMX_PSHUFW_shuffle_mask:$src2)))]>;
+
+// -- Conversion Instructions
+def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
+ "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
+ "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+ "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+ "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
+ "cvtps2pi {$src, $dst|$dst, $src}", []>;
+def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
+ "cvtps2pi {$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (ops VR64:$dst, VR128:$src),
+ "cvttpd2pi {$src, $dst|$dst, $src}", []>;
+def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (ops VR64:$dst, f128mem:$src),
+ "cvttpd2pi {$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (ops VR64:$dst, VR128:$src),
+ "cvttps2pi {$src, $dst|$dst, $src}", []>;
+def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (ops VR64:$dst, f64mem:$src),
+ "cvttps2pi {$src, $dst|$dst, $src}", []>;
+
+// Extract / Insert
+def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+
+def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
+ (ops GR32:$dst, VR64:$src1, i16i8imm:$src2),
+ "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+ (iPTR imm:$src2)))]>;
+let isTwoAddress = 1 in {
+ def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, GR32:$src2, i16i8imm:$src3),
+ "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ GR32:$src2, (iPTR imm:$src3))))]>;
+ def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3))))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (ops GR32:$dst, VR64:$src),
+ "pmovmskb {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+// Misc.
+def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (ops VR64:$src, VR64:$mask),
+ "maskmovq {$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>,
+ Imp<[EDI],[]>;
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map zero vector to pxor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+let isReMaterializable = 1 in {
+ def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst),
+ "pxor $dst, $dst",
+ [(set VR64:$dst, (v1i64 immAllZerosV))]>;
+ def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst),
+ "pcmpeqd $dst, $dst",
+ [(set VR64:$dst, (v1i64 immAllOnesV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Store 64-bit integer vector values.
+def : Pat<(store (v8i8 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v4i16 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2i32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v1i64 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+
+// 64-bit vector all zero's.
+def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
+
+// 64-bit vector all one's.
+def : Pat<(v8i8 immAllOnesV), (MMX_V_SETALLONES)>;
+def : Pat<(v4i16 immAllOnesV), (MMX_V_SETALLONES)>;
+def : Pat<(v2i32 immAllOnesV), (MMX_V_SETALLONES)>;
+def : Pat<(v1i64 immAllOnesV), (MMX_V_SETALLONES)>;
+
+// Bit convert.
+def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
+
+// 64-bit bit convert.
+def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+
+def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
+
+// Move scalar to MMX register, zero-extended.
+// movd to an MMX register zero-extends the upper bits.
+let AddedComplexity = 15 in {
+ def : Pat<(v8i8 (vector_shuffle immAllZerosV,
+ (v8i8 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i16 (vector_shuffle immAllZerosV,
+ (v4i16 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v2i32 (vector_shuffle immAllZerosV,
+ (v2i32 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+}
+
+// Scalar to v2i32 / v4i16 / v8i8. The source is a GR32; for the v8i8 and
+// v4i16 cases only the lower 8 or 16 bits matter.
+def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(v2i32 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
+
+// Patterns to perform canonical versions of vector shuffling.
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
+ (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
+ (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKL_v_undef_shuffle_mask)),
+ (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
+}
+
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
+ (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
+ (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
+ MMX_UNPCKH_v_undef_shuffle_mask)),
+ (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// Patterns to perform vector shuffling with a zeroed out vector.
+let AddedComplexity = 20 in {
+ def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV,
+ (v2i32 (scalar_to_vector (load_mmx addr:$src))),
+ MMX_UNPCKL_shuffle_mask)),
+ (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
+}
+
+// Some special case PANDN patterns.
+// FIXME: Get rid of these.
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 0000000..5fc7a65
--- /dev/null
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,2572 @@
+//===-- X86InstrSSE.td - Describe the X86 SSE Instruction Set --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the University
+// of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+
+def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
+def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>;
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+
+//===----------------------------------------------------------------------===//
+// SSE 'Special' Instructions
+//===----------------------------------------------------------------------===//
+
+def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set VR128:$dst, (v4f32 (undef)))]>,
+ Requires<[HasSSE1]>;
+def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", [],
+ [SDNPHasChain]>;
+def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", [],
+ [SDNPHasChain]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
+def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def PSxLDQ_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getValue() >> 3);
+}]>;
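+// For illustration: this appears to convert a shift amount given in bits into
+// the byte count that the byte-granular pslldq/psrldq encodings take, e.g. a
+// 32-bit shift becomes the immediate 32 >> 3 = 4.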
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+def SSE_splat_mask : PatLeaf<(build_vector), [{
+ return X86::isSplatMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
+ return X86::isSplatLoMask(N);
+}]>;
+
+def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHLPSMask(N);
+}]>;
+
+def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHLPS_v_undef_Mask(N);
+}]>;
+
+def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVHPMask(N);
+}]>;
+
+def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLPMask(N);
+}]>;
+
+def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVLMask(N);
+}]>;
+
+def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVSHDUPMask(N);
+}]>;
+
+def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isMOVSLDUPMask(N);
+}]>;
+
+def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKLMask(N);
+}]>;
+
+def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKHMask(N);
+}]>;
+
+def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKL_v_undef_Mask(N);
+}]>;
+
+def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKH_v_undef_Mask(N);
+}]>;
+
+def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFHWMask(N);
+}], SHUFFLE_get_pshufhw_imm>;
+
+def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFLWMask(N);
+}], SHUFFLE_get_pshuflw_imm>;
+
+def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isSHUFPMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isSHUFPMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def CMOV_FR32 : I<0, Pseudo,
+ (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (ops VR128:$dst, VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (ops VR128:$dst, VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (ops VR128:$dst, VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE1 Instructions
+//===----------------------------------------------------------------------===//
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+
+class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
+
+// Move Instructions
+def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+ "movss {$src, $dst|$dst, $src}", []>;
+def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (loadf32 addr:$src))]>;
+def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (ops GR32:$dst, FR32:$src),
+ "cvttss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (ops GR32:$dst, f32mem:$src),
+ "cvttss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (ops FR32:$dst, GR32:$src),
+ "cvtsi2ss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
+ "cvtsi2ss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "cvtss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
+def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (ops GR32:$dst, f32mem:$src),
+ "cvtss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si
+ (load addr:$src)))]>;
+
+// Aliases for intrinsics
+def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "cvttss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse_cvttss2si VR128:$src))]>;
+def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (ops GR32:$dst, f32mem:$src),
+ "cvttss2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+                                      (int_x86_sse_cvttss2si (load addr:$src)))]>;
+
+let isTwoAddress = 1 in {
+ def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR32:$src2),
+ "cvtsi2ss {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ GR32:$src2))]>;
+ def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i32mem:$src2),
+ "cvtsi2ss {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+}
+
+// Comparison instructions
+let isTwoAddress = 1 in {
+ def CMPSSrr : SSI<0xC2, MRMSrcReg,
+ (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
+ "cmp${cc}ss {$src, $dst|$dst, $src}",
+ []>;
+ def CMPSSrm : SSI<0xC2, MRMSrcMem,
+ (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
+}
+
+def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, FR32:$src2)]>;
+def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isTwoAddress = 1 in {
+ def Int_CMPSSrr : SSI<0xC2, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSSrm : SSI<0xC2, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
+def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2))]>;
+
+def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
+ "comiss {$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
+def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
+ "comiss {$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), (load addr:$src2))]>;
+
+// Aliases of packed SSE1 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
+ "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+
+// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
+// disregarded.
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+ "movaps {$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
+// disregarded.
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+ "movaps {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+ def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
+ def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "orps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
+ def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
+}
+
+def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>;
+def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "orps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>;
+def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>;
+
+def FsANDNPSrr : PSI<0x55, MRMSrcReg,
+ (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>;
+def FsANDNPSrm : PSI<0x55, MRMSrcMem,
+ (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>;
+}
+
+/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements undefined.
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let isTwoAddress = 1 in {
+multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
+defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
+defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
+defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
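+
+// For illustration: 'defm' prefixes each def in the multiclass with the given
+// name, so the ADD line above produces ADDSSrr, ADDSSrm, ADDPSrr, ADDPSrm,
+// ADDSSrr_Int and ADDSSrm_Int. The scalar reg+reg form, for example, is
+// roughly equivalent to writing by hand:
+//   def ADDSSrr : SSI<0x58, MRMSrcReg,
+//                     (ops FR32:$dst, FR32:$src1, FR32:$src2),
+//                     "addss {$src2, $dst|$dst, $src2}",
+//                     [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
+// with isTwoAddress and isCommutable set by the enclosing 'let' and the
+// Commutable parameter.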
+
+/// sse1_fp_binop_rm - Other SSE1 binops
+///
+/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let isTwoAddress = 1 in {
+multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PSrr_Int : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PSrm_Int : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, (load addr:$src2)))]>;
+}
+}
+
+defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse_max_ss, int_x86_sse_max_ps>;
+defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse_min_ss, int_x86_sse_min_ps>;
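+
+// For illustration: each defm above expands to the six defs described for
+// basic_sse1_fp_binop_rm plus the packed-intrinsic pair, e.g. MAXSSrr,
+// MAXSSrm, MAXPSrr, MAXPSrm, MAXSSrr_Int, MAXSSrm_Int, MAXPSrr_Int and
+// MAXPSrm_Int, where the packed-intrinsic reg+reg form is roughly:
+//   def MAXPSrr_Int : PSI<0x5F, MRMSrcReg,
+//                         (ops VR128:$dst, VR128:$src1, VR128:$src2),
+//                         "maxps {$src2, $dst|$dst, $src2}",
+//                         [(set VR128:$dst, (int_x86_sse_max_ps VR128:$src1,
+//                                                               VR128:$src2))]>;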
+
+//===----------------------------------------------------------------------===//
+// SSE1 packed FP Instructions
+
+// Move Instructions
+def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movaps {$src, $dst|$dst, $src}", []>;
+def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movaps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
+ "movaps {$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+
+def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movups {$src, $dst|$dst, $src}", []>;
+def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movups {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
+ "movups {$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+
+let isTwoAddress = 1 in {
+ let AddedComplexity = 20 in {
+ def MOVLPSrm : PSI<0x12, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ "movlps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVLP_shuffle_mask)))]>;
+ def MOVHPSrm : PSI<0x16, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ "movhps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVHP_shuffle_mask)))]>;
+ } // AddedComplexity
+} // isTwoAddress
+
+def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movlps {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movhps {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (vector_shuffle
+ (bc_v2f64 (v4f32 VR128:$src)), (undef),
+ UNPCKH_shuffle_mask)), (iPTR 0))),
+ addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let AddedComplexity = 15 in {
+def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "movlhps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHP_shuffle_mask)))]>;
+
+def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "movhlps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHLPS_shuffle_mask)))]>;
+} // AddedComplexity
+} // isTwoAddress
+
+
+
+// Arithmetic
+
+/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SSr : SSI<opc, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+ !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SSm : SSI<opc, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+ !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PSr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PSm : PSI<opc, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
+ !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PSr_Int : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : PSI<opc, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+ !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (load addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
+ int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
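+
+// For illustration: each unop defm above expands to eight defs, e.g. SQRT
+// yields SQRTSSr, SQRTSSm, SQRTPSr, SQRTPSm, SQRTSSr_Int, SQRTSSm_Int,
+// SQRTPSr_Int and SQRTPSm_Int; the plain packed form is roughly:
+//   def SQRTPSr : PSI<0x51, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+//                     "sqrtps {$src, $dst|$dst, $src}",
+//                     [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))]>;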
+
+// Logical
+let isTwoAddress = 1 in {
+ let isCommutable = 1 in {
+ def ANDPSrr : PSI<0x54, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (and VR128:$src1, VR128:$src2)))]>;
+ def ORPSrr : PSI<0x56, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "orps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (or VR128:$src1, VR128:$src2)))]>;
+ def XORPSrr : PSI<0x57, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (xor VR128:$src1, VR128:$src2)))]>;
+ }
+
+ def ANDPSrm : PSI<0x54, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (and VR128:$src1,
+ (bc_v2i64 (loadv4f32 addr:$src2))))]>;
+ def ORPSrm : PSI<0x56, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "orps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (or VR128:$src1,
+ (bc_v2i64 (loadv4f32 addr:$src2))))]>;
+ def XORPSrm : PSI<0x57, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (xor VR128:$src1,
+ (bc_v2i64 (loadv4f32 addr:$src2))))]>;
+ def ANDNPSrr : PSI<0x55, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "andnps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))]>;
+ def ANDNPSrm : PSI<0x55, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1,f128mem:$src2),
+ "andnps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (bc_v2i64 (loadv4f32 addr:$src2)))))]>;
+}
+
+let isTwoAddress = 1 in {
+ def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Shuffle and unpack instructions
+let isTwoAddress = 1 in {
+ let isConvertibleToThreeAddress = 1 in // Convert to pshufd
+ def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1,
+ VR128:$src2, i32i8imm:$src3),
+ "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
+ def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1,
+ f128mem:$src2, i32i8imm:$src3),
+ "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "unpckhps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "unpckhps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
+
+ def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "unpcklps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "unpcklps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
+ } // AddedComplexity
+} // isTwoAddress
+
+// Mask creation
+def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "movmskps {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
+def MOVMSKPDrr : PSI<0x50, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "movmskpd {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
+
+// Prefetching loads.
+// TODO: no intrinsics for these?
+def PREFETCHT0 : PSI<0x18, MRM1m, (ops i8mem:$src), "prefetcht0 $src", []>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (ops i8mem:$src), "prefetcht1 $src", []>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (ops i8mem:$src), "prefetcht2 $src", []>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (ops i8mem:$src), "prefetchnta $src", []>;
+
+// Non-temporal stores
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src),
+ "movntps {$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+
+// Load, store, and memory fence
+def SFENCE : PSI<0xAE, MRM7m, (ops), "sfence", [(int_x86_sse_sfence)]>;
+
+// MXCSR register
+def LDMXCSR : PSI<0xAE, MRM2m, (ops i32mem:$src),
+ "ldmxcsr $src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (ops i32mem:$dst),
+ "stmxcsr $dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+let isReMaterializable = 1 in
+def V_SET0 : PSI<0x57, MRMInitReg, (ops VR128:$dst),
+ "xorps $dst, $dst",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+
+// FR32 to 128-bit vector conversion.
+def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+
+// FIXME: we may not be able to eliminate this movss with coalescing since the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+// (f32 FR32:$src)>;
+def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPS2SSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, VR128:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(store (f32 (vector_extract (v4f32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+
+// Move to the lower bits of a VR128, leaving the upper bits alone.
+// Three-operand (but two-address) aliases.
+let isTwoAddress = 1 in {
+ def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, FR32:$src2),
+ "movss {$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPSrr : SSI<0x10, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "movss {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)))]>;
+}
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in
+def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+
+
+//===----------------------------------------------------------------------===//
+// SSE2 Instructions
+//===----------------------------------------------------------------------===//
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+
+class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+
+// Move Instructions
+def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+ "movsd {$src, $dst|$dst, $src}", []>;
+def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (loadf64 addr:$src))]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (ops GR32:$dst, FR64:$src),
+ "cvttsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (ops GR32:$dst, f64mem:$src),
+ "cvttsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
+ "cvtsd2ss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src),
+ "cvtsd2ss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
+def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (ops FR64:$dst, GR32:$src),
+ "cvtsi2sd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
+ "cvtsi2sd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
+ "cvtss2sd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
+ "cvtss2sd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "cvtsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
+def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (ops GR32:$dst, f128mem:$src),
+ "cvtsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si
+ (load addr:$src)))]>;
+
+// Aliases for intrinsics
+def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "cvttsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse2_cvttsd2si VR128:$src))]>;
+def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (ops GR32:$dst, f128mem:$src),
+ "cvttsd2si {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvttsd2si
+ (load addr:$src)))]>;
+
+// Comparison instructions
+let isTwoAddress = 1 in {
+ def CMPSDrr : SDI<0xC2, MRMSrcReg,
+ (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
+ "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
+ def CMPSDrm : SDI<0xC2, MRMSrcMem,
+ (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
+}
+
+def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, FR64:$src2)]>;
+def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isTwoAddress = 1 in {
+ def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}sd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
+def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2))]>;
+
+def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (ops VR128:$src1, VR128:$src2),
+ "comisd {$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
+def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
+ "comisd {$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (load addr:$src2))]>;
+
+// Aliases of packed SSE2 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
+ "pxor $dst, $dst", [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
+// disregarded.
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+ "movapd {$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
+// disregarded.
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+ "movapd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+ def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
+ def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "orpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
+ def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
+}
+
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>;
+def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "orpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>;
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>;
+
+def FsANDNPDrr : PDI<0x55, MRMSrcReg,
+ (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>;
+def FsANDNPDrm : PDI<0x55, MRMSrcMem,
+ (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>;
+}
+
+/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements undefined.
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let isTwoAddress = 1 in {
+multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
+defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
+defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
+defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
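+
+// For illustration: these expand exactly like their SSE1 counterparts, with
+// sd/pd suffixes and FR64/v2f64 types, so the ADD line above yields ADDSDrr,
+// ADDSDrm, ADDPDrr, ADDPDrm, ADDSDrr_Int and ADDSDrm_Int, e.g.:
+//   def ADDSDrr : SDI<0x58, MRMSrcReg,
+//                     (ops FR64:$dst, FR64:$src1, FR64:$src2),
+//                     "addsd {$src2, $dst|$dst, $src2}",
+//                     [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;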
+
+/// sse2_fp_binop_rm - Other SSE2 binops
+///
+/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let isTwoAddress = 1 in {
+multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PDrr_Int : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PDrm_Int : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, (load addr:$src2)))]>;
+}
+}
+
+defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
+defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
+
+//===----------------------------------------------------------------------===//
+// SSE2 packed FP Instructions
+
+// Move Instructions
+def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movapd {$src, $dst|$dst, $src}", []>;
+def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movapd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2f64 addr:$src))]>;
+
+def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, VR128:$src),
+ "movapd {$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movupd {$src, $dst|$dst, $src}", []>;
+def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movupd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, VR128:$src),
+ "movupd {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+let isTwoAddress = 1 in {
+ let AddedComplexity = 20 in {
+ def MOVLPDrm : PDI<0x12, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ "movlpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)),
+ MOVLP_shuffle_mask)))]>;
+ def MOVHPDrm : PDI<0x16, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ "movhpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)),
+ MOVHP_shuffle_mask)))]>;
+ } // AddedComplexity
+} // isTwoAddress
+
+def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movlpd {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movhpd {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_shuffle_mask)), (iPTR 0))),
+ addr:$dst)]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtdq2ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
+ "cvtdq2ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (loadv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// SSE2 instructions with XS prefix
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtdq2pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "cvtdq2pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtps2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "cvtps2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (load addr:$src)))]>;
+// SSE2 packed instructions with XS prefix
+def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvttps2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "cvttps2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (load addr:$src)))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtpd2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "cvtpd2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (load addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvttpd2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "cvttpd2dq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (load addr:$src)))]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtps2pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ "cvtps2pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "cvtpd2ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "cvtpd2ps {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (load addr:$src)))]>;
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isTwoAddress = 1 in {
+def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR32:$src2),
+ "cvtsi2sd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ GR32:$src2))]>;
+def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i32mem:$src2),
+ "cvtsi2sd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "cvtsd2ss {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ VR128:$src2))]>;
+def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ "cvtsd2ss {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ (load addr:$src2)))]>;
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "cvtss2sd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+ "cvtss2sd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+// Arithmetic
+
+/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// Finally, there is a variant for the full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SDr : SDI<opc, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+ !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SDm : SDI<opc, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+ !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PDr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PDm : PDI<opc, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
+ !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : PDI<opc, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (load addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
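+// For reference, the defm above should expand sse2_fp_unop_rm into eight
+// instructions, prefixing "SQRT" onto each def name: SQRTSDr/SQRTSDm (scalar),
+// SQRTPDr/SQRTPDm (packed), plus SQRTSDr_Int/SQRTSDm_Int and
+// SQRTPDr_Int/SQRTPDm_Int (intrinsic forms). Roughly, the scalar register
+// form should instantiate to something like:
+//   def SQRTSDr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+//                     "sqrtsd {$src, $dst|$dst, $src}",
+//                     [(set FR64:$dst, (fsqrt FR64:$src))]>;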
+
+// There are no f64 versions of the reciprocal approximation instructions.
+
+// Logical
+let isTwoAddress = 1 in {
+ let isCommutable = 1 in {
+ def ANDPDrr : PDI<0x54, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ORPDrr : PDI<0x56, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "orpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def XORPDrr : PDI<0x57, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ }
+
+ def ANDPDrm : PDI<0x54, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (loadv2f64 addr:$src2))))]>;
+ def ORPDrm : PDI<0x56, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "orpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (loadv2f64 addr:$src2))))]>;
+ def XORPDrm : PDI<0x57, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (loadv2f64 addr:$src2))))]>;
+ def ANDNPDrr : PDI<0x55, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ANDNPDrm : PDI<0x55, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1,f128mem:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (loadv2f64 addr:$src2))))]>;
+}
+
+let isTwoAddress = 1 in {
+ def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}pd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Shuffle and unpack instructions
+let isTwoAddress = 1 in {
+ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
+ "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
+ def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1,
+ f128mem:$src2, i8imm:$src3),
+ "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "unpckhpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "unpckhpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
+
+ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "unpcklpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "unpcklpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
+ } // AddedComplexity
+} // isTwoAddress
+
+
+//===----------------------------------------------------------------------===//
+// SSE integer instructions
+
+// Move Instructions
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movdqa {$src, $dst|$dst, $src}", []>;
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
+ "movdqa {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2i64 addr:$src))]>;
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
+ "movdqa {$src, $dst|$dst, $src}",
+ [(store (v2i64 VR128:$src), addr:$dst)]>;
+def MOVDQUrm : I<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
+ "movdqu {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def MOVDQUmr : I<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
+ "movdqu {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+
+let isTwoAddress = 1 in {
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (loadv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId> {
+ def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (loadv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
+}
+
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (loadv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,(loadv2i64 addr:$src2)))]>;
+}
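+// Note: instantiating PDI_binop_rm with OpVT = v2i64 would leave a
+// (bitconvert (loadv2i64 addr:$src2)) of the same v2i64 type in the memory
+// pattern, and tblgen does not yet fold such a no-op bitconvert; that is the
+// reason for the dedicated v2i64 multiclass above (see the FIXME).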
+
+} // isTwoAddress
+
+// 128-bit Integer Arithmetic
+
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+
+
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;
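+// Each of the shift defms should expand into three instructions; for PSLLW,
+// roughly PSLLWrr / PSLLWrm (opcode 0xF1, count in an XMM register or memory)
+// plus an immediate-count form along the lines of:
+//   def PSLLWri : PDIi8<0x71, MRM6r,
+//                       (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+//                       "psllw {$src2, $dst|$dst, $src2}", ...>;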
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
+// PSRAQ doesn't exist in SSE[1-3].
+
+// 128-bit logical shifts.
+let isTwoAddress = 1 in {
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+ "pslldq {$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+ "psrldq {$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+}
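+// Note: these intrinsics appear to take the shift amount in bits, while
+// pslldq/psrldq shift whole bytes; PSxLDQ_imm (defined elsewhere in this file)
+// is presumably the transform that rescales the immediate accordingly.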
+
+// Logical
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let isTwoAddress = 1 in {
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (load addr:$src2))))]>;
+}
+
+// SSE2 Integer comparison
+defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
+defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
+defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
+defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+
+// Pack instructions
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+
+// Shuffle and unpack instructions
+def PSHUFDri : PDIi8<0x70, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
+def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
+ (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ (bc_v4i32(loadv2i64 addr:$src1)),
+ (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
+
+// SSE2 with ImmT == Imm8 and XS prefix.
+def PSHUFHWri : Ii8<0x70, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+ "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
+ XS, Requires<[HasSSE2]>;
+def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
+ (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+ "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (loadv2i64 addr:$src1)),
+ (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 with ImmT == Imm8 and XD prefix.
+def PSHUFLWri : Ii8<0x70, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+ "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
+ XD, Requires<[HasSSE2]>;
+def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
+ (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
+ "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (loadv2i64 addr:$src1)),
+ (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+let isTwoAddress = 1 in {
+ def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (loadv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (loadv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (loadv2i64 addr:$src2)),
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklqdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklqdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (vector_shuffle VR128:$src1,
+ (loadv2i64 addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
+
+ def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (loadv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (loadv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (loadv2i64 addr:$src2)),
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhqdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhqdq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (vector_shuffle VR128:$src1,
+ (loadv2i64 addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
+}
+
+// Extract / Insert
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (ops GR32:$dst, VR128:$src1, i32i8imm:$src2),
+ "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ (iPTR imm:$src2)))]>;
+let isTwoAddress = 1 in {
+ def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v8i16 (X86pinsrw (v8i16 VR128:$src1),
+ GR32:$src2, (iPTR imm:$src3))))]>;
+ def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v8i16 (X86pinsrw (v8i16 VR128:$src1),
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3))))]>;
+}
+
+// Mask creation
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (ops GR32:$dst, VR128:$src),
+ "pmovmskb {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+// Conditional store
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (ops VR128:$src, VR128:$mask),
+ "maskmovdqu {$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+ Imp<[EDI],[]>;
+
+// Non-temporal stores
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (ops f128mem:$dst, VR128:$src),
+ "movntpd {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (ops i128mem:$dst, VR128:$src),
+ "movntdq {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr : I<0xC3, MRMDestMem, (ops i32mem:$dst, GR32:$src),
+ "movnti {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (ops i8mem:$src),
+ "clflush $src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM5m, (ops),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM6m, (ops),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+
+// Alias instruction that maps an all-ones vector to pcmpeqd $dst, $dst.
+// FIXME: remove when we can teach regalloc that pcmpeqd reg, reg is ok.
+let isReMaterializable = 1 in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
+ "pcmpeqd $dst, $dst",
+ [(set VR128:$dst, (v2f64 immAllOnesV))]>;
+
+// FR64 to 128-bit vector conversion.
+def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, GR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (ops FR32:$dst, GR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// FIXME: may not be able to eliminate this movss with coalescing, since the
+// src and dest register classes are different. We really want to write this pattern
+// like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+// (f32 FR32:$src)>;
+def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPD2SDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (ops GR32:$dst, VR128:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (ops GR32:$dst, FR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, FR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+
+// Move to lower bits of a VR128, leaving upper bits alone.
+// Three operand (but two address) aliases.
+let isTwoAddress = 1 in {
+ def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, FR64:$src2),
+ "movsd {$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPDrr : SDI<0x10, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "movsd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)))]>;
+}
+
+// Store / copy the lower 64 bits of an XMM register.
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in
+ def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector
+ (loadf64 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+
+let AddedComplexity = 15 in
+// movd / movq to XMM register zero-extends
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, GR32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle immAllZerosV,
+ (v4i32 (scalar_to_vector GR32:$src)),
+ MOVL_shuffle_mask)))]>;
+let AddedComplexity = 20 in
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle immAllZerosV,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))),
+ MOVL_shuffle_mask)))]>;
+
+// Move from XMM to XMM, clearing the upper 64 bits.
+let AddedComplexity = 15 in
+def MOVZQI2PQIrr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+let AddedComplexity = 20 in
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_movl_dq
+ (bitconvert (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+
+//===----------------------------------------------------------------------===//
+// SSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+
+// Move Instructions
+def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movshdup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src, (undef),
+ MOVSHDUP_shuffle_mask)))]>;
+def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movshdup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ (loadv4f32 addr:$src), (undef),
+ MOVSHDUP_shuffle_mask)))]>;
+
+def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movsldup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src, (undef),
+ MOVSLDUP_shuffle_mask)))]>;
+def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
+ "movsldup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ (loadv4f32 addr:$src), (undef),
+ MOVSLDUP_shuffle_mask)))]>;
+
+def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
+ "movddup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src, (undef),
+ SSE_splat_lo_mask)))]>;
+def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ "movddup {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (vector_shuffle
+ (scalar_to_vector (loadf64 addr:$src)),
+ (undef),
+ SSE_splat_lo_mask)))]>;
+
+// Arithmetic
+let isTwoAddress = 1 in {
+ def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "addsubps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "addsubps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ (load addr:$src2)))]>;
+ def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "addsubpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "addsubpd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ (load addr:$src2)))]>;
+}
+
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
+ "lddqu {$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+// Horizontal ops
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
+
+let isTwoAddress = 1 in {
+ def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+}
+
+// Thread synchronization
+def MONITOR : I<0xC8, RawFrm, (ops), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0xC9, RawFrm, (ops), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ MOVSHDUP_shuffle_mask)),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef),
+ MOVSHDUP_shuffle_mask)),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ MOVSLDUP_shuffle_mask)),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef),
+ MOVSLDUP_shuffle_mask)),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===----------------------------------------------------------------------===//
+// SSSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 and OpSize prefixes.
+// SS3AI - SSSE3 instructions with TA and OpSize prefixes.
+
+class SS38I<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, T8, OpSize, Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, TA, OpSize, Requires<[HasSSSE3]>;
+
+/// SS3I_binop_rm_int - Simple SSSE3 binary operator that maps to an intrinsic.
+let isTwoAddress = 1 in {
+ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : SS38I<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : SS38I<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (loadv2i64 addr:$src2))))]>;
+ }
+}
+
+defm PMULHRSW128 : SS3I_binop_rm_int<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmulhrsw_128, 1>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// 128-bit vector undefs.
+def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+
+// 128-bit vector all zeros.
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
+
+// 128-bit vector all ones.
+def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
+
+// Store 128-bit integer vector values.
+def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
+// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
+// 16 bits matter.
+def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
+ Requires<[HasSSE2]>;
+
+// bit_convert
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+// Move scalar to XMM, zero-extending the upper elements.
+// movd to XMM register zero-extends
+let AddedComplexity = 15 in {
+def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+ (v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
+ (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+ (v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
+ (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
+// Zero a VR128, then do a MOVS{S|D} to the lower bits.
+def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
+ (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+ (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
+ (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
+}
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// Splat v4f32
+def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
+ (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>,
+ Requires<[HasSSE1]>;
+
+// Special unary SHUFPSrri case.
+// FIXME: when we want non-two-address code, should we use PSHUFD instead?
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+ SHUFP_unary_shuffle_mask:$sm),
+ (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+ Requires<[HasSSE1]>;
+// Unary v4f32 shuffle with PSHUF* in order to fold a load.
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+ SHUFP_unary_shuffle_mask:$sm),
+ (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+ Requires<[HasSSE2]>;
+// Special binary v4i32 shuffle cases with SHUFPS.
+def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+ PSHUFD_binary_shuffle_mask:$sm),
+ (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
+ Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v4i32 VR128:$src1),
+ (bc_v4i32 (loadv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm),
+ (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
+ Requires<[HasSSE2]>;
+
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
+ (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKL_v_undef_shuffle_mask)),
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
+ (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
+ UNPCKH_v_undef_shuffle_mask)),
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+let AddedComplexity = 15 in {
+// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHP_shuffle_mask)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVHLPS_shuffle_mask)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
+ MOVHLPS_v_undef_shuffle_mask)),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
+ MOVHLPS_v_undef_shuffle_mask)),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2),
+ MOVLP_shuffle_mask)),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2),
+ MOVLP_shuffle_mask)),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2),
+ MOVHP_shuffle_mask)),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2),
+ MOVHP_shuffle_mask)),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)),
+ MOVLP_shuffle_mask)),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (loadv2i64 addr:$src2),
+ MOVLP_shuffle_mask)),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)),
+ MOVHP_shuffle_mask)),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (loadv2i64 addr:$src2),
+                  MOVHP_shuffle_mask)),
+          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+}
+
+let AddedComplexity = 15 in {
+// Setting the lowest element in the vector.
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)),
+ (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVL_shuffle_mask)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVLP_shuffle_mask)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ MOVLP_shuffle_mask)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+}
+
+// Set lowest element and zero upper elements.
+let AddedComplexity = 20 in
+def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ MOVL_shuffle_mask)),
+ (MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
+
+// FIXME: Temporary workaround since 2-wide shuffle is broken.
+def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
+ (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
+ (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
+ (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
+ (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
+ (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
+ (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
+ (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
+ (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
+ (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
+ (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
+ (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
+ (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
+def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+// Some special case pandn patterns.
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ (load addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ (load addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ (load addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+// Unaligned load
+def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
+ Requires<[HasSSE1]>;
diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td
new file mode 100644
index 0000000..ac43846
--- /dev/null
+++ b/lib/Target/X86/X86InstrX86-64.td
@@ -0,0 +1,1165 @@
+//===-- X86InstrX86-64.td - Describe the X86-64 Instruction Set -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86-64 instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions...
+//
+
+// 64 bits, but only the low 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// 64 bits, but only the low 8 bits are significant.
+def i64i8imm : Operand<i64>;
+
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printlea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Complex Pattern Definitions...
+//
+def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+ [add, mul, shl, or, frameindex, X86Wrapper],
+ []>;
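+// In other words, lea64addr matches an arbitrary i64 address computation and
+// defers to SelectLEAAddr, which (presumably, mirroring the 32-bit lea32addr)
+// decomposes it into the four lea64mem operands: base, scale, index, disp.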
+
+//===----------------------------------------------------------------------===//
+// Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii32<o, F, ops, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm64, ops, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : SSI<o, F, ops, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : SDI<o, F, ops, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : PDI<o, F, ops, asm, pattern>, REX_W;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getValue() == (int32_t)N->getValue();
+}]>;
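+// For example, 0xFFFFFFFF80000000 (-2^31) satisfies i64immSExt32, because sign
+// extending its low 32 bits (0x80000000) reproduces the full 64-bit value,
+// whereas 0x0000000080000000 (+2^31) does not.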
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // zero extended field.
+ return (uint64_t)N->getValue() == (uint32_t)N->getValue();
+}]>;
+
+def i64immSExt8 : PatLeaf<(i64 imm), [{
+  // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit
+ // sign extended field.
+ return (int64_t)N->getValue() == (int8_t)N->getValue();
+}]>;
+
+def sextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (sextloadi1 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
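+// These fragments simply give i64-result-typed names to the generic extending
+// loads so that the 64-bit patterns below (e.g. MOVSX64rm8, MOVZX64rm16) can
+// refer to them directly.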
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+def IMPLICIT_DEF_GR64 : I<0, Pseudo, (ops GR64:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set GR64:$dst, (undef))]>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1, noResults = 1 in
+ // All calls clobber the non-callee saved registers...
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15] in {
+ def CALL64pcrel32 : I<0xE8, RawFrm, (ops i64imm:$dst, variable_ops),
+ "call ${dst:call}", []>;
+ def CALL64r : I<0xFF, MRM2r, (ops GR64:$dst, variable_ops),
+ "call {*}$dst", [(X86call GR64:$dst)]>;
+ def CALL64m : I<0xFF, MRM2m, (ops i64mem:$dst, variable_ops),
+ "call {*}$dst", []>;
+ }
+
+// Branches
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
+ def JMP64r : I<0xFF, MRM4r, (ops GR64:$dst), "jmp{q} {*}$dst",
+ [(brind GR64:$dst)]>;
+ def JMP64m : I<0xFF, MRM4m, (ops i64mem:$dst), "jmp{q} {*}$dst",
+ [(brind (loadi64 addr:$dst))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+def LEAVE64 : I<0xC9, RawFrm,
+ (ops), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>;
+def POP64r : I<0x58, AddRegFrm,
+ (ops GR64:$reg), "pop{q} $reg", []>, Imp<[RSP],[RSP]>;
+def PUSH64r : I<0x50, AddRegFrm,
+ (ops GR64:$reg), "push{q} $reg", []>, Imp<[RSP],[RSP]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (ops GR32:$dst, lea64_32mem:$src),
+ "lea{l} {$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+def LEA64r : RI<0x8D, MRMSrcMem, (ops GR64:$dst, lea64mem:$src),
+ "lea{q} {$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+let isTwoAddress = 1 in
+def BSWAP64r : RI<0xC8, AddRegFrm, (ops GR64:$dst, GR64:$src),
+ "bswap{q} $dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+// Exchange
+def XCHG64rr : RI<0x87, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG64mr : RI<0x87, MRMDestMem, (ops i64mem:$src1, GR64:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+
+// Repeat string ops
+def REP_MOVSQ : RI<0xA5, RawFrm, (ops), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>,
+ Imp<[RCX,RDI,RSI], [RCX,RDI,RSI]>, REP;
+def REP_STOSQ : RI<0xAB, RawFrm, (ops), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>,
+ Imp<[RAX,RCX,RDI], [RCX,RDI]>, REP;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+
+def MOV64rr : RI<0x89, MRMDestReg, (ops GR64:$dst, GR64:$src),
+ "mov{q} {$src, $dst|$dst, $src}", []>;
+
+def MOV64ri : RIi64<0xB8, AddRegFrm, (ops GR64:$dst, i64imm:$src),
+ "movabs{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (ops GR64:$dst, i64i32imm:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+
+def MOV64rm : RI<0x8B, MRMSrcMem, (ops GR64:$dst, i64mem:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+
+def MOV64mr : RI<0x89, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (ops i64mem:$dst, i64i32imm:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
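+// For illustration (assumed example): MOV64ri carries a full 64-bit immediate
+// ("movabsq"), while MOV64ri32/MOV64mi32 reuse the 32-bit immediate encoding
+// and sign-extend it, e.g. in AT&T syntax:
+//   movq    $-1, %rax              ; fits i64immSExt32, shorter encoding
+//   movabsq $0x123456789, %rax     ; needs the full 64-bit immediate form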
+
+// Sign/Zero extenders
+
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (ops GR64:$dst, GR8 :$src),
+ "movs{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (ops GR64:$dst, i8mem :$src),
+ "movs{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (ops GR64:$dst, GR16:$src),
+ "movs{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (ops GR64:$dst, i16mem:$src),
+ "movs{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (ops GR64:$dst, GR32:$src),
+ "movs{lq|xd} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (ops GR64:$dst, i32mem:$src),
+ "movs{lq|xd} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (ops GR64:$dst, GR8 :$src),
+ "movz{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (ops GR64:$dst, i8mem :$src),
+ "movz{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+def MOVZX64rr16: RI<0xB7, MRMSrcReg, (ops GR64:$dst, GR16:$src),
+ "movz{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: RI<0xB7, MRMSrcMem, (ops GR64:$dst, i16mem:$src),
+ "movz{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+def CDQE : RI<0x98, RawFrm, (ops),
+ "{cltq|cdqe}", []>, Imp<[EAX],[RAX]>; // RAX = signext(EAX)
+
+def CQO : RI<0x99, RawFrm, (ops),
+ "{cqto|cqo}", []>, Imp<[RAX],[RAX,RDX]>; // RDX:RAX = signext(RAX)
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions...
+//
+
+let isTwoAddress = 1 in {
+let isConvertibleToThreeAddress = 1 in {
+let isCommutable = 1 in
+def ADD64rr : RI<0x01, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2))]>;
+
+def ADD64ri32 : RIi32<0x81, MRM0r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2))]>;
+def ADD64ri8 : RIi8<0x83, MRM0r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2))]>;
+} // isConvertibleToThreeAddress
+
+def ADD64rm : RI<0x03, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load addr:$src2)))]>;
+} // isTwoAddress
+
+def ADD64mr : RI<0x01, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADD64mi32 : RIi32<0x81, MRM0m, (ops i64mem:$dst, i64i32imm :$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def ADD64mi8 : RIi8<0x83, MRM0m, (ops i64mem:$dst, i64i8imm :$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
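+// A quick summary of the operand-suffix convention used throughout this file:
+//   ADD64rr        reg = reg op reg        ADD64rm   reg = reg op [mem]
+//   ADD64mr        [mem] = [mem] op reg    ADD64ri32/ri8  reg = reg op imm
+//   ADD64mi32/mi8  [mem] = [mem] op imm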
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def ADC64rr : RI<0x11, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+
+def ADC64rm : RI<0x13, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
+
+def ADC64ri32 : RIi32<0x81, MRM2r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
+def ADC64ri8 : RIi8<0x83, MRM2r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def ADC64mr : RI<0x11, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADC64mi32 : RIi32<0x81, MRM2m, (ops i64mem:$dst, i64i32imm:$src2),
+                      "adc{q} {$src2, $dst|$dst, $src2}",
+                      [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def ADC64mi8 : RIi8<0x83, MRM2m, (ops i64mem:$dst, i64i8imm :$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SUB64rr : RI<0x29, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB64rm : RI<0x2B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>;
+
+def SUB64ri32 : RIi32<0x81, MRM5r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>;
+def SUB64ri8 : RIi8<0x83, MRM5r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def SUB64mr : RI<0x29, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SUB64mi32 : RIi32<0x81, MRM5m, (ops i64mem:$dst, i64i32imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def SUB64mi8 : RIi8<0x83, MRM5m, (ops i64mem:$dst, i64i8imm :$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SBB64rr : RI<0x19, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+
+def SBB64rm : RI<0x1B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
+
+def SBB64ri32 : RIi32<0x81, MRM3r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
+def SBB64ri8 : RIi8<0x83, MRM3r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def SBB64mr : RI<0x19, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SBB64mi32 : RIi32<0x81, MRM3m, (ops i64mem:$dst, i64i32imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def SBB64mi8 : RIi8<0x83, MRM3m, (ops i64mem:$dst, i64i8imm :$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+// Unsigned multiplication
+def MUL64r : RI<0xF7, MRM4r, (ops GR64:$src),
+ "mul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64
+def MUL64m : RI<0xF7, MRM4m, (ops i64mem:$src),
+ "mul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64]
+
+// Signed multiplication
+def IMUL64r : RI<0xF7, MRM5r, (ops GR64:$src),
+ "imul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64
+def IMUL64m : RI<0xF7, MRM5m, (ops i64mem:$src),
+ "imul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64]
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def IMUL64rr : RI<0xAF, MRMSrcReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "imul{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB;
+
+def IMUL64rm : RI<0xAF, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "imul{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB;
+} // isTwoAddress
+
+// Surprisingly enough, these are not two-address instructions!
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (ops GR64:$dst, i64mem:$src1, i64i32imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (ops GR64:$dst, i64mem:$src1, i64i8imm: $src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>;
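+// For illustration (assumed example): the immediate forms take separate source
+// and destination operands, e.g. in AT&T syntax:
+//   imulq $37, %rsi, %rdi      ; IMUL64rri8: RDI = RSI * 37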
+
+// Unsigned division / remainder
+def DIV64r : RI<0xF7, MRM6r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+def DIV64m : RI<0xF7, MRM6m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+
+// Signed division / remainder
+def IDIV64r: RI<0xF7, MRM7r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+def IDIV64m: RI<0xF7, MRM7m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
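+// For illustration (assumed example): both divides consume RDX:RAX and leave
+// the quotient in RAX and the remainder in RDX, so a typical signed sequence is:
+//   cqto               ; sign-extend RAX into RDX:RAX (CQO above)
+//   idivq %rcx         ; RAX = RDX:RAX / RCX, RDX = RDX:RAX % RCX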
+
+// Unary instructions
+let CodeSize = 2 in {
+let isTwoAddress = 1 in
+def NEG64r : RI<0xF7, MRM3r, (ops GR64:$dst, GR64:$src), "neg{q} $dst",
+ [(set GR64:$dst, (ineg GR64:$src))]>;
+def NEG64m : RI<0xF7, MRM3m, (ops i64mem:$dst), "neg{q} $dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def INC64r : RI<0xFF, MRM0r, (ops GR64:$dst, GR64:$src), "inc{q} $dst",
+ [(set GR64:$dst, (add GR64:$src, 1))]>;
+def INC64m : RI<0xFF, MRM0m, (ops i64mem:$dst), "inc{q} $dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def DEC64r : RI<0xFF, MRM1r, (ops GR64:$dst, GR64:$src), "dec{q} $dst",
+ [(set GR64:$dst, (add GR64:$src, -1))]>;
+def DEC64m : RI<0xFF, MRM1m, (ops i64mem:$dst), "dec{q} $dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>;
+
+// In 64-bit mode, the single-byte INC and DEC encodings cannot be used; the
+// 0x40-0x4F opcodes are repurposed as REX prefixes.
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, 1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, 1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, -1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, -1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress
+} // CodeSize
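+// To illustrate the "can transform into LEA" note above (assumed example): when
+// the two-address constraint cannot be honored, the convertible forms may be
+// rewritten, e.g.
+//   incl %ecx   ==>   leal 1(%ecx), %edx     ; when the result must land in EDX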
+
+
+// Shift instructions
+let isTwoAddress = 1 in {
+def SHL64rCL : RI<0xD3, MRM4r, (ops GR64:$dst, GR64:$src),
+ "shl{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src, CL))]>,
+ Imp<[CL],[]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "shl{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+def SHL64r1 : RI<0xD1, MRM4r, (ops GR64:$dst, GR64:$src1),
+ "shl{q} $dst", []>;
+} // isTwoAddress
+
+def SHL64mCL : RI<0xD3, MRM4m, (ops i64mem:$dst),
+ "shl{q} {%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (ops i64mem:$dst, i8imm:$src),
+ "shl{q} {$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (ops i64mem:$dst),
+ "shl{q} $dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SHR64rCL : RI<0xD3, MRM5r, (ops GR64:$dst, GR64:$src),
+ "shr{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src, CL))]>,
+ Imp<[CL],[]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "shr{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (ops GR64:$dst, GR64:$src1),
+ "shr{q} $dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def SHR64mCL : RI<0xD3, MRM5m, (ops i64mem:$dst),
+ "shr{q} {%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (ops i64mem:$dst, i8imm:$src),
+ "shr{q} {$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (ops i64mem:$dst),
+ "shr{q} $dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SAR64rCL : RI<0xD3, MRM7r, (ops GR64:$dst, GR64:$src),
+ "sar{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src, CL))]>, Imp<[CL],[]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "sar{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (ops GR64:$dst, GR64:$src1),
+ "sar{q} $dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def SAR64mCL : RI<0xD3, MRM7m, (ops i64mem:$dst),
+ "sar{q} {%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (ops i64mem:$dst, i8imm:$src),
+ "sar{q} {$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (ops i64mem:$dst),
+ "sar{q} $dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Rotate instructions
+let isTwoAddress = 1 in {
+def ROL64rCL : RI<0xD3, MRM0r, (ops GR64:$dst, GR64:$src),
+ "rol{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src, CL))]>, Imp<[CL],[]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "rol{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (ops GR64:$dst, GR64:$src1),
+ "rol{q} $dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def ROL64mCL : RI<0xD3, MRM0m, (ops i64mem:$dst),
+ "rol{q} {%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (ops i64mem:$dst, i8imm:$src),
+ "rol{q} {$src, $dst|$dst, $src}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (ops i64mem:$dst),
+ "rol{q} $dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def ROR64rCL : RI<0xD3, MRM1r, (ops GR64:$dst, GR64:$src),
+ "ror{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src, CL))]>, Imp<[CL],[]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "ror{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (ops GR64:$dst, GR64:$src1),
+ "ror{q} $dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def ROR64mCL : RI<0xD3, MRM1m, (ops i64mem:$dst),
+ "ror{q} {%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (ops i64mem:$dst, i8imm:$src),
+ "ror{q} {$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (ops i64mem:$dst),
+ "ror{q} $dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Double shift instructions (generalizations of rotate)
+let isTwoAddress = 1 in {
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+
+let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+} // isCommutable
+} // isTwoAddress
+
+// Temporary hack: there are no patterns associated with these instructions,
+// so we have to tell tblgen that they do not produce results.
+let noResults = 1 in {
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+} // noResults
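+// As a reminder of the double-shift semantics (illustration, AT&T syntax):
+//   shldq %cl, %rsi, %rdi    ; RDI <<= CL, low bits filled from the high bits of RSI
+//   shrdq %cl, %rsi, %rdi    ; RDI >>= CL, high bits filled from the low bits of RSI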
+
+//===----------------------------------------------------------------------===//
+// Logical Instructions...
+//
+
+let isTwoAddress = 1 in
+def NOT64r : RI<0xF7, MRM2r, (ops GR64:$dst, GR64:$src), "not{q} $dst",
+ [(set GR64:$dst, (not GR64:$src))]>;
+def NOT64m : RI<0xF7, MRM2m, (ops i64mem:$dst), "not{q} $dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def AND64rr : RI<0x21, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+def AND64rm : RI<0x23, MRMSrcMem,
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load addr:$src2)))]>;
+def AND64ri32 : RIi32<0x81, MRM4r,
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2))]>;
+def AND64ri8 : RIi8<0x83, MRM4r,
+ (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def AND64mr : RI<0x21, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR64:$src), addr:$dst)]>;
+def AND64mi32 : RIi32<0x81, MRM4m,
+ (ops i64mem:$dst, i64i32imm:$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def AND64mi8 : RIi8<0x83, MRM4m,
+ (ops i64mem:$dst, i64i8imm :$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def OR64rr : RI<0x09, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+def OR64rm : RI<0x0B, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load addr:$src2)))]>;
+def OR64ri32 : RIi32<0x81, MRM1r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2))]>;
+def OR64ri8 : RIi8<0x83, MRM1r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def OR64mr : RI<0x09, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR64:$src), addr:$dst)]>;
+def OR64mi32 : RIi32<0x81, MRM1m, (ops i64mem:$dst, i64i32imm:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def OR64mi8 : RIi8<0x83, MRM1m, (ops i64mem:$dst, i64i8imm:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def XOR64rr : RI<0x31, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+def XOR64rm : RI<0x33, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2)))]>;
+def XOR64ri32 : RIi32<0x81, MRM6r,
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2))]>;
+def XOR64ri8 : RIi8<0x83, MRM6r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def XOR64mr : RI<0x31, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR64:$src), addr:$dst)]>;
+def XOR64mi32 : RIi32<0x81, MRM6m, (ops i64mem:$dst, i64i32imm:$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def XOR64mi8 : RIi8<0x83, MRM6m, (ops i64mem:$dst, i64i8imm :$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+// Integer comparison
+let isCommutable = 1 in
+def TEST64rr : RI<0x85, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, GR64:$src2), 0)]>;
+def TEST64rm : RI<0x85, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0)]>;
+def TEST64ri32 : RIi32<0xF7, MRM0r, (ops GR64:$src1, i64i32imm:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0)]>;
+def TEST64mi32 : RIi32<0xF7, MRM0m, (ops i64mem:$src1, i64i32imm:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0)]>;
+
+def CMP64rr : RI<0x39, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, GR64:$src2)]>;
+def CMP64mr : RI<0x39, MRMDestMem, (ops i64mem:$src1, GR64:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), GR64:$src2)]>;
+def CMP64rm : RI<0x3B, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, (loadi64 addr:$src2))]>;
+def CMP64ri32 : RIi32<0x81, MRM7r, (ops GR64:$src1, i64i32imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt32:$src2)]>;
+def CMP64mi32 : RIi32<0x81, MRM7m, (ops i64mem:$src1, i64i32imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2)]>;
+def CMP64mi8 : RIi8<0x83, MRM7m, (ops i64mem:$src1, i64i8imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2)]>;
+def CMP64ri8 : RIi8<0x83, MRM7r, (ops GR64:$src1, i64i8imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt8:$src2)]>;
+
+// Conditional moves
+let isTwoAddress = 1 in {
+def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_B))]>, TB;
+def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_B))]>, TB;
+def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_AE))]>, TB;
+def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_AE))]>, TB;
+def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_E))]>, TB;
+def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_E))]>, TB;
+def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NE))]>, TB;
+def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NE))]>, TB;
+def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_BE))]>, TB;
+def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_BE))]>, TB;
+def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_A))]>, TB;
+def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_A))]>, TB;
+def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_L))]>, TB;
+def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_L))]>, TB;
+def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_GE))]>, TB;
+def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_GE))]>, TB;
+def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_LE))]>, TB;
+def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_LE))]>, TB;
+def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_G))]>, TB;
+def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_G))]>, TB;
+def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_S))]>, TB;
+def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_S))]>, TB;
+def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NS))]>, TB;
+def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NS))]>, TB;
+def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_P))]>, TB;
+def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_P))]>, TB;
+def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NP))]>, TB;
+def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NP))]>, TB;
+} // isTwoAddress
+
+//===----------------------------------------------------------------------===//
+// Conversion Instructions...
+//
+
+// f64 -> signed i64
+def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvtsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (ops GR64:$dst, f128mem:$src),
+ "cvtsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, FR64:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f64mem:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f128mem:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
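+// Note: fp_to_sint selects the CVTT* ("convert with truncation") forms because
+// they always round toward zero, matching LLVM's fptosi semantics; the plain
+// CVTSD2SI/CVTSS2SI forms round according to MXCSR and are retained here only
+// for the intrinsics mentioned in the TODOs.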
+
+// Signed i64 -> f64
+def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (ops FR64:$dst, GR64:$src),
+ "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (ops FR64:$dst, i64mem:$src),
+ "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+let isTwoAddress = 1 in {
+def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR64:$src2),
+ "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i64mem:$src2),
+ "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+} // isTwoAddress
+
+// Signed i64 -> f32
+def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (ops FR32:$dst, GR64:$src),
+ "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (ops FR32:$dst, i64mem:$src),
+ "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+let isTwoAddress = 1 in {
+def Int_CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR64:$src2),
+ "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+def Int_CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i64mem:$src2),
+ "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+} // isTwoAddress
+
+// f32 -> signed i64
+def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvtss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvtss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, FR32:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Truncate
+// In 64-bit mode, every 64-bit and 32-bit register has a low 8-bit sub-register.
+def TRUNC_64to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR64:$src),
+                    "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR64:$src))]>;
+def TRUNC_32to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32:$src),
+                    "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR32:$src))]>,
+ Requires<[In64BitMode]>;
+def TRUNC_16to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR16:$src))]>,
+ Requires<[In64BitMode]>;
+
+def TRUNC_64to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR64:$src),
+ "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
+ [(set GR16:$dst, (trunc GR64:$src))]>;
+
+def TRUNC_64to32 : I<0x89, MRMDestReg, (ops GR32:$dst, GR64:$src),
+ "mov{l} {${src:subreg32}, $dst|$dst, ${src:subreg32}}",
+ [(set GR32:$dst, (trunc GR64:$src))]>;
+
+// Zero-extension
+// TODO: Remove this after proper i32 -> i64 zext support.
+def PsMOVZX64rr32: I<0x89, MRMDestReg, (ops GR64:$dst, GR32:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (ops GR64:$dst, i32mem:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let AddedComplexity = 1 in
+def MOV64r0 : RI<0x31, MRMInitReg, (ops GR64:$dst),
+ "xor{q} $dst, $dst",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero.
+let AddedComplexity = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (ops GR64:$dst, i64i32imm:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, i64immZExt32:$src)]>;
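+// Note: this works because in 64-bit mode a write to a 32-bit register
+// implicitly zero-extends into the full 64-bit register, e.g. (illustration)
+//   movl $42, %eax       ; RAX = 0x000000000000002A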
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+
+// Calls
+// Direct PC-relative function call for the small code model. The 32-bit
+// displacement is sign-extended to 64 bits.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall GR64:$dst),
+ (CALL64r GR64:$dst)>;
+
+// {s|z}extload bool -> {s|z}extload byte
+def : Pat<(sextloadi64i1 addr:$src), (MOVSX64rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+def : Pat<(extloadi64i32 addr:$src), (PsMOVZX64rm32 addr:$src)>;
+
+// anyext -> zext
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>;
+def : Pat<(i64 (anyext GR32:$src)), (PsMOVZX64rr32 GR32:$src)>;
+def : Pat<(i64 (anyext (loadi8 addr:$src))), (MOVZX64rm8 addr:$src)>;
+def : Pat<(i64 (anyext (loadi16 addr:$src))), (MOVZX64rm16 addr:$src)>;
+def : Pat<(i64 (anyext (loadi32 addr:$src))), (PsMOVZX64rm32 addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
+def : Pat<(or (srl GR64:$src1, CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+def : Pat<(or (shl GR64:$src1, CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
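+// Worked example for the patterns above (illustration): the low 64 bits of a
+// 128-bit logical right shift of RDX:RAX by CL form the expression
+//   (RAX >> CL) | (RDX << (64 - CL))
+// which matches the first SHRD pattern and selects a single SHRD64rrCL.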
+
+// X86-specific add that produces a flag.
+def : Pat<(addc GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(addc GR64:$src1, (load addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, imm:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+
+def : Pat<(subc GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(subc GR64:$src1, (load addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE Instructions
+//===----------------------------------------------------------------------===//
+
+// Move instructions...
+
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (ops VR128:$dst, GR64:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toPQIrm : RPDI<0x6E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>;
+
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (ops GR64:$dst, VR128:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPQIto64mr : RPDI<0x7E, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (ops FR64:$dst, GR64:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (ops FR64:$dst, i64mem:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (ops GR64:$dst, FR64:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (ops i64mem:$dst, FR64:$src),
+ "mov{d|q} {$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
diff --git a/lib/Target/X86/X86IntelAsmPrinter.cpp b/lib/Target/X86/X86IntelAsmPrinter.cpp
new file mode 100755
index 0000000..39b65ee
--- /dev/null
+++ b/lib/Target/X86/X86IntelAsmPrinter.cpp
@@ -0,0 +1,533 @@
+//===-- X86IntelAsmPrinter.cpp - Convert X86 LLVM code to Intel assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Intel format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelAsmPrinter.h"
+#include "X86TargetAsmInfo.h"
+#include "X86.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+std::string X86IntelAsmPrinter::getSectionForFunction(const Function &F) const {
+ // Intel asm always emits functions to _text.
+ return "_text";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ unsigned CC = F->getCallingConv();
+
+  // Populate the function information map. We only want to populate
+  // information for stdcall and fastcall functions right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ X86SharedAsmPrinter::decorateName(CurrentFnName, F);
+
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unsupported linkage type!");
+ case Function::InternalLinkage:
+ EmitAlignment(4);
+ break;
+ case Function::DLLExportLinkage:
+ DLLExportedFns.insert(CurrentFnName);
+ //FALLS THROUGH
+ case Function::ExternalLinkage:
+ O << "\tpublic " << CurrentFnName << "\n";
+ EmitAlignment(4);
+ break;
+ }
+
+ O << CurrentFnName << "\tproc near\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (I->pred_begin() != I->pred_end()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O << CurrentFnName << "\tendp\n";
+
+ // We didn't modify anything.
+ return false;
+}
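+// For illustration ("foo" is a placeholder name): for an externally visible
+// function this emits a MASM body of the form
+//        public foo
+//   foo  proc near
+//        ...            ; one printMachineInstruction per instruction
+//   foo  endp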
+
+void X86IntelAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImmedValue();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
+ const char *Modifier) {
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT::ValueType VT = (strcmp(Modifier,"subreg64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << RI.get(Reg).Name;
+ } else
+ O << "reg" << MO.getReg();
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImmedValue();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMachineBasicBlock());
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << "_" << MO.getJumpTableIndex();
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << "[" << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getConstantPoolIndex();
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << " + " << Offset;
+ else if (Offset < 0)
+ O << Offset;
+ O << "]";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ X86SharedAsmPrinter::decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp) O << "OFFSET ";
+ if (GV->hasDLLImportLinkage()) {
+ // FIXME: This should be fixed with full support of stdcall & fastcall
+ // CC's
+ O << "__imp_";
+ }
+ O << Name;
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << " + " << Offset;
+ else if (Offset < 0)
+ O << Offset;
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ if (!isCallOp) O << "OFFSET ";
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
+
+void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ int ScaleVal = MI->getOperand(Op+1).getImmedValue();
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ O << "[";
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOp(BaseReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << "*";
+ printOp(IndexReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (DispSpec.isGlobalAddress() || DispSpec.isConstantPoolIndex() ||
+ DispSpec.isJumpTableIndex()) {
+ if (NeedPlus)
+ O << " + ";
+ printOp(DispSpec, "mem");
+ } else {
+ int DispVal = DispSpec.getImmedValue();
+ if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) {
+ if (NeedPlus)
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ O << DispVal;
+ }
+ }
+ O << "]";
+}
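+// For illustration: this produces Intel-style references such as "[ESP + 8]"
+// or "[EAX + 8*ECX + 16]" (register-name casing follows the register
+// descriptors), while symbolic displacements are printed via printOp with the
+// "mem" modifier.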
+
+void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+}
+
+bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ const MRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ }
+
+ O << '%' << RI.get(Reg).Name;
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86IntelAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction
+/// MI in Intel syntax to the current output stream.
+///
+void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // See if a truncate instruction can be turned into a nop.
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::TRUNC_64to32:
+ case X86::TRUNC_64to16:
+ case X86::TRUNC_32to16:
+ case X86::TRUNC_32to8:
+ case X86::TRUNC_16to8:
+ case X86::TRUNC_32_to8:
+ case X86::TRUNC_16_to8: {
+ const MachineOperand &MO0 = MI->getOperand(0);
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned Reg0 = MO0.getReg();
+ unsigned Reg1 = MO1.getReg();
+ unsigned Opc = MI->getOpcode();
+ if (Opc == X86::TRUNC_64to32)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
+ else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
+ else
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
+ O << TAI->getCommentString() << " TRUNCATE ";
+ if (Reg0 != Reg1)
+ O << "\n\t";
+ break;
+ }
+ case X86::PsMOVZX64rr32:
+ O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
+ break;
+ }
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool X86IntelAsmPrinter::doInitialization(Module &M) {
+ X86SharedAsmPrinter::doInitialization(M);
+
+ Mang->markCharUnacceptable('.');
+
+ O << "\t.686\n\t.model flat\n\n";
+
+ // Emit declarations for external functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+ X86SharedAsmPrinter::decorateName(Name, I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":near\n";
+ }
+
+ // Emit declarations for external globals. Note that VC++ always declares
+ // external globals to have type byte, and if that's good enough for VC++...
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":byte\n";
+ }
+ }
+
+ return false;
+}
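+// For illustration (names are placeholders): the emitted module prologue looks
+// like
+//        .686
+//        .model flat
+//
+//        extern SomeExternalFn:near
+//        extern SomeExternalGlobal:byte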
+
+bool X86IntelAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+    if (I->isDeclaration()) continue;   // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+ bool bCustomSegment = false;
+
+ switch (I->getLinkage()) {
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment common 'COMMON'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::AppendingLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment public 'DATA'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::DLLExportLinkage:
+ DLLExportedGVs.insert(name);
+ // FALL THROUGH
+ case GlobalValue::ExternalLinkage:
+ O << "\tpublic " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ SwitchToDataSection(TAI->getDataSection(), I);
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ if (!bCustomSegment)
+ EmitAlignment(Align, I);
+
+ O << name << ":\t\t\t\t" << TAI->getCommentString()
+ << " " << I->getName() << '\n';
+
+ EmitGlobalConstant(C);
+
+ if (bCustomSegment)
+ O << name << "?\tends\n";
+ }
+
+ // Output linker support code for dllexported globals
+ if ((DLLExportedGVs.begin() != DLLExportedGVs.end()) ||
+ (DLLExportedFns.begin() != DLLExportedFns.end())) {
+ SwitchToDataSection("");
+    O << "; WARNING: The following code is valid only with MASM v8.x (usually shipped\n"
+      << "; with Microsoft Visual Studio 2005) or later. Unfortunately, there is no way\n"
+      << "; to support dllexported symbols in earlier versions of MASM in a fully\n"
+      << "; automatic way\n\n";
+ O << "_drectve\t segment info alias('.drectve')\n";
+ }
+
+ for (std::set<std::string>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i) {
+ O << "\t db ' /EXPORT:" << *i << ",data'\n";
+ }
+
+ for (std::set<std::string>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i) {
+ O << "\t db ' /EXPORT:" << *i << "'\n";
+ }
+
+ if ((DLLExportedGVs.begin() != DLLExportedGVs.end()) ||
+ (DLLExportedFns.begin() != DLLExportedFns.end())) {
+ O << "_drectve\t ends\n";
+ }
+
+ // Bypass X86SharedAsmPrinter::doFinalization().
+ AsmPrinter::doFinalization(M);
+ SwitchToDataSection("");
+ O << "\tend\n";
+ return false; // success
+}
+
+void X86IntelAsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (NumElts) {
+ // ML does not have escape sequences except '' for '. It also has a maximum
+ // string length of 255.
+ unsigned len = 0;
+ bool inString = false;
+ for (unsigned i = 0; i < NumElts; i++) {
+ int n = cast<ConstantInt>(CVA->getOperand(i))->getZExtValue() & 255;
+ if (len == 0)
+ O << "\tdb ";
+
+ if (n >= 32 && n <= 127) {
+ if (!inString) {
+ if (len > 0) {
+ O << ",'";
+ len += 2;
+ } else {
+ O << "'";
+ len++;
+ }
+ inString = true;
+ }
+ if (n == '\'') {
+ O << "'";
+ len++;
+ }
+ O << char(n);
+ } else {
+ if (inString) {
+ O << "'";
+ len++;
+ inString = false;
+ }
+ if (len > 0) {
+ O << ",";
+ len++;
+ }
+ O << n;
+ len += 1 + (n > 9) + (n > 99);
+ }
+
+ if (len > 60) {
+ if (inString) {
+ O << "'";
+ inString = false;
+ }
+ O << "\n";
+ len = 0;
+ }
+ }
+
+ if (len > 0) {
+ if (inString)
+ O << "'";
+ O << "\n";
+ }
+ }
+}
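For illustration only, a minimal standalone sketch (plain C++, not LLVM code; the masmDB helper name is hypothetical) of the quoting rule EmitString applies: printable bytes are grouped in single-quoted runs with ' doubled, other bytes are emitted as decimal values, and everything follows a single db directive.

#include <iostream>
#include <string>

// Hypothetical illustration of the MASM quoting rule used by EmitString:
// printable characters are grouped in single-quoted runs (with ' written as
// ''), other bytes appear as decimal values, all joined by commas.
static std::string masmDB(const std::string &S) {
  std::string Out = "\tdb ";
  bool InString = false, First = true;
  for (unsigned char C : S) {
    if (C >= 32 && C <= 127) {
      if (!InString) { Out += First ? "'" : ",'"; InString = true; First = false; }
      if (C == '\'') Out += "'";          // escape a quote by doubling it
      Out += (char)C;
    } else {
      if (InString) { Out += "'"; InString = false; }
      if (!First) Out += ",";
      Out += std::to_string((int)C);
      First = false;
    }
  }
  if (InString) Out += "'";
  return Out;
}

int main() {
  // Prints:  db 'it''s a test',10
  std::cout << masmDB("it's a test\n") << "\n";
  return 0;
}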
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter1.inc"
diff --git a/lib/Target/X86/X86IntelAsmPrinter.h b/lib/Target/X86/X86IntelAsmPrinter.h
new file mode 100755
index 0000000..9ad11ff
--- /dev/null
+++ b/lib/Target/X86/X86IntelAsmPrinter.h
@@ -0,0 +1,112 @@
+//===-- X86IntelAsmPrinter.h - Convert X86 LLVM code to Intel assembly ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Intel assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INTELASMPRINTER_H
+#define X86INTELASMPRINTER_H
+
+#include "X86AsmPrinter.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/MRegisterInfo.h"
+
+namespace llvm {
+
+struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
+ X86IntelAsmPrinter(std::ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T)
+ : X86SharedAsmPrinter(O, TM, T) {
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Intel-Style Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isRegister()) {
+ assert(MRegisterInfo::isPhysicalRegister(MO.getReg()) && "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+ } else {
+ printOp(MO, Modifier);
+ }
+ }
+
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, const char *Modifier = 0);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// getSectionForFunction - Return the section that we should emit the
+ /// specified function body into.
+ virtual std::string getSectionForFunction(const Function &F) const;
+
+ virtual void EmitString(const ConstantArray *CVA) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 0000000..b9e5d5b
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,372 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include <cstdlib>
+using namespace llvm;
+
+#ifdef _MSC_VER
+ extern "C" void *_AddressOfReturnAddress(void);
+ #pragma intrinsic(_AddressOfReturnAddress)
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+}
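A minimal standalone sketch of the 5-byte patch written above (plain C++; patchWithJump is a hypothetical helper, not part of LLVM): the E9 opcode is followed by a 32-bit displacement measured from the end of the instruction, which is why the original subtracts 4 from the distance to the word that holds it.

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical helper mirroring the patch layout: jmp rel32 occupies 5 bytes,
// and the displacement is relative to the address just past those 5 bytes.
static void patchWithJump(unsigned char *Old, const unsigned char *New) {
  Old[0] = 0xE9;                               // JMP rel32 opcode
  int32_t Rel = (int32_t)(New - (Old + 5));    // target - end of instruction
  std::memcpy(Old + 1, &Rel, sizeof(Rel));     // 32-bit displacement (host byte order)
}

int main() {
  unsigned char Buf[16] = {0};
  patchWithJump(Buf, Buf + 10);                // pretend new code starts 10 bytes in
  int32_t Rel;
  std::memcpy(&Rel, Buf + 1, sizeof(Rel));
  assert(Buf[0] == 0xE9 && Rel == 5);          // 10 - (0 + 5) == 5
  return 0;
}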
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(__x86_64__)
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ ASMPREFIX "X86CompilationCallback:\n"
+ // Save RBP
+ "pushq %rbp\n"
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ // Save all int arg registers
+ "pushq %rdi\n"
+ "pushq %rsi\n"
+ "pushq %rdx\n"
+ "pushq %rcx\n"
+ "pushq %r8\n"
+ "pushq %r9\n"
+    // Align the stack on a 16-byte boundary. RSP might only be 8-byte
+    // aligned if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ "popq %r9\n"
+ "popq %r8\n"
+ "popq %rcx\n"
+ "popq %rdx\n"
+ "popq %rsi\n"
+ "popq %rdi\n"
+ // Restore RBP
+ "popq %rbp\n"
+ "ret\n");
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+#ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ ASMPREFIX "X86CompilationCallback:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+#if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+#endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "ret\n");
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "ret\n");
+#else
+ void X86CompilationCallback2(void);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push eax
+ push edx
+ push ecx
+ call X86CompilationCallback2
+ pop ecx
+ pop edx
+ pop eax
+ ret
+ }
+ }
+#endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n");
+ abort();
+ }
+#endif
+}
+
+/// X86CompilationCallback - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+#ifdef _MSC_VER
+extern "C" void X86CompilationCallback2() {
+ assert(sizeof(size_t) == 4); // FIXME: handle Win64
+ intptr_t *RetAddrLoc = (intptr_t *)_AddressOfReturnAddress();
+ RetAddrLoc += 4; // skip over ret addr, edx, eax, ecx
+ intptr_t RetAddr = *RetAddrLoc;
+#else
+extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+#endif
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
+
+  // The call instruction should have pushed the return address onto the stack...
+#ifdef __x86_64__
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DOUT << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#ifdef __x86_64__
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
+#ifdef __x86_64__
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+    // 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
+#ifdef __x86_64__
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
+#ifdef __x86_64__
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
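A data-level sketch of the 32-bit path above, using a fake in-memory stub (plain C++; the buffer and the 64-byte offset are hypothetical): the stub is a call rel32 followed by a 0xCD marker byte, and the callback backs up to the displacement, retargets it at the newly compiled code, and rewrites the call into a jmp so the marker becomes dead.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A fake 6-byte 32-bit stub: E8 <rel32> CD, as emitFunctionStub lays it out.
  unsigned char Stub[6] = {0xE8, 0, 0, 0, 0, 0xCD};
  intptr_t StubAddr = (intptr_t)Stub;
  intptr_t RetAddr  = StubAddr + 5;            // return address pushed by the call

  // It's a stub if the byte at the return address is the 0xCD marker.
  bool IsStub = ((unsigned char *)RetAddr)[0] == 0xCD;
  assert(IsStub);

  // Back up to the rel32 field and check this really was a call (0xE8).
  RetAddr -= 4;
  assert(((unsigned char *)RetAddr)[-1] == 0xE8);

  // Retarget the displacement at the (pretend) newly compiled code...
  intptr_t NewVal = StubAddr + 64;
  int32_t Rel = (int32_t)(NewVal - RetAddr - 4);
  std::memcpy((void *)RetAddr, &Rel, sizeof(Rel));

  // ...and rewrite the call into an unconditional jmp so the marker is dead.
  ((unsigned char *)RetAddr)[-1] = 0xE9;
  assert(Stub[0] == 0xE9);
  return 0;
}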
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if (defined(__i386__) || defined(i386) || defined(_M_IX86)) && \
+ !defined(_MSC_VER) && !defined(__x86_64__)
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) {
+ // FIXME: support for AMD family of processors.
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 25) & 0x1)
+ return X86CompilationCallback_SSE;
+ }
+ }
+#endif
+
+ return X86CompilationCallback;
+}
+
+void *X86JITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if (defined(__i386__) || defined(i386) || defined(_M_IX86)) && \
+ !defined(_MSC_VER) && !defined(__x86_64__)
+ bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback &&
+ Fn != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ if (NotCC) {
+#ifdef __x86_64__
+ MCE.startFunctionStub(13, 4);
+ MCE.emitByte(0x49); // REX prefix
+ MCE.emitByte(0xB8+2); // movabsq r10
+ MCE.emitWordLE(((unsigned *)&Fn)[0]);
+ MCE.emitWordLE(((unsigned *)&Fn)[1]);
+ MCE.emitByte(0x41); // REX prefix
+ MCE.emitByte(0xFF); // jmpq *r10
+ MCE.emitByte(2 | (4 << 3) | (3 << 6));
+#else
+ MCE.startFunctionStub(5, 4);
+ MCE.emitByte(0xE9);
+ MCE.emitWordLE((intptr_t)Fn-MCE.getCurrentPCValue()-4);
+#endif
+ return MCE.finishFunctionStub(0);
+ }
+
+#ifdef __x86_64__
+ MCE.startFunctionStub(14, 4);
+ MCE.emitByte(0x49); // REX prefix
+ MCE.emitByte(0xB8+2); // movabsq r10
+ MCE.emitWordLE(((unsigned *)&Fn)[0]);
+ MCE.emitWordLE(((unsigned *)&Fn)[1]);
+ MCE.emitByte(0x41); // REX prefix
+ MCE.emitByte(0xFF); // callq *r10
+ MCE.emitByte(2 | (2 << 3) | (3 << 6));
+#else
+ MCE.startFunctionStub(6, 4);
+ MCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ MCE.emitWordLE((intptr_t)Fn-MCE.getCurrentPCValue()-4);
+#endif
+
+ MCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub!
+ return MCE.finishFunctionStub(0);
+}
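For reference, a small sketch of the ModRM arithmetic used by the x86-64 stubs above (plain C++; modRM is a hypothetical helper, assuming the standard mod/reg/rm bit layout): with the 0x41 REX.B prefix, r/m value 2 selects %r10, and the /4 and /2 opcode extensions of 0xFF select jmp and call respectively.

#include <cassert>

// mod in bits 7-6, reg/opcode-extension in bits 5-3, r/m in bits 2-0.
static unsigned char modRM(unsigned Mod, unsigned RegOp, unsigned RM) {
  return (unsigned char)((Mod << 6) | (RegOp << 3) | RM);
}

int main() {
  assert(modRM(3, 4, 2) == 0xE2);   // REX.B + FF E2 => jmpq *%r10
  assert(modRM(3, 2, 2) == 0xD2);   // REX.B + FF D2 => callq *%r10
  return 0;
}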
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr-(intptr_t)RelocPos-4-MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
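A standalone sketch of the reloc_pcrel_word case above (plain C++; the buffer and offsets are hypothetical, and the per-relocation constant from getConstantVal() is omitted): the stored 32-bit value ends up as the target minus the address just past the 4-byte field, plus whatever addend was already encoded in the code.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Code[16] = {0};
  unsigned char *RelocPos = Code + 2;        // hypothetical fixup location
  intptr_t Target = (intptr_t)Code + 100;    // hypothetical resolved symbol

  unsigned Existing;                         // addend already encoded in the code
  std::memcpy(&Existing, RelocPos, 4);
  unsigned Fixed = Existing + (unsigned)(Target - (intptr_t)RelocPos - 4);
  std::memcpy(RelocPos, &Fixed, 4);

  std::memcpy(&Existing, RelocPos, 4);
  assert(Existing == (unsigned)(Target - (intptr_t)RelocPos - 4));
  return 0;
}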
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 0000000..a4c731a
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,50 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ public:
+ X86JITInfo(X86TargetMachine &tm) : TM(tm) {useGOT = 0;}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitFunctionStub - Use the specified MachineCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(void *Fn, MachineCodeEmitter &MCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+ };
+}
+
+#endif
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 0000000..7a21fb2
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,74 @@
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+enum NameDecorationStyle {
+ None,
+ StdCall,
+ FastCall
+};
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+  /// ForceFramePointer - True if the function is required to use a frame
+  /// pointer for reasons other than containing dynamic allocations or having
+  /// frame pointer elimination turned off. For example, the Cygwin main
+  /// function contains stack pointer re-alignment code which requires a
+  /// frame pointer.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+  /// BytesToPopOnReturn - Number of bytes the function pops on return.
+  /// Used on Windows for stdcall & fastcall name decoration.
+ unsigned BytesToPopOnReturn;
+
+  /// DecorationStyle - If the function requires additional name decoration,
+  /// this holds the style of decoration to apply.
+ NameDecorationStyle DecorationStyle;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None) {}
+
+ X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ NameDecorationStyle getDecorationStyle() const { return DecorationStyle; }
+ void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;}
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 0000000..da65db0
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,1613 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the MRegisterInfo class. This
+// file is responsible for the frame pointer elimination optimization on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ cl::opt<bool>
+ NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+ cl::opt<bool>
+ PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+}
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP),
+ TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+bool X86RegisterInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+ unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, TII.get(Opc)).addReg(Reg);
+ }
+ return true;
+}
+
+bool X86RegisterInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ unsigned Opc = Is64Bit ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ BuildMI(MBB, MI, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ unsigned Opc;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8RegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR32_RegClass) {
+ Opc = X86::MOV32_mr;
+ } else if (RC == &X86::GR16_RegClass) {
+ Opc = X86::MOV16_mr;
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
+ Opc = X86::ST_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::ST_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSmr;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDmr;
+ } else if (RC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSmr;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64mr;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+ addFrameReference(BuildMI(MBB, MI, TII.get(Opc)), FrameIdx)
+ .addReg(SrcReg, false, false, true);
+}
+
+void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ unsigned Opc;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8RegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR32_RegClass) {
+ Opc = X86::MOV32_rm;
+ } else if (RC == &X86::GR16_RegClass) {
+ Opc = X86::MOV16_rm;
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
+ Opc = X86::LD_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::LD_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSrm;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDrm;
+ } else if (RC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSrm;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rm;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+ addFrameReference(BuildMI(MBB, MI, TII.get(Opc), DestReg), FrameIdx);
+}
+
+void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ unsigned Opc;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rr;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rr;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rr;
+ } else if (RC == &X86::GR8RegClass) {
+ Opc = X86::MOV8rr;
+ } else if (RC == &X86::GR32_RegClass) {
+ Opc = X86::MOV32_rr;
+ } else if (RC == &X86::GR16_RegClass) {
+ Opc = X86::MOV16_rr;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::MOV_Fp3232;
+ } else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
+ Opc = X86::MOV_Fp6464;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::FsMOVAPSrr;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::FsMOVAPDrr;
+ } else if (RC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSrr;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rr;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+ BuildMI(MBB, MI, TII.get(Opc), DestReg).addReg(SrcReg);
+}
+
+
+void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = Orig->clone();
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+static MachineInstr *FuseTwoAddrInst(unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstrBuilder MIB = addFrameReference(BuildMI(TII.get(Opcode)),
+ FrameIndex);
+
+ // Loop over the rest of the ri operands, converting them over.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ if (MO.isReg())
+ MIB = MIB.addReg(MO.getReg(), false, MO.isImplicit());
+ else if (MO.isImm())
+ MIB = MIB.addImm(MO.getImm());
+ else if (MO.isGlobalAddress())
+ MIB = MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset());
+ else if (MO.isJumpTableIndex())
+ MIB = MIB.addJumpTableIndex(MO.getJumpTableIndex());
+ else if (MO.isExternalSymbol())
+ MIB = MIB.addExternalSymbol(MO.getSymbolName());
+ else
+ assert(0 && "Unknown operand type!");
+ }
+ return MIB;
+}
+
+static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
+ unsigned FrameIndex, MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ MIB = addFrameReference(MIB, FrameIndex);
+ } else if (MO.isReg())
+ MIB = MIB.addReg(MO.getReg(), MO.isDef(), MO.isImplicit());
+ else if (MO.isImm())
+ MIB = MIB.addImm(MO.getImm());
+ else if (MO.isGlobalAddress())
+ MIB = MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset());
+ else if (MO.isJumpTableIndex())
+ MIB = MIB.addJumpTableIndex(MO.getJumpTableIndex());
+ else if (MO.isExternalSymbol())
+ MIB = MIB.addExternalSymbol(MO.getSymbolName());
+ else
+ assert(0 && "Unknown operand for FuseInst!");
+ }
+ return MIB;
+}
+
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII,
+ unsigned Opcode, unsigned FrameIndex,
+ MachineInstr *MI) {
+ return addFrameReference(BuildMI(TII.get(Opcode)), FrameIndex).addImm(0);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// TableEntry - Maps a 'from' opcode to its memory-fused 'to' opcode.
+ ///
+ struct TableEntry {
+ unsigned from; // Original opcode.
+ unsigned to; // New opcode.
+
+ // less operators used by STL search.
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool operator<(unsigned V, const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+/// TableIsSorted - Return true if the table is in 'from' opcode order.
+///
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 1; i != NumEntries; ++i)
+ if (!(Table[i-1] < Table[i])) {
+ cerr << "Entries out of order " << Table[i-1].from
+ << " " << Table[i].from << "\n";
+ return false;
+ }
+ return true;
+}
+
+/// TableLookup - Return the table entry matching the specified opcode.
+/// Otherwise return NULL.
+static const TableEntry *TableLookup(const TableEntry *Table, unsigned N,
+ unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I;
+ return NULL;
+}
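The fold tables that follow only work because they are kept sorted by the 'from' opcode, which lets TableLookup use a binary search. A self-contained sketch of the same pattern (plain C++ with hypothetical opcode numbers):

#include <algorithm>
#include <cassert>

struct Entry {
  unsigned from, to;
  friend bool operator<(const Entry &E, unsigned V) { return E.from < V; }
};

// Same shape as TableLookup above: lower_bound over a table sorted by 'from',
// returning the mapped opcode or 0 when no entry matches.
static unsigned lookup(const Entry *Table, unsigned N, unsigned Opcode) {
  const Entry *I = std::lower_bound(Table, Table + N, Opcode);
  return (I != Table + N && I->from == Opcode) ? I->to : 0;
}

int main() {
  // Hypothetical opcode numbers, kept sorted by 'from'.
  static const Entry Table[] = { {10, 110}, {12, 112}, {40, 140} };
  assert(lookup(Table, 3, 12) == 112);
  assert(lookup(Table, 3, 13) == 0);     // not foldable
  return 0;
}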
+
+#define ARRAY_SIZE(TABLE) \
+ (sizeof(TABLE)/sizeof(TABLE[0]))
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+
+MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
+ unsigned i,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Table (and size) to search
+ const TableEntry *OpcodeTablePtr = NULL;
+ unsigned OpcodeTableSize = 0;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = TII.getNumOperands(MI->getOpcode());
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getInstrDescriptor()->getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ MachineInstr *NewMI = NULL;
+  // Folding a memory location into the two-address part of a two-address
+  // instruction is different from folding it elsewhere.  It requires
+  // replacing *both* tied registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ static const TableEntry OpcodeTable[] = {
+ { X86::ADC32ri, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32mi8 },
+ { X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
+ { X86::ADD16ri, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD32ri, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD8ri, X86::ADD8mi },
+ { X86::ADD8rr, X86::ADD8mr },
+ { X86::AND16ri, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16mi8 },
+ { X86::AND16rr, X86::AND16mr },
+ { X86::AND32ri, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32mi8 },
+ { X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
+ { X86::AND8ri, X86::AND8mi },
+ { X86::AND8rr, X86::AND8mr },
+ { X86::DEC16r, X86::DEC16m },
+ { X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC16m },
+ { X86::DEC64_32r, X86::DEC32m },
+ { X86::DEC64r, X86::DEC64m },
+ { X86::DEC8r, X86::DEC8m },
+ { X86::INC16r, X86::INC16m },
+ { X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC16m },
+ { X86::INC64_32r, X86::INC32m },
+ { X86::INC64r, X86::INC64m },
+ { X86::INC8r, X86::INC8m },
+ { X86::NEG16r, X86::NEG16m },
+ { X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
+ { X86::NEG8r, X86::NEG8m },
+ { X86::NOT16r, X86::NOT16m },
+ { X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
+ { X86::NOT8r, X86::NOT8m },
+ { X86::OR16ri, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16mi8 },
+ { X86::OR16rr, X86::OR16mr },
+ { X86::OR32ri, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32mi8 },
+ { X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
+ { X86::OR8ri, X86::OR8mi },
+ { X86::OR8rr, X86::OR8mr },
+ { X86::ROL16r1, X86::ROL16m1 },
+ { X86::ROL16rCL, X86::ROL16mCL },
+ { X86::ROL16ri, X86::ROL16mi },
+ { X86::ROL32r1, X86::ROL32m1 },
+ { X86::ROL32rCL, X86::ROL32mCL },
+ { X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
+ { X86::ROL8r1, X86::ROL8m1 },
+ { X86::ROL8rCL, X86::ROL8mCL },
+ { X86::ROL8ri, X86::ROL8mi },
+ { X86::ROR16r1, X86::ROR16m1 },
+ { X86::ROR16rCL, X86::ROR16mCL },
+ { X86::ROR16ri, X86::ROR16mi },
+ { X86::ROR32r1, X86::ROR32m1 },
+ { X86::ROR32rCL, X86::ROR32mCL },
+ { X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
+ { X86::ROR8r1, X86::ROR8m1 },
+ { X86::ROR8rCL, X86::ROR8mCL },
+ { X86::ROR8ri, X86::ROR8mi },
+ { X86::SAR16r1, X86::SAR16m1 },
+ { X86::SAR16rCL, X86::SAR16mCL },
+ { X86::SAR16ri, X86::SAR16mi },
+ { X86::SAR32r1, X86::SAR32m1 },
+ { X86::SAR32rCL, X86::SAR32mCL },
+ { X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
+ { X86::SAR8r1, X86::SAR8m1 },
+ { X86::SAR8rCL, X86::SAR8mCL },
+ { X86::SAR8ri, X86::SAR8mi },
+ { X86::SBB32ri, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32mi8 },
+ { X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
+ { X86::SHL16r1, X86::SHL16m1 },
+ { X86::SHL16rCL, X86::SHL16mCL },
+ { X86::SHL16ri, X86::SHL16mi },
+ { X86::SHL32r1, X86::SHL32m1 },
+ { X86::SHL32rCL, X86::SHL32mCL },
+ { X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64r1, X86::SHL64m1 },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
+ { X86::SHL8r1, X86::SHL8m1 },
+ { X86::SHL8rCL, X86::SHL8mCL },
+ { X86::SHL8ri, X86::SHL8mi },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL },
+ { X86::SHLD16rri8, X86::SHLD16mri8 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL },
+ { X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
+ { X86::SHR16r1, X86::SHR16m1 },
+ { X86::SHR16rCL, X86::SHR16mCL },
+ { X86::SHR16ri, X86::SHR16mi },
+ { X86::SHR32r1, X86::SHR32m1 },
+ { X86::SHR32rCL, X86::SHR32mCL },
+ { X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
+ { X86::SHR8r1, X86::SHR8m1 },
+ { X86::SHR8rCL, X86::SHR8mCL },
+ { X86::SHR8ri, X86::SHR8mi },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL },
+ { X86::SHRD16rri8, X86::SHRD16mri8 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL },
+ { X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
+ { X86::SUB16ri, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16mi8 },
+ { X86::SUB16rr, X86::SUB16mr },
+ { X86::SUB32ri, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32mi8 },
+ { X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
+ { X86::SUB8ri, X86::SUB8mi },
+ { X86::SUB8rr, X86::SUB8mr },
+ { X86::XOR16ri, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16mi8 },
+ { X86::XOR16rr, X86::XOR16mr },
+ { X86::XOR32ri, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32mi8 },
+ { X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
+ { X86::XOR8ri, X86::XOR8mi },
+ { X86::XOR8rr, X86::XOR8mr }
+ };
+ ASSERT_SORTED(OpcodeTable);
+ OpcodeTablePtr = OpcodeTable;
+ OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(TII, X86::MOV16mi, FrameIndex, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(TII, X86::MOV32mi, FrameIndex, MI);
+ else if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(TII, X86::MOV64mi32, FrameIndex, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(TII, X86::MOV8mi, FrameIndex, MI);
+ if (NewMI) {
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+ }
+
+ static const TableEntry OpcodeTable[] = {
+ { X86::CMP16ri, X86::CMP16mi },
+ { X86::CMP16ri8, X86::CMP16mi8 },
+ { X86::CMP32ri, X86::CMP32mi },
+ { X86::CMP32ri8, X86::CMP32mi8 },
+ { X86::CMP8ri, X86::CMP8mi },
+ { X86::DIV16r, X86::DIV16m },
+ { X86::DIV32r, X86::DIV32m },
+ { X86::DIV64r, X86::DIV64m },
+ { X86::DIV8r, X86::DIV8m },
+ { X86::FsMOVAPDrr, X86::MOVSDmr },
+ { X86::FsMOVAPSrr, X86::MOVSSmr },
+ { X86::IDIV16r, X86::IDIV16m },
+ { X86::IDIV32r, X86::IDIV32m },
+ { X86::IDIV64r, X86::IDIV64m },
+ { X86::IDIV8r, X86::IDIV8m },
+ { X86::IMUL16r, X86::IMUL16m },
+ { X86::IMUL32r, X86::IMUL32m },
+ { X86::IMUL64r, X86::IMUL64m },
+ { X86::IMUL8r, X86::IMUL8m },
+ { X86::MOV16ri, X86::MOV16mi },
+ { X86::MOV16rr, X86::MOV16mr },
+ { X86::MOV32ri, X86::MOV32mi },
+ { X86::MOV32rr, X86::MOV32mr },
+ { X86::MOV64ri32, X86::MOV64mi32 },
+ { X86::MOV64rr, X86::MOV64mr },
+ { X86::MOV8ri, X86::MOV8mi },
+ { X86::MOV8rr, X86::MOV8mr },
+ { X86::MOVAPDrr, X86::MOVAPDmr },
+ { X86::MOVAPSrr, X86::MOVAPSmr },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr },
+ { X86::MOVPQIto64rr,X86::MOVPQIto64mr },
+ { X86::MOVPS2SSrr, X86::MOVPS2SSmr },
+ { X86::MOVSDrr, X86::MOVSDmr },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr },
+ { X86::MOVSSrr, X86::MOVSSmr },
+ { X86::MOVUPDrr, X86::MOVUPDmr },
+ { X86::MOVUPSrr, X86::MOVUPSmr },
+ { X86::MUL16r, X86::MUL16m },
+ { X86::MUL32r, X86::MUL32m },
+ { X86::MUL64r, X86::MUL64m },
+ { X86::MUL8r, X86::MUL8m },
+ { X86::SETAEr, X86::SETAEm },
+ { X86::SETAr, X86::SETAm },
+ { X86::SETBEr, X86::SETBEm },
+ { X86::SETBr, X86::SETBm },
+ { X86::SETEr, X86::SETEm },
+ { X86::SETGEr, X86::SETGEm },
+ { X86::SETGr, X86::SETGm },
+ { X86::SETLEr, X86::SETLEm },
+ { X86::SETLr, X86::SETLm },
+ { X86::SETNEr, X86::SETNEm },
+ { X86::SETNPr, X86::SETNPm },
+ { X86::SETNSr, X86::SETNSm },
+ { X86::SETPr, X86::SETPm },
+ { X86::SETSr, X86::SETSm },
+ { X86::TEST16ri, X86::TEST16mi },
+ { X86::TEST32ri, X86::TEST32mi },
+ { X86::TEST64ri32, X86::TEST64mi32 },
+ { X86::TEST8ri, X86::TEST8mi },
+ { X86::XCHG16rr, X86::XCHG16mr },
+ { X86::XCHG32rr, X86::XCHG32mr },
+ { X86::XCHG64rr, X86::XCHG64mr },
+ { X86::XCHG8rr, X86::XCHG8mr }
+ };
+ ASSERT_SORTED(OpcodeTable);
+ OpcodeTablePtr = OpcodeTable;
+ OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+ } else if (i == 1) {
+ static const TableEntry OpcodeTable[] = {
+ { X86::CMP16rr, X86::CMP16rm },
+ { X86::CMP32rr, X86::CMP32rm },
+ { X86::CMP64ri32, X86::CMP64mi32 },
+ { X86::CMP64ri8, X86::CMP64mi8 },
+ { X86::CMP64rr, X86::CMP64rm },
+ { X86::CMP8rr, X86::CMP8rm },
+ { X86::CMPPDrri, X86::CMPPDrmi },
+ { X86::CMPPSrri, X86::CMPPSrmi },
+ { X86::CMPSDrr, X86::CMPSDrm },
+ { X86::CMPSSrr, X86::CMPSSrm },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
+ { X86::FsMOVAPDrr, X86::MOVSDrm },
+ { X86::FsMOVAPSrr, X86::MOVSSrm },
+ { X86::IMUL16rri, X86::IMUL16rmi },
+ { X86::IMUL16rri8, X86::IMUL16rmi8 },
+ { X86::IMUL32rri, X86::IMUL32rmi },
+ { X86::IMUL32rri8, X86::IMUL32rmi8 },
+ { X86::IMUL64rr, X86::IMUL64rm },
+ { X86::IMUL64rri32, X86::IMUL64rmi32 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
+ { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
+ { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
+ { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
+ { X86::MOV16rr, X86::MOV16rm },
+ { X86::MOV32rr, X86::MOV32rm },
+ { X86::MOV64rr, X86::MOV64rm },
+ { X86::MOV64toPQIrr, X86::MOV64toPQIrm },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm },
+ { X86::MOV8rr, X86::MOV8rm },
+ { X86::MOVAPDrr, X86::MOVAPDrm },
+ { X86::MOVAPSrr, X86::MOVAPSrm },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
+ { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
+ { X86::MOVSDrr, X86::MOVSDrm },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
+ { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
+ { X86::MOVSSrr, X86::MOVSSrm },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8 },
+ { X86::MOVUPDrr, X86::MOVUPDrm },
+ { X86::MOVUPSrr, X86::MOVUPSrm },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8 },
+ { X86::PSHUFDri, X86::PSHUFDmi },
+ { X86::PSHUFHWri, X86::PSHUFHWmi },
+ { X86::PSHUFLWri, X86::PSHUFLWmi },
+ { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
+ { X86::TEST16rr, X86::TEST16rm },
+ { X86::TEST32rr, X86::TEST32rm },
+ { X86::TEST64rr, X86::TEST64rm },
+ { X86::TEST8rr, X86::TEST8rm },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm },
+ { X86::UCOMISSrr, X86::UCOMISSrm },
+ { X86::XCHG16rr, X86::XCHG16rm },
+ { X86::XCHG32rr, X86::XCHG32rm },
+ { X86::XCHG64rr, X86::XCHG64rm },
+ { X86::XCHG8rr, X86::XCHG8rm }
+ };
+ ASSERT_SORTED(OpcodeTable);
+ OpcodeTablePtr = OpcodeTable;
+ OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+ } else if (i == 2) {
+ static const TableEntry OpcodeTable[] = {
+ { X86::ADC32rr, X86::ADC32rm },
+ { X86::ADC64rr, X86::ADC64rm },
+ { X86::ADD16rr, X86::ADD16rm },
+ { X86::ADD32rr, X86::ADD32rm },
+ { X86::ADD64rr, X86::ADD64rm },
+ { X86::ADD8rr, X86::ADD8rm },
+ { X86::ADDPDrr, X86::ADDPDrm },
+ { X86::ADDPSrr, X86::ADDPSrm },
+ { X86::ADDSDrr, X86::ADDSDrm },
+ { X86::ADDSSrr, X86::ADDSSrm },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
+ { X86::AND16rr, X86::AND16rm },
+ { X86::AND32rr, X86::AND32rm },
+ { X86::AND64rr, X86::AND64rm },
+ { X86::AND8rr, X86::AND8rm },
+ { X86::ANDNPDrr, X86::ANDNPDrm },
+ { X86::ANDNPSrr, X86::ANDNPSrm },
+ { X86::ANDPDrr, X86::ANDPDrm },
+ { X86::ANDPSrr, X86::ANDPSrm },
+ { X86::CMOVA16rr, X86::CMOVA16rm },
+ { X86::CMOVA32rr, X86::CMOVA32rm },
+ { X86::CMOVA64rr, X86::CMOVA64rm },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm },
+ { X86::CMOVB16rr, X86::CMOVB16rm },
+ { X86::CMOVB32rr, X86::CMOVB32rm },
+ { X86::CMOVB64rr, X86::CMOVB64rm },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm },
+ { X86::CMOVE16rr, X86::CMOVE16rm },
+ { X86::CMOVE32rr, X86::CMOVE32rm },
+ { X86::CMOVE64rr, X86::CMOVE64rm },
+ { X86::CMOVG16rr, X86::CMOVG16rm },
+ { X86::CMOVG32rr, X86::CMOVG32rm },
+ { X86::CMOVG64rr, X86::CMOVG64rm },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm },
+ { X86::CMOVL16rr, X86::CMOVL16rm },
+ { X86::CMOVL32rr, X86::CMOVL32rm },
+ { X86::CMOVL64rr, X86::CMOVL64rm },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm },
+ { X86::CMOVP16rr, X86::CMOVP16rm },
+ { X86::CMOVP32rr, X86::CMOVP32rm },
+ { X86::CMOVP64rr, X86::CMOVP64rm },
+ { X86::CMOVS16rr, X86::CMOVS16rm },
+ { X86::CMOVS32rr, X86::CMOVS32rm },
+ { X86::CMOVS64rr, X86::CMOVS64rm },
+ { X86::DIVPDrr, X86::DIVPDrm },
+ { X86::DIVPSrr, X86::DIVPSrm },
+ { X86::DIVSDrr, X86::DIVSDrm },
+ { X86::DIVSSrr, X86::DIVSSrm },
+ { X86::HADDPDrr, X86::HADDPDrm },
+ { X86::HADDPSrr, X86::HADDPSrm },
+ { X86::HSUBPDrr, X86::HSUBPDrm },
+ { X86::HSUBPSrr, X86::HSUBPSrm },
+ { X86::IMUL16rr, X86::IMUL16rm },
+ { X86::IMUL32rr, X86::IMUL32rm },
+ { X86::MAXPDrr, X86::MAXPDrm },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
+ { X86::MAXPSrr, X86::MAXPSrm },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
+ { X86::MAXSDrr, X86::MAXSDrm },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
+ { X86::MAXSSrr, X86::MAXSSrm },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
+ { X86::MINPDrr, X86::MINPDrm },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int },
+ { X86::MINPSrr, X86::MINPSrm },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int },
+ { X86::MINSDrr, X86::MINSDrm },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int },
+ { X86::MINSSrr, X86::MINSSrm },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int },
+ { X86::MULPDrr, X86::MULPDrm },
+ { X86::MULPSrr, X86::MULPSrm },
+ { X86::MULSDrr, X86::MULSDrm },
+ { X86::MULSSrr, X86::MULSSrm },
+ { X86::OR16rr, X86::OR16rm },
+ { X86::OR32rr, X86::OR32rm },
+ { X86::OR64rr, X86::OR64rm },
+ { X86::OR8rr, X86::OR8rm },
+ { X86::ORPDrr, X86::ORPDrm },
+ { X86::ORPSrr, X86::ORPSrm },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm },
+ { X86::PADDBrr, X86::PADDBrm },
+ { X86::PADDDrr, X86::PADDDrm },
+ { X86::PADDQrr, X86::PADDQrm },
+ { X86::PADDSBrr, X86::PADDSBrm },
+ { X86::PADDSWrr, X86::PADDSWrm },
+ { X86::PADDWrr, X86::PADDWrm },
+ { X86::PANDNrr, X86::PANDNrm },
+ { X86::PANDrr, X86::PANDrm },
+ { X86::PAVGBrr, X86::PAVGBrm },
+ { X86::PAVGWrr, X86::PAVGWrm },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm },
+ { X86::PINSRWrri, X86::PINSRWrmi },
+ { X86::PMADDWDrr, X86::PMADDWDrm },
+ { X86::PMAXSWrr, X86::PMAXSWrm },
+ { X86::PMAXUBrr, X86::PMAXUBrm },
+ { X86::PMINSWrr, X86::PMINSWrm },
+ { X86::PMINUBrr, X86::PMINUBrm },
+ { X86::PMULHUWrr, X86::PMULHUWrm },
+ { X86::PMULHWrr, X86::PMULHWrm },
+ { X86::PMULLWrr, X86::PMULLWrm },
+ { X86::PMULUDQrr, X86::PMULUDQrm },
+ { X86::PORrr, X86::PORrm },
+ { X86::PSADBWrr, X86::PSADBWrm },
+ { X86::PSLLDrr, X86::PSLLDrm },
+ { X86::PSLLQrr, X86::PSLLQrm },
+ { X86::PSLLWrr, X86::PSLLWrm },
+ { X86::PSRADrr, X86::PSRADrm },
+ { X86::PSRAWrr, X86::PSRAWrm },
+ { X86::PSRLDrr, X86::PSRLDrm },
+ { X86::PSRLQrr, X86::PSRLQrm },
+ { X86::PSRLWrr, X86::PSRLWrm },
+ { X86::PSUBBrr, X86::PSUBBrm },
+ { X86::PSUBDrr, X86::PSUBDrm },
+ { X86::PSUBSBrr, X86::PSUBSBrm },
+ { X86::PSUBSWrr, X86::PSUBSWrm },
+ { X86::PSUBWrr, X86::PSUBWrm },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
+ { X86::PXORrr, X86::PXORrm },
+ { X86::RCPPSr, X86::RCPPSm },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int },
+ { X86::RSQRTPSr, X86::RSQRTPSm },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
+ { X86::RSQRTSSr, X86::RSQRTSSm },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
+ { X86::SBB32rr, X86::SBB32rm },
+ { X86::SBB64rr, X86::SBB64rm },
+ { X86::SHUFPDrri, X86::SHUFPDrmi },
+ { X86::SHUFPSrri, X86::SHUFPSrmi },
+ { X86::SQRTPDr, X86::SQRTPDm },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
+ { X86::SQRTPSr, X86::SQRTPSm },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
+ { X86::SQRTSDr, X86::SQRTSDm },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
+ { X86::SQRTSSr, X86::SQRTSSm },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
+ { X86::SUB16rr, X86::SUB16rm },
+ { X86::SUB32rr, X86::SUB32rm },
+ { X86::SUB64rr, X86::SUB64rm },
+ { X86::SUB8rr, X86::SUB8rm },
+ { X86::SUBPDrr, X86::SUBPDrm },
+ { X86::SUBPSrr, X86::SUBPSrm },
+ { X86::SUBSDrr, X86::SUBSDrm },
+ { X86::SUBSSrr, X86::SUBSSrm },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
+ { X86::XOR16rr, X86::XOR16rm },
+ { X86::XOR32rr, X86::XOR32rm },
+ { X86::XOR64rr, X86::XOR64rm },
+ { X86::XOR8rr, X86::XOR8rm },
+ { X86::XORPDrr, X86::XORPDrm },
+ { X86::XORPSrr, X86::XORPSrm }
+ };
+ ASSERT_SORTED(OpcodeTable);
+ OpcodeTablePtr = OpcodeTable;
+ OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ unsigned fromOpcode = MI->getOpcode();
+ // Lookup fromOpcode in table
+ if (const TableEntry *Entry = TableLookup(OpcodeTablePtr, OpcodeTableSize,
+ fromOpcode)) {
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(Entry->to, FrameIndex, MI, TII);
+ else
+ NewMI = FuseInst(Entry->to, i, FrameIndex, MI, TII);
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing)
+ cerr << "We failed to fuse ("
+ << ((i == 1) ? "r" : "s") << "): " << *MI;
+ return NULL;
+}
+
+
+const unsigned *X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ if (Is64Bit)
+ return CalleeSavedRegs64Bit;
+ else {
+ if (MF) {
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ if (MMI && MMI->callsEHReturn())
+ return CalleeSavedRegs32EHRet;
+ }
+ return CalleeSavedRegs32Bit;
+ }
+}
+
+const TargetRegisterClass* const*
+X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+
+ if (Is64Bit)
+ return CalleeSavedRegClasses64Bit;
+ else {
+ if (MF) {
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ if (MMI && MMI->callsEHReturn())
+ return CalleeSavedRegClasses32EHRet;
+ }
+ return CalleeSavedRegClasses32Bit;
+ }
+
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+ if (hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+
+ return (NoFramePointerElim ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ (MMI && MMI->callsUnwindInit()));
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackdown instruction into a
+ // 'sub ESP, <amt>' and the adjcallstackup instruction into 'add ESP,
+ // <amt>'.
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
+ New=BuildMI(TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount) {
+ unsigned Opc = (Amount < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ New = BuildMI(TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ if (New) MBB.insert(I, New);
+ }
+ } else if (I->getOpcode() == X86::ADJCALLSTACKUP) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ unsigned Opc = (CalleeAmt < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ MachineInstr *New =
+ BuildMI(TII.get(Opc), StackPtr).addReg(StackPtr).addImm(CalleeAmt);
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
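The rounding expression used above, (Amount+Align-1)/Align*Align, bumps the outgoing-argument area up to the next stack-alignment boundary. A self-contained sketch of the same arithmetic, with illustrative numbers:

    #include <cassert>
    #include <cstdint>

    // Round Amount up to the next multiple of Align, as done for call-frame
    // adjustments above.
    static uint64_t alignUp(uint64_t Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(alignUp(20, 16) == 32); // 20 bytes of outgoing args, 16-byte alignment
      assert(alignUp(32, 16) == 32); // already-aligned amounts are unchanged
      assert(alignUp(1, 8) == 8);
      return 0;
    }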
+
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+ // This must be part of a four operand memory reference. Replace the
+ // FrameIndex with the appropriate base register (EBP or ESP).
+ MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+3).getImm()+SlotSize;
+
+ if (!hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += SlotSize; // Skip the saved EBP
+
+ MI.getOperand(i+3).ChangeToImmediate(Offset);
+}
+
+void
+X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
+ if (hasFP(MF)) {
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ (int)SlotSize * -2);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ }
+}
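For a 32-bit target (SlotSize == 4), the fixed object created above sits directly below the return-address slot. A tiny standalone sketch of the resulting offsets, relative to the stack pointer at function entry (values are illustrative):

    #include <cstdio>

    int main() {
      const int SlotSize = 4;                 // 8 on x86-64
      int ReturnAddrOffset = -SlotSize;       // pushed by the caller's CALL
      int SavedEBPOffset   = SlotSize * -2;   // the CreateFixedObject() offset above
      std::printf("return address at %d, saved EBP at %d\n",
                  ReturnAddrOffset, SavedEBPOffset); // -4 and -8
      return 0;
    }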
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
+ const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub
+ ? ((Offset < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
+ : ((Offset < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ uint64_t Chunk = (1LL << 31) - 1;
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ BuildMI(MBB, MBBI, TII.get(Opc), StackPtr).addReg(StackPtr).addImm(ThisVal);
+ Offset -= ThisVal;
+ }
+}
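A standalone sketch of the decisions emitSPUpdate makes: pick an 8-bit-immediate form for small adjustments and split anything larger than 2^31-1 bytes into several instructions. The opcode names are printed only as labels here; the real function emits MachineInstrs:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t NumBytes = -40;                  // e.g. allocate 40 bytes in a prologue
      bool isSub = NumBytes < 0;
      uint64_t Offset = isSub ? -NumBytes : NumBytes;
      const char *Opc = isSub ? (Offset < 128 ? "SUB32ri8" : "SUB32ri")
                              : (Offset < 128 ? "ADD32ri8" : "ADD32ri");
      const uint64_t Chunk = (1LL << 31) - 1;  // largest immediate emitted at once
      while (Offset) {
        uint64_t ThisVal = Offset > Chunk ? Chunk : Offset;
        std::printf("%s ESP, %llu\n", Opc, (unsigned long long)ThisVal);
        Offset -= ThisVal;
      }
      return 0;
    }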
+
+void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ const Function* Fn = MF.getFunction();
+ const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0, StartLabelId = 0;
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+
+ if (MMI && MMI->needsFrameInfo()) {
+ // Mark function start
+ StartLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(X86::LABEL)).addImm(StartLabelId);
+ }
+
+ if (hasFP(MF)) {
+ // Get the offset of the stack slot for the EBP register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(SlotSize-NumBytes);
+
+ // Save EBP into the appropriate stack slot...
+ BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr);
+ NumBytes -= SlotSize;
+
+ if (MMI && MMI->needsFrameInfo()) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(X86::LABEL)).addImm(FrameLabelId);
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr);
+ }
+
+ unsigned ReadyLabelId = 0;
+ if (MMI && MMI->needsFrameInfo()) {
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(X86::LABEL)).addImm(ReadyLabelId);
+ }
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r))
+ ++MBBI;
+
+ if (NumBytes) { // adjust stack pointer: ESP -= numbytes
+ if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+ // Check, whether EAX is livein for this function
+ bool isEAXAlive = false;
+ for (MachineFunction::livein_iterator II = MF.livein_begin(),
+ EE = MF.livein_end(); (II != EE) && !isEAXAlive; ++II) {
+ unsigned Reg = II->first;
+ isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL);
+ }
+
+ // Function prologue calls _alloca to probe the stack when allocating
+ // more than 4k bytes in one go. Touching the stack at 4K increments is
+ // necessary to ensure that the guard pages used by the OS virtual memory
+ // manager are allocated in correct sequence.
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes);
+ BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, TII.get(X86::PUSH32r), X86::EAX);
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+ BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(TII.get(X86::MOV32rm),X86::EAX),
+ StackPtr, NumBytes-4);
+ MBB.insert(MBBI, MI);
+ }
+ } else {
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NI = next(MBBI);
+ unsigned Opc = MBBI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ MBBI->getOperand(0).getReg() == StackPtr) {
+ NumBytes -= MBBI->getOperand(2).getImm();
+ MBB.erase(MBBI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ MBBI->getOperand(0).getReg() == StackPtr) {
+ NumBytes += MBBI->getOperand(2).getImm();
+ MBB.erase(MBBI);
+ MBBI = NI;
+ }
+ }
+
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ }
+ }
+
+ if (MMI && MMI->needsFrameInfo()) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetAsmInfo *TAI = MF.getTarget().getTargetAsmInfo();
+
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TAI->getAddressSize() : -TAI->getAddressSize());
+
+ if (StackSize) {
+ // Show update of SP.
+ if (hasFP(MF)) {
+ // Adjust SP
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -StackSize+stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ } else {
+ //FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ if (hasFP(MF)) {
+ // Save FP
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ // If it's main() on Cygwin/Mingw32, we should align the stack as well.
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main" &&
+ Subtarget->isTargetCygMing()) {
+ BuildMI(MBB, MBBI, TII.get(X86::AND32ri), X86::ESP)
+ .addReg(X86::ESP).addImm(-Align);
+
+ // Probe the stack
+ BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(Align);
+ BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca");
+ }
+}
+
+void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case X86::RET:
+ case X86::RETI:
+ case X86::EH_RETURN:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - CSSize;
+
+ if (hasFP(MF)) {
+ // pop EBP.
+ BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ NumBytes -= SlotSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ if (PI->getOpcode() != X86::POP32r && PI->getOpcode() != X86::POP64r)
+ break;
+ --MBBI;
+ }
+
+ // If dynamic alloca is used, then reset esp to point to the last
+ // callee-saved slot before popping them off!
+ if (MFI->hasVarSizedObjects()) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI = addRegOffset(BuildMI(TII.get(Opc), StackPtr),
+ FramePtr, -CSSize);
+ MBB.insert(MBBI, MI);
+ NumBytes = 0;
+ }
+
+ if (NumBytes) { // adjust stack pointer back: ESP += numbytes
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+ }
+
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ }
+
+ // We're returning from the function via eh_return.
+ if (RetOpcode == X86::EH_RETURN) {
+ MBBI = prior(MBB.end());
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr).
+ addReg(DestAddr.getReg());
+ }
+}
+
+unsigned X86RegisterInfo::getRARegister() const {
+ if (Is64Bit)
+ return X86::RIP; // Should have dwarf #16
+ else
+ return X86::EIP; // Should have dwarf #8
+}
+
+unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? FramePtr : StackPtr;
+}
+
+void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth = (Is64Bit ? -8 : -4);
+
+ // Initial state of the frame pointer is esp+4.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(StackPtr, stackGrowth);
+ MachineLocation CSSrc(getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
+ switch (VT) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+
+#include "X86GenRegisterInfo.inc"
+
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 0000000..ab9e33f
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,130 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "X86GenRegisterInfo.h.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bit.
+ bool Is64Bit;
+
+ /// SlotSize - Stack slot size in bytes.
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ void copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand. If this is possible, the target should perform the
+ /// folding and return the new instruction; otherwise it should return null.
+ /// If it folds the instruction, it is likely that the MachineInstr the
+ /// iterator references has been changed.
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ unsigned OpNum,
+ int FrameIndex) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
+ /// register classes to spill each callee-saved register with. The order and
+ /// length of this list match the getCalleeSavedRegs() list.
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses
+ /// and should be considered unavailable at all times, e.g. SP, RA. This is
+ /// used by the register scavenger to determine which registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, MVT::ValueType, bool High=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 0000000..a1e7bb9
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,468 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+ // AL aliases AX if we tell it that AX aliases AL (for example).
+
+ // FIXME: X86-64 have different Dwarf numbers.
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"AL">, DwarfRegNum<0>;
+ def CL : Register<"CL">, DwarfRegNum<1>;
+ def DL : Register<"DL">, DwarfRegNum<2>;
+ def BL : Register<"BL">, DwarfRegNum<3>;
+
+ // X86-64 only
+ def SIL : Register<"SIL">, DwarfRegNum<4>;
+ def DIL : Register<"DIL">, DwarfRegNum<5>;
+ def BPL : Register<"BPL">, DwarfRegNum<6>;
+ def SPL : Register<"SPL">, DwarfRegNum<7>;
+ def R8B : Register<"R8B">, DwarfRegNum<8>;
+ def R9B : Register<"R9B">, DwarfRegNum<9>;
+ def R10B : Register<"R10B">, DwarfRegNum<10>;
+ def R11B : Register<"R11B">, DwarfRegNum<11>;
+ def R12B : Register<"R12B">, DwarfRegNum<12>;
+ def R13B : Register<"R13B">, DwarfRegNum<13>;
+ def R14B : Register<"R14B">, DwarfRegNum<14>;
+ def R15B : Register<"R15B">, DwarfRegNum<15>;
+
+ // High registers X86-32 only
+ def AH : Register<"AH">, DwarfRegNum<0>;
+ def CH : Register<"CH">, DwarfRegNum<1>;
+ def DH : Register<"DH">, DwarfRegNum<2>;
+ def BH : Register<"BH">, DwarfRegNum<3>;
+
+ // 16-bit registers
+ def AX : RegisterWithSubRegs<"AX", [AH,AL]>, DwarfRegNum<0>;
+ def CX : RegisterWithSubRegs<"CX", [CH,CL]>, DwarfRegNum<1>;
+ def DX : RegisterWithSubRegs<"DX", [DH,DL]>, DwarfRegNum<2>;
+ def BX : RegisterWithSubRegs<"BX", [BH,BL]>, DwarfRegNum<3>;
+ def SP : RegisterWithSubRegs<"SP", [SPL]>, DwarfRegNum<4>;
+ def BP : RegisterWithSubRegs<"BP", [BPL]>, DwarfRegNum<5>;
+ def SI : RegisterWithSubRegs<"SI", [SIL]>, DwarfRegNum<6>;
+ def DI : RegisterWithSubRegs<"DI", [DIL]>, DwarfRegNum<7>;
+ def IP : Register<"IP">, DwarfRegNum<8>;
+
+ // X86-64 only
+ def R8W : RegisterWithSubRegs<"R8W", [R8B]>, DwarfRegNum<8>;
+ def R9W : RegisterWithSubRegs<"R9W", [R9B]>, DwarfRegNum<9>;
+ def R10W : RegisterWithSubRegs<"R10W", [R10B]>, DwarfRegNum<10>;
+ def R11W : RegisterWithSubRegs<"R11W", [R11B]>, DwarfRegNum<11>;
+ def R12W : RegisterWithSubRegs<"R12W", [R12B]>, DwarfRegNum<12>;
+ def R13W : RegisterWithSubRegs<"R13W", [R13B]>, DwarfRegNum<13>;
+ def R14W : RegisterWithSubRegs<"R14W", [R14B]>, DwarfRegNum<14>;
+ def R15W : RegisterWithSubRegs<"R15W", [R15B]>, DwarfRegNum<15>;
+
+ // 32-bit registers
+ def EAX : RegisterWithSubRegs<"EAX", [AX]>, DwarfRegNum<0>;
+ def ECX : RegisterWithSubRegs<"ECX", [CX]>, DwarfRegNum<1>;
+ def EDX : RegisterWithSubRegs<"EDX", [DX]>, DwarfRegNum<2>;
+ def EBX : RegisterWithSubRegs<"EBX", [BX]>, DwarfRegNum<3>;
+ def ESP : RegisterWithSubRegs<"ESP", [SP]>, DwarfRegNum<4>;
+ def EBP : RegisterWithSubRegs<"EBP", [BP]>, DwarfRegNum<5>;
+ def ESI : RegisterWithSubRegs<"ESI", [SI]>, DwarfRegNum<6>;
+ def EDI : RegisterWithSubRegs<"EDI", [DI]>, DwarfRegNum<7>;
+ def EIP : RegisterWithSubRegs<"EIP", [IP]>, DwarfRegNum<8>;
+
+ // X86-64 only
+ def R8D : RegisterWithSubRegs<"R8D", [R8W]>, DwarfRegNum<8>;
+ def R9D : RegisterWithSubRegs<"R9D", [R9W]>, DwarfRegNum<9>;
+ def R10D : RegisterWithSubRegs<"R10D", [R10W]>, DwarfRegNum<10>;
+ def R11D : RegisterWithSubRegs<"R11D", [R11W]>, DwarfRegNum<11>;
+ def R12D : RegisterWithSubRegs<"R12D", [R12W]>, DwarfRegNum<12>;
+ def R13D : RegisterWithSubRegs<"R13D", [R13W]>, DwarfRegNum<13>;
+ def R14D : RegisterWithSubRegs<"R14D", [R14W]>, DwarfRegNum<14>;
+ def R15D : RegisterWithSubRegs<"R15D", [R15W]>, DwarfRegNum<15>;
+
+ // 64-bit registers, X86-64 only
+ def RAX : RegisterWithSubRegs<"RAX", [EAX]>, DwarfRegNum<0>;
+ def RDX : RegisterWithSubRegs<"RDX", [EDX]>, DwarfRegNum<1>;
+ def RCX : RegisterWithSubRegs<"RCX", [ECX]>, DwarfRegNum<2>;
+ def RBX : RegisterWithSubRegs<"RBX", [EBX]>, DwarfRegNum<3>;
+ def RSI : RegisterWithSubRegs<"RSI", [ESI]>, DwarfRegNum<4>;
+ def RDI : RegisterWithSubRegs<"RDI", [EDI]>, DwarfRegNum<5>;
+ def RBP : RegisterWithSubRegs<"RBP", [EBP]>, DwarfRegNum<6>;
+ def RSP : RegisterWithSubRegs<"RSP", [ESP]>, DwarfRegNum<7>;
+
+ def R8 : RegisterWithSubRegs<"R8", [R8D]>, DwarfRegNum<8>;
+ def R9 : RegisterWithSubRegs<"R9", [R9D]>, DwarfRegNum<9>;
+ def R10 : RegisterWithSubRegs<"R10", [R10D]>, DwarfRegNum<10>;
+ def R11 : RegisterWithSubRegs<"R11", [R11D]>, DwarfRegNum<11>;
+ def R12 : RegisterWithSubRegs<"R12", [R12D]>, DwarfRegNum<12>;
+ def R13 : RegisterWithSubRegs<"R13", [R13D]>, DwarfRegNum<13>;
+ def R14 : RegisterWithSubRegs<"R14", [R14D]>, DwarfRegNum<14>;
+ def R15 : RegisterWithSubRegs<"R15", [R15D]>, DwarfRegNum<15>;
+ def RIP : RegisterWithSubRegs<"RIP", [EIP]>, DwarfRegNum<16>;
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"MM0">, DwarfRegNum<29>;
+ def MM1 : Register<"MM1">, DwarfRegNum<30>;
+ def MM2 : Register<"MM2">, DwarfRegNum<31>;
+ def MM3 : Register<"MM3">, DwarfRegNum<32>;
+ def MM4 : Register<"MM4">, DwarfRegNum<33>;
+ def MM5 : Register<"MM5">, DwarfRegNum<34>;
+ def MM6 : Register<"MM6">, DwarfRegNum<35>;
+ def MM7 : Register<"MM7">, DwarfRegNum<36>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"FP0">, DwarfRegNum<-1>;
+ def FP1 : Register<"FP1">, DwarfRegNum<-1>;
+ def FP2 : Register<"FP2">, DwarfRegNum<-1>;
+ def FP3 : Register<"FP3">, DwarfRegNum<-1>;
+ def FP4 : Register<"FP4">, DwarfRegNum<-1>;
+ def FP5 : Register<"FP5">, DwarfRegNum<-1>;
+ def FP6 : Register<"FP6">, DwarfRegNum<-1>;
+
+ // XMM Registers, used by the various SSE instruction set extensions
+ def XMM0: Register<"XMM0">, DwarfRegNum<17>;
+ def XMM1: Register<"XMM1">, DwarfRegNum<18>;
+ def XMM2: Register<"XMM2">, DwarfRegNum<19>;
+ def XMM3: Register<"XMM3">, DwarfRegNum<20>;
+ def XMM4: Register<"XMM4">, DwarfRegNum<21>;
+ def XMM5: Register<"XMM5">, DwarfRegNum<22>;
+ def XMM6: Register<"XMM6">, DwarfRegNum<23>;
+ def XMM7: Register<"XMM7">, DwarfRegNum<24>;
+
+ // X86-64 only
+ def XMM8: Register<"XMM8">, DwarfRegNum<25>;
+ def XMM9: Register<"XMM9">, DwarfRegNum<26>;
+ def XMM10: Register<"XMM10">, DwarfRegNum<27>;
+ def XMM11: Register<"XMM11">, DwarfRegNum<28>;
+ def XMM12: Register<"XMM12">, DwarfRegNum<29>;
+ def XMM13: Register<"XMM13">, DwarfRegNum<30>;
+ def XMM14: Register<"XMM14">, DwarfRegNum<31>;
+ def XMM15: Register<"XMM15">, DwarfRegNum<32>;
+
+ // Floating point stack registers
+ def ST0 : Register<"ST(0)">, DwarfRegNum<11>;
+ def ST1 : Register<"ST(1)">, DwarfRegNum<12>;
+ def ST2 : Register<"ST(2)">, DwarfRegNum<13>;
+ def ST3 : Register<"ST(3)">, DwarfRegNum<14>;
+ def ST4 : Register<"ST(4)">, DwarfRegNum<15>;
+ def ST5 : Register<"ST(5)">, DwarfRegNum<16>;
+ def ST6 : Register<"ST(6)">, DwarfRegNum<17>;
+ def ST7 : Register<"ST(7)">, DwarfRegNum<18>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
+// R8B ... R15B.
+// FIXME: Allow AH, CH, DH, BH in 64-bit mode for non-REX instructions.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_AO_64_fp[] =
+ {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B};
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_AO_64[] =
+ {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL};
+ // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_AO_32[] =
+ {X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH};
+
+ GR8Class::iterator
+ GR8Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32;
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp;
+ else
+ return X86_GR8_AO_64;
+ }
+
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_64_fp[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W};
+ static const unsigned X86_GR16_AO_32_fp[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
+ // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO_64[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP};
+ static const unsigned X86_GR16_AO_32[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};
+
+ GR16Class::iterator
+ GR16Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp;
+ else
+ return X86_GR16_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp;
+ else
+ return X86_GR16_AO_32;
+ }
+ }
+
+ GR16Class::iterator
+ GR16Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_AO_64_fp[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
+ static const unsigned X86_GR32_AO_32_fp[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_AO_64[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
+ static const unsigned X86_GR32_AO_32[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP};
+
+ GR32Class::iterator
+ GR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp;
+ else
+ return X86_GR32_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp;
+ else
+ return X86_GR32_AO_32;
+ }
+ }
+
+ GR32Class::iterator
+ GR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR64 : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64Class::iterator
+ GR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-2; // If so, don't allocate RSP or RBP
+ else
+ return end()-1; // If not, just don't allocate RSP
+ }
+ }];
+}
+
+
+// GR16, GR32 subclasses which contain registers that have 8-bit sub-registers.
+// These should only be used in 32-bit mode.
+def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>;
+def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>;
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR32Class::iterator
+ FR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+def FR64 : RegisterClass<"X86", [f64], 64,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR64Class::iterator
+ FR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f64], 32,
+ [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ RSTClass::iterator
+ RSTClass::allocation_order_end(const MachineFunction &MF) const {
+ return begin();
+ }
+ }];
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+ [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR128Class::iterator
+ VR128Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
new file mode 100644
index 0000000..3dd2b24
--- /dev/null
+++ b/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,34 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ enum RelocationType {
+ // reloc_pcrel_word - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
+ // add the relocated value to the value already in memory.
+ reloc_absolute_word = 1,
+ reloc_absolute_dword = 2
+ };
+ }
+}
+
+#endif
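A hedged sketch of how a resolver might apply these relocation types. The field and symbol names are illustrative, and the exact PC adjustment the JIT performs for reloc_pcrel_word can differ from this simplified version:

    #include <cstdint>

    void applyRelocSketch(unsigned RelocType, uint8_t *Field, uint64_t TargetAddr) {
      switch (RelocType) {
      case 0:   // reloc_pcrel_word: target expressed relative to the patched field
        *reinterpret_cast<int32_t *>(Field) +=
            static_cast<int32_t>(TargetAddr - reinterpret_cast<uintptr_t>(Field));
        break;
      case 1:   // reloc_absolute_word: add the absolute 32-bit target address
        *reinterpret_cast<int32_t *>(Field) += static_cast<int32_t>(TargetAddr);
        break;
      case 2:   // reloc_absolute_dword: same idea with a 64-bit field
        *reinterpret_cast<int64_t *>(Field) += static_cast<int64_t>(TargetAddr);
        break;
      }
    }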
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 0000000..1a75e04
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,293 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86Subtarget.h"
+#include "X86GenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+cl::opt<X86Subtarget::AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(
+ clEnumValN(X86Subtarget::ATT, "att", " Emit AT&T-style assembly"),
+ clEnumValN(X86Subtarget::Intel, "intel", " Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+/// True if accessing the GV requires an extra load. For Windows, dllimported
+/// symbols are indirect, loading the value at address GV rather than the
+/// value of GV itself. This means that the GlobalAddress must be in the base
+/// or index register of the address, not the GV offset field.
+bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const
+{
+ // FIXME: PIC
+ if (TM.getRelocationModel() != Reloc::Static)
+ if (isTargetDarwin()) {
+ return (!isDirectCall &&
+ (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ (GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode())));
+ } else if (TM.getRelocationModel() == Reloc::PIC_ && isPICStyleGOT()) {
+ // Extra load is needed for all non-statics.
+ return (!isDirectCall &&
+ (GV->isDeclaration() || !GV->hasInternalLinkage()));
+ } else if (isTargetCygMing() || isTargetWindows()) {
+ return (GV->hasDLLImportLinkage());
+ }
+
+ return false;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__)
+ // gcc doesn't know that cpuid clobbers ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+#if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+#elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+#endif
+#endif
+ return true;
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 23) & 0x1) X86SSELevel = MMX;
+ if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
+ if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
+ if (ECX & 0x1) X86SSELevel = SSE3;
+ if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
+
+ if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
+ memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ HasX86_64 = (EDX >> 29) & 0x1;
+ }
+}
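The vendor-string check above relies on CPUID leaf 0 returning "GenuineIntel" split across EBX, EDX, ECX in that order, which is why text.u+0, text.u+2 and text.u+1 are passed for the EBX/ECX/EDX out-parameters. A standalone, little-endian-only illustration with hard-coded register values:

    #include <cstdio>
    #include <cstring>

    int main() {
      unsigned Regs[3] = { 0x756e6547,    // EBX = "Genu"
                           0x49656e69,    // EDX = "ineI"
                           0x6c65746e };  // ECX = "ntel"
      char Vendor[13];
      std::memcpy(Vendor, Regs, 12);
      Vendor[12] = '\0';
      std::printf("%s\n", Vendor);        // prints "GenuineIntel"
      return 0;
    }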
+
+static const char *GetCurrentX86CPU() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 4: return "pentium-mmx";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: return "pentiumpro";
+ case 3:
+ case 5:
+ case 6: return "pentium2";
+ case 7:
+ case 8:
+ case 10:
+ case 11: return "pentium3";
+ case 9:
+ case 13: return "pentium-m";
+ case 14: return "yonah";
+ case 15: return "core2";
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 3:
+ case 4:
+ return (Em64T) ? "nocona" : "prescott";
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ default:
+ return "generic";
+ }
+ } else {
+ return "generic";
+ }
+}
+
+X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
+ : AsmFlavor(AsmWriterFlavor)
+ , PICStyle(PICStyle::None)
+ , X86SSELevel(NoMMXSSE)
+ , HasX86_64(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MinRepStrSizeThreshold(128)
+ , Is64Bit(is64Bit)
+ , TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+ // Determine default and user specified characteristics
+ if (!FS.empty()) {
+ // If feature string is not empty, parse features string.
+ std::string CPU = GetCurrentX86CPU();
+ ParseSubtargetFeatures(FS, CPU);
+
+ if (Is64Bit && !HasX86_64)
+ cerr << "Warning: Generation of 64-bit code for a 32-bit processor "
+ << "requested.\n";
+ if (Is64Bit && X86SSELevel < SSE2)
+ cerr << "Warning: 64-bit processors all have at least SSE2.\n";
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+ }
+
+ // If requesting codegen for X86-64, make sure that 64-bit and SSE2 features
+ // are enabled. These are available on all x86-64 CPUs.
+ if (Is64Bit) {
+ HasX86_64 = true;
+ if (X86SSELevel < SSE2)
+ X86SSELevel = SSE2;
+ }
+
+ // Set TargetType from the module's target triple, or fall back to the
+ // compile-time default if one cannot be determined.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ if (TT.find("cygwin") != std::string::npos)
+ TargetType = isCygwin;
+ else if (TT.find("mingw") != std::string::npos)
+ TargetType = isMingw;
+ else if (TT.find("darwin") != std::string::npos)
+ TargetType = isDarwin;
+ else if (TT.find("win32") != std::string::npos)
+ TargetType = isWindows;
+ } else if (TT.empty()) {
+#if defined(__CYGWIN__)
+ TargetType = isCygwin;
+#elif defined(__MINGW32__)
+ TargetType = isMingw;
+#elif defined(__APPLE__)
+ TargetType = isDarwin;
+#elif defined(_WIN32)
+ TargetType = isWindows;
+#endif
+ }
+
+ // If the asm syntax hasn't been overridden on the command line, use whatever
+ // the target wants.
+ if (AsmFlavor == X86Subtarget::Unset) {
+ if (TargetType == isWindows) {
+ AsmFlavor = X86Subtarget::Intel;
+ } else {
+ AsmFlavor = X86Subtarget::ATT;
+ }
+ }
+
+ if (TargetType == isDarwin ||
+ TargetType == isCygwin ||
+ TargetType == isMingw ||
+ (TargetType == isELF && Is64Bit))
+ stackAlignment = 16;
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 0000000..2cda970
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,156 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+namespace PICStyle {
+enum Style {
+ Stub, GOT, RIPRel, WinPIC, None
+};
+}
+
+class X86Subtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1, Unset
+ };
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// AsmFlavor - Which x86 asm dialect to use.
+ AsmWriterFlavorTy AsmFlavor;
+
+ /// PICStyle - Which PIC style to use
+ PICStyle::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, or none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ X863DNowEnum X863DNowLevel;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ bool HasX86_64;
+
+ /// stackAlignment - The minimum alignment known to hold for the stack frame
+ /// on entry to the function, which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ unsigned MinRepStrSizeThreshold;
+
+private:
+ /// Is64Bit - True if the processor supports 64-bit instructions and the
+ /// module pointer size is 64 bits.
+ bool Is64Bit;
+
+public:
+ enum {
+ isELF, isCygwin, isDarwin, isWindows, isMingw
+ } TargetType;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
+
+ /// getStackAlignment - Returns the minimum alignment known to hold for the
+ /// stack frame on entry to the function, which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMinRepStrSizeThreshold - Returns the minimum memset / memcpy size
+ /// required to turn the operation into an X86 rep/movs or rep/stos
+ /// instruction. This is only used if the src / dst alignment is not DWORD
+ /// aligned.
+ unsigned getMinRepStrSizeThreshold() const { return MinRepStrSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The body of this function is auto-generated
+ /// by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using the CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return Is64Bit; }
+
+ PICStyle::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyle::Style Style) { PICStyle = Style; }
+
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+
+ bool isFlavorAtt() const { return AsmFlavor == ATT; }
+ bool isFlavorIntel() const { return AsmFlavor == Intel; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const { return TargetType == isELF; }
+ bool isTargetWindows() const { return TargetType == isWindows; }
+ bool isTargetMingw() const { return TargetType == isMingw; }
+ bool isTargetCygMing() const { return (TargetType == isMingw ||
+ TargetType == isCygwin); }
+ bool isTargetCygwin() const { return TargetType == isCygwin; }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyle::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyle::GOT; }
+ bool isPICStyleStub() const { return PICStyle == PICStyle::Stub; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyle::RIPRel; }
+ bool isPICStyleWinPIC() const { return PICStyle == PICStyle::WinPIC; }
+
+ /// True if accessing the GV requires an extra load. For Windows, dllimported
+ /// symbols are indirect, loading the value at address GV rather than the
+ /// value of GV itself. This means that the GlobalAddress must be in the base
+ /// or index register of the address, not the GV offset field.
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+
+};
+
+namespace X86 {
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX);
+}
+
+} // End llvm namespace
+
+#endif
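
As a rough illustration of how the GetCpuIDAndInfo hook declared above can be used, the sketch below probes CPUID leaf 1 and tests a few SSE feature bits. It is an assumption for illustration only, not code from this patch; the bit positions follow the Intel manuals, and the real detection logic lives in AutoDetectSubtargetFeatures, which this header only declares.

#include "X86Subtarget.h"

// Minimal sketch (not part of this patch): probe CPUID leaf 1 and test a few
// SSE feature bits.
static void sketchDetectSSE() {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (!llvm::X86::GetCpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX)) {
    bool HasSSE2  = (EDX >> 26) & 1;  // EDX bit 26: SSE2
    bool HasSSE3  = (ECX >>  0) & 1;  // ECX bit 0:  SSE3
    bool HasSSSE3 = (ECX >>  9) & 1;  // ECX bit 9:  SSSE3
    (void)HasSSE2; (void)HasSSE3; (void)HasSSSE3;  // would feed X86SSELevel
  }
}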
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
new file mode 100644
index 0000000..4bb854e
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -0,0 +1,280 @@
+//===-- X86TargetAsmInfo.cpp - X86 asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the X86TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+static const char* x86_asm_table[] = {"{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86TargetAsmInfo::X86TargetAsmInfo(const X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+
+ // FIXME - Should be simplified.
+
+ AsmTransCBE = x86_asm_table;
+
+ switch (Subtarget->TargetType) {
+ case X86Subtarget::isDarwin:
+ AlignmentIsInBytes = false;
+ GlobalPrefix = "_";
+ if (!Subtarget->is64Bit())
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
+ BSSSection = 0; // no BSS section.
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ ConstantPoolSection = "\t.const\n";
+ JumpTableDataSection = "\t.const\n";
+ CStringSection = "\t.cstring";
+ FourByteConstantSection = "\t.literal4\n";
+ EightByteConstantSection = "\t.literal8\n";
+ if (Subtarget->is64Bit())
+ SixteenByteConstantSection = "\t.literal16\n";
+ ReadOnlySection = "\t.const\n";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+ HasDotTypeDotSizeDirective = false;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ SetDirective = "\t.set";
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakRefDirective = "\t.weak_reference\t";
+ HiddenDirective = "\t.private_extern\t";
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ break;
+
+ case X86Subtarget::isELF:
+ ReadOnlySection = "\t.section\t.rodata";
+ FourByteConstantSection = "\t.section\t.rodata.cst4,\"aM\",@progbits,4";
+ EightByteConstantSection = "\t.section\t.rodata.cst8,\"aM\",@progbits,8";
+ SixteenByteConstantSection = "\t.section\t.rodata.cst16,\"aM\",@progbits,16";
+ CStringSection = "\t.section\t.rodata.str1.1,\"aMS\",@progbits,1";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ AbsoluteDebugSectionOffsets = true;
+ AbsoluteEHSectionOffsets = false;
+ SupportsDebugInformation = true;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+ if (!Subtarget->is64Bit())
+ SupportsExceptionHandling = true;
+ DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+ DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+ break;
+
+ case X86Subtarget::isCygwin:
+ case X86Subtarget::isMingw:
+ GlobalPrefix = "_";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+ HasDotTypeDotSizeDirective = false;
+ StaticCtorsSection = "\t.section .ctors,\"aw\"";
+ StaticDtorsSection = "\t.section .dtors,\"aw\"";
+ HiddenDirective = NULL;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ AbsoluteDebugSectionOffsets = true;
+ AbsoluteEHSectionOffsets = false;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"dr\"";
+ DwarfInfoSection = "\t.section\t.debug_info,\"dr\"";
+ DwarfLineSection = "\t.section\t.debug_line,\"dr\"";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"dr\"";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"dr\"";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"dr\"";
+ DwarfStrSection = "\t.section\t.debug_str,\"dr\"";
+ DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+ break;
+
+ case X86Subtarget::isWindows:
+ GlobalPrefix = "_";
+ HasDotTypeDotSizeDirective = false;
+ break;
+
+ default: break;
+ }
+
+ if (Subtarget->isFlavorIntel()) {
+ GlobalPrefix = "_";
+ CommentString = ";";
+
+ PrivateGlobalPrefix = "$";
+ AlignDirective = "\talign\t";
+ ZeroDirective = "\tdb\t";
+ ZeroDirectiveSuffix = " dup(0)";
+ AsciiDirective = "\tdb\t";
+ AscizDirective = 0;
+ Data8bitsDirective = "\tdb\t";
+ Data16bitsDirective = "\tdw\t";
+ Data32bitsDirective = "\tdd\t";
+ Data64bitsDirective = "\tdq\t";
+ HasDotTypeDotSizeDirective = false;
+
+ TextSection = "_text";
+ DataSection = "_data";
+ JumpTableDataSection = NULL;
+ SwitchToSectionDirective = "";
+ TextSectionStartSuffix = "\tsegment 'CODE'";
+ DataSectionStartSuffix = "\tsegment 'DATA'";
+ SectionEndDirectiveSuffix = "\tends\n";
+ }
+
+ AssemblerDialect = Subtarget->getAsmFlavor();
+}
+
+bool X86TargetAsmInfo::LowerToBSwap(CallInst *CI) const {
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+
+ // Verify this is a simple bswap.
+ if (CI->getNumOperands() != 2 ||
+ CI->getType() != CI->getOperand(1)->getType() ||
+ !CI->getType()->isInteger())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty, Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+
+ Value *Op = CI->getOperand(1);
+ Op = new CallInst(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
+
+
+bool X86TargetAsmInfo::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ std::vector<std::string> AsmPieces;
+ SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ AsmPieces[0] == "bswap" && AsmPieces[1] == "$0") {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return LowerToBSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ std::vector<std::string> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ return LowerToBSwap(CI);
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
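
For orientation, the snippet below shows the kind of GCC-style inline asm that ExpandInlineAsm is written to recognize; the wrapper functions are illustrative assumptions, not code from this patch. Once the front end lowers them, the asm strings take the single-piece "bswap $0" form and the three-piece bswap/bswap/xchgl form matched above, and both are rewritten into llvm.bswap calls.

// Minimal sketch (assumed user code): inline asm shapes the matcher targets.
inline unsigned swap32(unsigned x) {
  asm("bswap %0" : "+r"(x));              // becomes "bswap $0", "=r,0"
  return x;
}

inline unsigned long long swap64(unsigned long long x) {  // 32-bit x86 only
  asm("bswap %%eax\n\t"
      "bswap %%edx\n\t"
      "xchgl %%eax, %%edx" : "+A"(x));    // becomes the three-piece i64 form
  return x;
}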
diff --git a/lib/Target/X86/X86TargetAsmInfo.h b/lib/Target/X86/X86TargetAsmInfo.h
new file mode 100644
index 0000000..cc509d1
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.h
@@ -0,0 +1,33 @@
+//=====-- X86TargetAsmInfo.h - X86 asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class X86TargetMachine;
+
+ struct X86TargetAsmInfo : public TargetAsmInfo {
+ X86TargetAsmInfo(const X86TargetMachine &TM);
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+ private:
+ bool LowerToBSwap(CallInst *CI) const;
+ };
+} // namespace llvm
+
+#endif
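
A sketch of how a pass that owns a TargetAsmInfo might drive the ExpandInlineAsm hook declared here; the loop is an assumption for illustration, not code from this patch. Because the hook may erase the call instruction, the iterator is advanced before the hook runs.

#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;

// Minimal sketch (assumed caller): offer every inline-asm call to the target.
static void expandInlineAsmCalls(Function &F, const TargetAsmInfo &TAI) {
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
      CallInst *CI = dyn_cast<CallInst>(I++);  // advance before possible erase
      if (CI && isa<InlineAsm>(CI->getCalledValue()))
        TAI.ExpandInlineAsm(CI);
    }
}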
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 0000000..4d4bd3f
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,190 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+/// X86TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int X86TargetMachineModule;
+int X86TargetMachineModule = 0;
+
+namespace {
+ // Register the target.
+ RegisterTarget<X86_32TargetMachine>
+ X("x86", " 32-bit X86: Pentium-Pro and above");
+ RegisterTarget<X86_64TargetMachine>
+ Y("x86-64", " 64-bit X86: EM64T and AMD64");
+}
+
+const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
+ return new X86TargetAsmInfo(*this);
+}
+
+unsigned X86_32TargetMachine::getJITMatchQuality() {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_64TargetMachine::getJITMatchQuality() {
+#if defined(__x86_64__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "i[3-9]86-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+ TT[4] == '-' && TT[1] - '3' < 6)
+ return 20;
+ // If the target triple is something non-X86, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "x86_64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
+ TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
+ return 20;
+
+ // We strongly match "amd64-*".
+ if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' &&
+ TT[3] == '6' && TT[4] == '4' && TT[5] == '-')
+ return 20;
+
+ // If the target triple is something non-X86-64, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, false) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, true) {
+}
+
+/// X86TargetMachine ctor - Create an ILP32 or LP64 architecture model,
+/// depending on is64Bit.
+///
+X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(M, FS, is64Bit),
+ DataLayout(Subtarget.is64Bit() ?
+ std::string("e-p:64:64-f64:32:64-i64:32:64") :
+ std::string("e-p:32:32-f64:32:64-i64:32:64")),
+ FrameInfo(TargetFrameInfo::StackGrowsDown,
+ Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+ if (getRelocationModel() == Reloc::Default)
+ if (Subtarget.isTargetDarwin() || Subtarget.isTargetCygMing())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ if (Subtarget.is64Bit()) {
+ // No DynamicNoPIC support under X86-64.
+ if (getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::PIC_);
+ // Default X86-64 code model is small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+ }
+
+ if (Subtarget.isTargetCygMing())
+ Subtarget.setPICStyle(PICStyle::WinPIC);
+ else if (Subtarget.isTargetDarwin())
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyle::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyle::Stub);
+ else if (Subtarget.isTargetELF())
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyle::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyle::GOT);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, Fast));
+ return false;
+}
+
+bool X86TargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out) {
+ PM.add(createX86CodePrinterPass(Out, *this));
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ setRelocationModel(Reloc::Static);
+ Subtarget.setPICStyle(PICStyle::None);
+
+ // The JIT cannot ensure globals are placed in the low 4GB of the address space.
+ if (Subtarget.is64Bit())
+ setCodeModel(CodeModel::Large);
+
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ return false;
+}
+
+bool X86TargetMachine::addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE) {
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ return false;
+}
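
To make the match-quality heuristics concrete, the hypothetical check below (module names and triples are illustrative, not from this patch) shows the intended scoring: 20 for a recognized triple, 10 for a layout-only match, 0 otherwise.

#include "X86TargetMachine.h"
#include "llvm/Module.h"
#include <cassert>
using namespace llvm;

// Minimal sketch (assumed test harness): expected match-quality scores.
static void checkMatchQuality() {
  Module M32("m32");
  M32.setTargetTriple("i686-pc-linux-gnu");        // recognized 32-bit triple
  assert(X86_32TargetMachine::getModuleMatchQuality(M32) == 20);

  Module M64("m64");
  M64.setTargetTriple("x86_64-unknown-freebsd");   // recognized 64-bit triple
  assert(X86_64TargetMachine::getModuleMatchQuality(M64) == 20);

  Module MOther("ppc");
  MOther.setTargetTriple("powerpc-apple-darwin8"); // some other target
  assert(X86_32TargetMachine::getModuleMatchQuality(MOther) == 0);
}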
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 0000000..0a4f1b5
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,95 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86ISelLowering.h"
+
+namespace llvm {
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ TargetFrameInfo FrameInfo;
+ X86InstrInfo InstrInfo;
+ X86JITInfo JITInfo;
+ X86TargetLowering TLInfo;
+ X86ELFWriterInfo ELFWriterInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual TargetJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const TargetSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual X86TargetLowering *getTargetLowering() const {
+ return const_cast<X86TargetLowering*>(&TLInfo);
+ }
+ virtual const MRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
+ virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
+ virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
+ std::ostream &Out);
+ virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(FunctionPassManager &PM, bool Fast,
+ MachineCodeEmitter &MCE);
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+public:
+ X86_32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+public:
+ X86_64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // End llvm namespace
+
+#endif
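
As a closing illustration of how the two concrete target machines declared above are meant to be selected, the hypothetical helper below (an assumption, not part of this patch) picks whichever reports the higher module match quality:

#include "X86TargetMachine.h"
#include "llvm/Module.h"
using namespace llvm;

// Minimal sketch (assumed driver code): choose the 32- or 64-bit X86 target.
static X86TargetMachine *createX86TM(const Module &M, const std::string &FS) {
  if (X86_64TargetMachine::getModuleMatchQuality(M) >
      X86_32TargetMachine::getModuleMatchQuality(M))
    return new X86_64TargetMachine(M, FS);
  return new X86_32TargetMachine(M, FS);
}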