summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorTobias Klausmann <tobias.johannes.klausmann@mni.thm.de>2016-09-30 23:50:15 +0200
committerIlia Mirkin <imirkin@alum.mit.edu>2016-10-14 23:23:57 -0400
commitb7d9677de804375827b3c433027ec2dd32cd1da6 (patch)
tree64b6762706ba90b0bb08022038042446aaffc657 /src/gallium
parent75128d6ffda9745161cb5c7ddd499422ad544d2c (diff)
downloadexternal_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.zip
external_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.tar.gz
external_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.tar.bz2
nv50/ir: constant fold OP_SPLIT
Split the source immediate value into new values and move them into the original defs set by the split. Since we can only have up to 64-bit immediates, this is largely beneficial for F64 (and, in the future, U64) operations.

Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
[imirkin: always use U32, set newi for foldCount tracking]
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 737bda3..0fb1a78 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -932,6 +932,24 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
Instruction *newi = i;
switch (i->op) {
+ case OP_SPLIT: {
+ bld.setPosition(i, false);
+
+ uint8_t size = i->getDef(0)->reg.size;
+ uint32_t mask = (1ULL << size) - 1;
+ assert(size <= 32);
+
+ uint64_t val = imm0.reg.data.u64;
+ for (int8_t d = 0; i->defExists(d); ++d) {
+ Value *def = i->getDef(d);
+ assert(def->reg.size == size);
+
+ newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);
+ val >>= size;
+ }
+ delete_Instruction(prog, i);
+ break;
+ }
case OP_MUL:
if (i->dType == TYPE_F32)
tryCollapseChainedMULs(i, s, imm0);