diff options
author | Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de> | 2016-09-30 23:50:15 +0200 |
---|---|---|
committer | Ilia Mirkin <imirkin@alum.mit.edu> | 2016-10-14 23:23:57 -0400 |
commit | b7d9677de804375827b3c433027ec2dd32cd1da6 (patch) | |
tree | 64b6762706ba90b0bb08022038042446aaffc657 /src/gallium | |
parent | 75128d6ffda9745161cb5c7ddd499422ad544d2c (diff) | |
download | external_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.zip external_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.tar.gz external_mesa3d-b7d9677de804375827b3c433027ec2dd32cd1da6.tar.bz2 |
nv50/ir: constant fold OP_SPLIT
Split the source immediate value into new values and move them into the
original defs set by the split. Since we can only have up to 64-bit
immediates, this is largely beneficial for F64 (and, in the future, U64)
operations.
Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
[imirkin: always use U32, set newi for foldCount tracking]
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 737bda3..0fb1a78 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -932,6 +932,24 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
    Instruction *newi = i;
 
    switch (i->op) {
+   case OP_SPLIT: {
+      bld.setPosition(i, false);
+
+      uint8_t size = i->getDef(0)->reg.size;
+      uint32_t mask = (1ULL << size) - 1;
+      assert(size <= 32);
+
+      uint64_t val = imm0.reg.data.u64;
+      for (int8_t d = 0; i->defExists(d); ++d) {
+         Value *def = i->getDef(d);
+         assert(def->reg.size == size);
+
+         newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);
+         val >>= size;
+      }
+      delete_Instruction(prog, i);
+      break;
+   }
    case OP_MUL:
       if (i->dType == TYPE_F32)
          tryCollapseChainedMULs(i, s, imm0);
```