summaryrefslogtreecommitdiffstats
path: root/libpixelflinger/arch-mips/t32cb16blend.S
diff options
context:
space:
mode:
Diffstat (limited to 'libpixelflinger/arch-mips/t32cb16blend.S')
-rw-r--r--libpixelflinger/arch-mips/t32cb16blend.S264
1 files changed, 264 insertions, 0 deletions
diff --git a/libpixelflinger/arch-mips/t32cb16blend.S b/libpixelflinger/arch-mips/t32cb16blend.S
new file mode 100644
index 0000000..c911fbb
--- /dev/null
+++ b/libpixelflinger/arch-mips/t32cb16blend.S
@@ -0,0 +1,264 @@
+/* libs/pixelflinger/t32cb16blend.S
+**
+** Copyright 2010, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#ifdef DEBUG
+#define DBG
+#else
+#define DBG #
+#endif
+
+/*
+ * blend one of 2 16bpp RGB pixels held in dreg selected by shift
+ * with the 32bpp ABGR pixel held in src and store the result in fb
+ *
+ * Assumes that the dreg data is little endian and that
+ * the the second pixel (shift==16) will be merged into
+ * the fb result
+ *
+ * Uses $t0,$t6,$t7,$t8
+ */
+
+#if __mips==32 && __mips_isa_rev>=2
+ .macro pixel dreg src fb shift
+ /*
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set noat
+DBG rdhwr $at,$2
+DBG .set at
+
+ srl $t7,\src,24
+ srl $t6,$t7,7
+ addu $t7,$t6
+ li $t6,0x100
+ subu $t7,$t6,$t7
+
+ /* red */
+ ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
+ mul $t6,$t8,$t7
+ ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
+ ext $t8,\src,3,5 # src[7..3]
+ srl $t6,8
+ addu $t8,$t6
+ ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11]
+
+ /* green */
+ mul $t8,$t0,$t7
+ ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
+ ext $t6,\src,2+8,6 # src[15..10]
+ srl $t8,8
+ addu $t8,$t6
+
+ /* blue */
+ mul $t0,$t0,$t7
+ ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5]
+ ext $t6,\src,(3+8+8),5
+ srl $t8,$t0,8
+ addu $t8,$t6
+ ins \fb,$t8,\shift,5
+
+DBG .set noat
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set at
+ .endm
+
+#else
+
+ .macro pixel dreg src fb shift
+ /*
+ * sA = s >> 24
+ * f = 0x100 - (sA + (sA>>7))
+ */
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $at,$2
+DBG .set pop
+
+ srl $t7,\src,24
+ srl $t6,$t7,7
+ addu $t7,$t6
+ li $t6,0x100
+ subu $t7,$t6,$t7
+
+ /*
+ * red
+ * dR = (d >> (6 + 5)) & 0x1f;
+ * dR = (f*dR)>>8
+ * sR = (s >> ( 3)) & 0x1f;
+ * sR += dR
+ * fb |= sR << 11
+ */
+ srl $t8,\dreg,\shift+6+5
+.if \shift==0
+ and $t8,0x1f
+.endif
+ mul $t8,$t8,$t7
+ srl $t6,\src,3
+ and $t6,0x1f
+ srl $t8,8
+ addu $t8,$t6
+.if \shift!=0
+ sll $t8,\shift+11
+ or \fb,$t8
+.else
+ sll \fb,$t8,11
+.endif
+
+ /*
+ * green
+ * dG = (d >> 5) & 0x3f
+ * dG = (f*dG) >> 8
+ * sG = (s >> ( 8+2))&0x3F;
+ */
+ srl $t8,\dreg,\shift+5
+ and $t8,0x3f
+ mul $t8,$t8,$t7
+ srl $t6,\src,8+2
+ and $t6,0x3f
+ srl $t8,8
+ addu $t8,$t6
+ sll $t8,\shift + 5
+ or \fb,$t8
+
+ /* blue */
+.if \shift!=0
+ srl $t8,\dreg,\shift
+ and $t8,0x1f
+.else
+ and $t8,\dreg,0x1f
+.endif
+ mul $t8,$t8,$t7
+ srl $t6,\src,(8+8+3)
+ and $t6,0x1f
+ srl $t8,8
+ addu $t8,$t6
+.if \shift!=0
+ sll $t8,\shift
+.endif
+ or \fb,$t8
+DBG .set push
+DBG .set noat
+DBG .set mips32r2
+DBG rdhwr $t8,$2
+DBG subu $t8,$at
+DBG sltu $at,$t8,$v0
+DBG movn $v0,$t8,$at
+DBG sgtu $at,$t8,$v1
+DBG movn $v1,$t8,$at
+DBG .set pop
+ .endm
+#endif
+
+ .text
+ .align
+
+ .global scanline_t32cb16blend_mips
+ .ent scanline_t32cb16blend_mips
+scanline_t32cb16blend_mips:
+DBG li $v0,0xffffffff
+DBG li $v1,0
+ /* Align the destination if necessary */
+ and $t0,$a0,3
+ beqz $t0,aligned
+
+ /* as long as there is at least one pixel */
+ beqz $a2,done
+
+ lw $t4,($a1)
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
+
+aligned:
+ /* Check to see if its worth unrolling the loop */
+ subu $a2,4
+ bltz $a2,tail
+
+ /* Process 4 pixels at a time */
+fourpixels:
+ /* 1st pair of pixels */
+ lw $t4,0($a1)
+ lw $t5,4($a1)
+ addu $a0,8
+ addu $a1,16
+
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
+
+ /* load the destination */
+ lw $t3,-8($a0)
+
+ pixel $t3,$t4,$t1,0
+ pixel $t3,$t5,$t1,16
+ sw $t1,-8($a0)
+
+1:
+ /* 2nd pair of pixels */
+ lw $t4,-8($a1)
+ lw $t5,-4($a1)
+
+ /* both are zero, skip this pair */
+ or $t3,$t4,$t5
+ beqz $t3,1f
+
+ /* load the destination */
+ lw $t3,-4($a0)
+
+ pixel $t3,$t4,$t1,0
+ pixel $t3,$t5,$t1,16
+ sw $t1,-4($a0)
+
+1: subu $a2,4
+ bgtz $a2,fourpixels
+
+tail:
+ /* the pixel count underran, restore it now */
+ addu $a2,4
+
+ /* handle the last 0..3 pixels */
+ beqz $a2,done
+onepixel:
+ lw $t4,($a1)
+ addu $a0,2
+ addu $a1,4
+ beqz $t4,1f
+ lhu $t3,-2($a0)
+ pixel $t3,$t4,$t1,0
+ sh $t1,-2($a0)
+1: subu $a2,1
+ bnez $a2,onepixel
+done:
+DBG .set push
+DBG .set mips32r2
+DBG rdhwr $a0,$3
+DBG mul $v0,$a0
+DBG mul $v1,$a0
+DBG .set pop
+ j $ra
+ .end scanline_t32cb16blend_mips