summaryrefslogtreecommitdiffstats
path: root/libpixelflinger/arch-aarch64/t32cb16blend.S
diff options
context:
space:
mode:
Diffstat (limited to 'libpixelflinger/arch-aarch64/t32cb16blend.S')
-rw-r--r--libpixelflinger/arch-aarch64/t32cb16blend.S213
1 files changed, 213 insertions, 0 deletions
diff --git a/libpixelflinger/arch-aarch64/t32cb16blend.S b/libpixelflinger/arch-aarch64/t32cb16blend.S
new file mode 100644
index 0000000..b62ed36
--- /dev/null
+++ b/libpixelflinger/arch-aarch64/t32cb16blend.S
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+ .text
+ .align
+
+ .global scanline_t32cb16blend_aarch64
+
+/*
+ * .macro pixel
+ *
+ * This macro alpha blends RGB565 original pixel located in either
+ * top or bottom 16 bits of DREG register with SRC 32 bit pixel value
+ * and writes the result to FB register
+ *
+ * \DREG is a 32-bit register containing *two* original destination RGB565
+ * pixels, with the even one in the low-16 bits, and the odd one in the
+ * high 16 bits.
+ *
+ * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
+ *
+ * \FB is a target register that will contain the blended pixel values.
+ *
+ * \ODD is either 0 or 1 and indicates if we're blending the lower or
+ * upper 16-bit pixels in DREG into FB
+ *
+ *
+ * clobbered: w6, w7, w16, w17, w18
+ *
+ */
+
+.macro pixel, DREG, SRC, FB, ODD
+
+ // SRC = 0xAABBGGRR
+ lsr w7, \SRC, #24 // sA
+ add w7, w7, w7, lsr #7 // sA + (sA >> 7)
+ mov w6, #0x100
+ sub w7, w6, w7 // sA = 0x100 - (sA+(sA>>7))
+
+1:
+
+.if \ODD //Blending odd pixel present in top 16 bits of DREG register
+
+ // red
+ lsr w16, \DREG, #(16 + 11)
+ mul w16, w7, w16
+ lsr w6, \SRC, #3
+ and w6, w6, #0x1F
+ add w16, w6, w16, lsr #8
+ cmp w16, #0x1F
+ orr w17, \FB, #(0x1F<<(16 + 11))
+ orr w18, \FB, w16, lsl #(16 + 11)
+ csel \FB, w17, w18, hi
+ // green
+ and w6, \DREG, #(0x3F<<(16 + 5))
+ lsr w17,w6,#(16+5)
+ mul w6, w7, w17
+ lsr w16, \SRC, #(8+2)
+ and w16, w16, #0x3F
+ add w6, w16, w6, lsr #8
+ cmp w6, #0x3F
+ orr w17, \FB, #(0x3F<<(16 + 5))
+ orr w18, \FB, w6, lsl #(16 + 5)
+ csel \FB, w17, w18, hi
+ // blue
+ and w16, \DREG, #(0x1F << 16)
+ lsr w17,w16,#16
+ mul w16, w7, w17
+ lsr w6, \SRC, #(8+8+3)
+ and w6, w6, #0x1F
+ add w16, w6, w16, lsr #8
+ cmp w16, #0x1F
+ orr w17, \FB, #(0x1F << 16)
+ orr w18, \FB, w16, lsl #16
+ csel \FB, w17, w18, hi
+
+.else //Blending even pixel present in bottom 16 bits of DREG register
+
+ // red
+ lsr w16, \DREG, #11
+ and w16, w16, #0x1F
+ mul w16, w7, w16
+ lsr w6, \SRC, #3
+ and w6, w6, #0x1F
+ add w16, w6, w16, lsr #8
+ cmp w16, #0x1F
+ mov w17, #(0x1F<<11)
+ lsl w18, w16, #11
+ csel \FB, w17, w18, hi
+
+
+ // green
+ and w6, \DREG, #(0x3F<<5)
+ mul w6, w7, w6
+ lsr w16, \SRC, #(8+2)
+ and w16, w16, #0x3F
+ add w6, w16, w6, lsr #(5+8)
+ cmp w6, #0x3F
+ orr w17, \FB, #(0x3F<<5)
+ orr w18, \FB, w6, lsl #5
+ csel \FB, w17, w18, hi
+
+ // blue
+ and w16, \DREG, #0x1F
+ mul w16, w7, w16
+ lsr w6, \SRC, #(8+8+3)
+ and w6, w6, #0x1F
+ add w16, w6, w16, lsr #8
+ cmp w16, #0x1F
+ orr w17, \FB, #0x1F
+ orr w18, \FB, w16
+ csel \FB, w17, w18, hi
+
+.endif // End of blending even pixel
+
+.endm // End of pixel macro
+
+
+// x0: dst ptr
+// x1: src ptr
+// w2: count
+// w3: d
+// w4: s0
+// w5: s1
+// w6: pixel
+// w7: pixel
+// w8: free
+// w9: free
+// w10: free
+// w11: free
+// w12: scratch
+// w14: pixel
+
+scanline_t32cb16blend_aarch64:
+
+ // align DST to 32 bits
+ tst x0, #0x3
+ b.eq aligned
+ subs w2, w2, #1
+ b.lo return
+
+last:
+ ldr w4, [x1], #4
+ ldrh w3, [x0]
+ pixel w3, w4, w12, 0
+ strh w12, [x0], #2
+
+aligned:
+ subs w2, w2, #2
+ b.lo 9f
+
+ // The main loop is unrolled twice and processes 4 pixels
+8:
+ ldp w4,w5, [x1], #8
+ add x0, x0, #4
+ // it's all zero, skip this pixel
+ orr w3, w4, w5
+ cbz w3, 7f
+
+ // load the destination
+ ldr w3, [x0, #-4]
+ // stream the destination
+ pixel w3, w4, w12, 0
+ pixel w3, w5, w12, 1
+ str w12, [x0, #-4]
+
+ // 2nd iteration of the loop, don't stream anything
+ subs w2, w2, #2
+ csel w4, w5, w4, lt
+ blt 9f
+ ldp w4,w5, [x1], #8
+ add x0, x0, #4
+ orr w3, w4, w5
+ cbz w3, 7f
+ ldr w3, [x0, #-4]
+ pixel w3, w4, w12, 0
+ pixel w3, w5, w12, 1
+ str w12, [x0, #-4]
+
+7: subs w2, w2, #2
+ bhs 8b
+ mov w4, w5
+
+9: adds w2, w2, #1
+ b.lo return
+ b last
+
+return:
+ ret