diff options
author | Brian Paul <brian.paul@tungstengraphics.com> | 2001-05-23 14:27:03 +0000 |
---|---|---|
committer | Brian Paul <brian.paul@tungstengraphics.com> | 2001-05-23 14:27:03 +0000 |
commit | 7943b349d696f8030f0d2f836ad42a762f4c6026 (patch) | |
tree | f6ff639bac006d8d755f06ce5e937571e255c508 /src/mesa/sparc/clip.S | |
parent | 8bd06931018d5662b92f1cfeee2abaf352d0044c (diff) | |
download | external_mesa3d-7943b349d696f8030f0d2f836ad42a762f4c6026.zip external_mesa3d-7943b349d696f8030f0d2f836ad42a762f4c6026.tar.gz external_mesa3d-7943b349d696f8030f0d2f836ad42a762f4c6026.tar.bz2 |
SPARC assembly optimizations from David Miller.
Diffstat (limited to 'src/mesa/sparc/clip.S')
-rw-r--r-- | src/mesa/sparc/clip.S | 234 |
1 files changed, 234 insertions, 0 deletions
/* $Id: clip.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */

/*
 * src/mesa/sparc/clip.S
 *
 * SPARC (GNU as syntax, run through the C preprocessor) clip-test
 * routines operating on GLvector4f arrays of 4-component points.
 *
 * Register policy: both routines open a register window with `save`,
 * so every working value lives in %l0-%l7.  The only global register
 * touched is %g1, which is volatile in the SPARC ABIs; the
 * application-reserved (%g2-%g4) and system-reserved (%g5-%g7)
 * globals are left alone.
 */

/*
 * Field offsets inside GLvector4f depend only on the pointer size, so
 * select the layout on the 64-bit ABI macros rather than on the CPU
 * level (__sparc_v9__ is also defined for 32-bit code built for a V9
 * CPU, where pointers are still 4 bytes wide).
 *
 * FRAME_SIZE is the stack frame allocated by `save`.  The 64-bit ABI
 * needs room for sixteen 8-byte window registers plus the argument
 * area (176 bytes minimum); the 32-bit value is kept at 64 bytes.
 */
#if defined(__arch64__) || defined(__sparcv9)
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#define FRAME_SIZE	176
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#define FRAME_SIZE	64
#endif

#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15

	.text
	.align	64

	/* Constant 1.0f.  It must stay immediately before clip_table:
	 * the routines below derive the table address as
	 * one_dot_zero + 4.
	 */
one_dot_zero:
	.word	0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table holds 128 clip-mask bytes and is indexed by the
	 * 7-bit value assembled in the loops below (see the comment
	 * at the top of each loop for the bit layout).
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

	/* Helper for position-independent addressing: `call` leaves
	 * the address of the call instruction in %o7, and this stub
	 * simply returns so the caller can use that value.
	 */
	.align	64
__pc_tramp:
	retl
	 nop

/*
 * _mesa_sparc_cliptest_points4(GLvector4f *clip_vec, GLvector4f *proj_vec,
 *				GLubyte clipMask[], GLubyte *orMask,
 *				GLubyte *andMask)
 *
 * In (after `save`):
 *	%i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *	%i3 = orMask,   %i4 = andMask
 * Out:
 *	%o0 = proj_vec
 *
 * For every input point a clip byte is looked up in clip_table and
 * stored to clipMask[i].  If the byte is zero the first three elements
 * are multiplied by 1/element[3] and written to proj_vec together with
 * 1/element[3]; otherwise (0, 0, 0, 1.0) is written.  proj_vec's
 * flags get VEC_SIZE_4 or'ed in, its size is set to 3 and its count is
 * copied from clip_vec.  *orMask accumulates the OR of all clip bytes;
 * *andMask accumulates the AND of the non-zero clip bytes and is
 * forced to 0 when fewer than `count` points had a non-zero byte.
 *
 * Clobbers: %g1, %f0-%f4, %f8, integer condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -FRAME_SIZE, %sp

	/* %g1 = &one_dot_zero, computed PC-relative.  The `sub` sits in
	 * the delay slot of the call, so `.` there is (call address + 4)
	 * while %o7 holds the call address itself.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! f4 = 1.0f
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	LDPTR	[%i1 + V4F_START], %i5
	ldub	[%i3], %l4			! tmpOrMask
	ldub	[%i4], %l5			! tmpAndMask
	sll	%l5, 8, %l5
	or	%l4, %l5, %l4			! l4 = (and << 8) | or

	ld	[%i1 + V4F_FLAGS], %l5
	or	%l5, VEC_SIZE_4, %l5
	st	%l5, [%i1 + V4F_FLAGS]
	mov	3, %l5
	st	%l5, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2
	clr	%l0

	/* The loop below tests its exit condition at the bottom, so an
	 * empty vector must bypass it; the masks are then written back
	 * unchanged.
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c (number of points with a non-zero clip byte)
	 * l4:	(tmpAndMask << 8) | tmpOrMask
	 * l5:	table index under construction, then the clip byte
	 * l6:	raw bits of element 2/1/0, shifted left by one
	 * l7:	raw bits of element 3, shifted left by one
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 *
	 * Table index, most significant bit first:
	 *	sign(e3), sign(e2), lt(e3,e2), sign(e1), lt(e3,e1),
	 *	sign(e0), lt(e3,e0)
	 * where eN is the element at byte offset 4*N, sign() is the
	 * top bit of its raw 32-bit pattern (moved into carry by
	 * addcc) and lt(a,b) is the borrow from comparing the two
	 * patterns after both were shifted left by one.
	 */
1:	ld	[%i0 + 0x0c], %f3		! e3 as float, for 1/e3
	ld	[%i0 + 0x0c], %l7		! e3 as raw bits
	ld	[%i0 + 0x08], %l6
	fdivs	%f4, %f3, %f8			! f8 = 1.0 / e3
	addcc	%l7, %l7, %l7			! carry = sign(e3)
	addx	%g0, 0x0, %l5			! index = carry
	addcc	%l6, %l6, %l6			! carry = sign(e2)
	addx	%l5, %l5, %l5			! index = index*2 + carry
	subcc	%l7, %l6, %g0			! carry = lt(e3, e2)
	ld	[%i0 + 0x04], %l6
	addx	%l5, %l5, %l5
	addcc	%l6, %l6, %l6			! carry = sign(e1)
	addx	%l5, %l5, %l5
	subcc	%l7, %l6, %g0			! carry = lt(e3, e1)
	ld	[%i0 + 0x00], %l6
	addx	%l5, %l5, %l5
	addcc	%l6, %l6, %l6			! carry = sign(e0)
	addx	%l5, %l5, %l5
	subcc	%l7, %l6, %g0			! carry = lt(e3, e0)
	addx	%l5, %l5, %l5
	ldub	[%g1 + %l5], %l5		! l5 = clip byte
	cmp	%l5, 0
	be	2f
	 stb	%l5, [%i2]			! delay slot: always stored

	/* Non-zero clip byte: fold it into both masks and emit
	 * (0, 0, 0, 1.0).
	 */
	sll	%l5, 8, %l6
	add	%l2, 1, %l2
	st	%g0, [%i5 + 0x00]
	or	%l6, 0xff, %l6			! keep the low (or) byte intact
	or	%l4, %l5, %l4			! or-mask  |= clip byte
	st	%g0, [%i5 + 0x04]
	and	%l4, %l6, %l4			! and-mask &= clip byte
	st	%g0, [%i5 + 0x08]
	b	3f
	 st	%f4, [%i5 + 0x0c]

	/* Zero clip byte: scale e0..e2 by 1/e3 and store 1/e3 last. */
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0
	st	%f0, [%i5 + 0x00]
	fmuls	%f1, %f8, %f1
	st	%f1, [%i5 + 0x04]
	fmuls	%f2, %f8, %f2
	st	%f2, [%i5 + 0x08]
	st	%f8, [%i5 + 0x0c]

3:	add	%i5, 0x10, %i5
	add	%l0, 1, %l0
	add	%i2, 1, %i2
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next input point

	/* Write the masks back.  The annulled delay slot clears the
	 * and-mask only when the branch is taken, i.e. when c < count.
	 */
4:	stb	%l4, [%i3]
	srl	%l4, 8, %l5
	cmp	%l2, %l3
	bl,a	5f
	 clr	%l5
5:	stb	%l5, [%i4]
	ret
	 restore	%i1, 0x0, %o0		! return proj_vec

/*
 * _mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
 *				   GLvector4f *proj_vec,
 *				   GLubyte clipMask[], GLubyte *orMask,
 *				   GLubyte *andMask)
 *
 * Same register interface and mask handling as
 * _mesa_sparc_cliptest_points4, but no point data is written to
 * proj_vec: only clipMask[], *orMask, *andMask and proj_vec's
 * flags/size/count fields are updated.
 *
 * Clobbers: %g1, integer condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -FRAME_SIZE, %sp

	/* %g1 = &one_dot_zero (PC-relative, see the delay-slot note in
	 * the routine above), then step over the constant to reach
	 * clip_table.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	LDPTR	[%i1 + V4F_START], %i5		! loaded but not used below
	ldub	[%i3], %l4			! tmpOrMask
	ldub	[%i4], %l5			! tmpAndMask
	sll	%l5, 8, %l5
	or	%l4, %l5, %l4			! l4 = (and << 8) | or

	ld	[%i1 + V4F_FLAGS], %l5
	or	%l5, VEC_SIZE_4, %l5
	st	%l5, [%i1 + V4F_FLAGS]
	mov	3, %l5
	st	%l5, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2
	clr	%l0

	/* Bottom-tested loop: bypass it entirely for an empty vector. */
	cmp	%l3, 0
	be	3f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c (number of points with a non-zero clip byte)
	 * l4:	(tmpAndMask << 8) | tmpOrMask
	 * l5:	table index under construction, then the clip byte
	 * l6:	raw bits of element 2/1/0, shifted left by one
	 * l7:	raw bits of element 3, shifted left by one
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 *
	 * The 7-bit table index is built exactly as in
	 * _mesa_sparc_cliptest_points4: sign bit of each element via
	 * addcc, unsigned borrow of the shifted patterns via subcc,
	 * each shifted into l5 with addx.
	 */
1:	ld	[%i0 + 0x0c], %l7
	ld	[%i0 + 0x08], %l6
	addcc	%l7, %l7, %l7			! carry = sign(e3)
	addx	%g0, 0x0, %l5			! index = carry
	addcc	%l6, %l6, %l6			! carry = sign(e2)
	addx	%l5, %l5, %l5			! index = index*2 + carry
	subcc	%l7, %l6, %g0			! carry = lt(e3, e2)
	ld	[%i0 + 0x04], %l6
	addx	%l5, %l5, %l5
	addcc	%l6, %l6, %l6			! carry = sign(e1)
	addx	%l5, %l5, %l5
	subcc	%l7, %l6, %g0			! carry = lt(e3, e1)
	ld	[%i0 + 0x00], %l6
	addx	%l5, %l5, %l5
	addcc	%l6, %l6, %l6			! carry = sign(e0)
	addx	%l5, %l5, %l5
	subcc	%l7, %l6, %g0			! carry = lt(e3, e0)
	addx	%l5, %l5, %l5
	ldub	[%g1 + %l5], %l5		! l5 = clip byte
	cmp	%l5, 0
	be	2f
	 stb	%l5, [%i2]			! delay slot: always stored

	/* Non-zero clip byte: fold it into both masks. */
	sll	%l5, 8, %l6
	add	%l2, 1, %l2
	or	%l6, 0xff, %l6			! keep the low (or) byte intact
	or	%l4, %l5, %l4			! or-mask  |= clip byte
	and	%l4, %l6, %l4			! and-mask &= clip byte

2:	add	%l0, 1, %l0
	add	%i2, 1, %i2
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next input point

	/* Write the masks back; the and-mask is cleared only when
	 * c < count (annulled delay slot runs on the taken path only).
	 */
3:	stb	%l4, [%i3]
	srl	%l4, 8, %l5
	cmp	%l2, %l3
	bl,a	4f
	 clr	%l5
4:	stb	%l5, [%i4]
	ret
	 restore	%i1, 0x0, %o0		! return proj_vec