aboutsummaryrefslogtreecommitdiffstats
path: root/arch/alpha/lib/memmove.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/alpha/lib/memmove.S')
-rw-r--r--arch/alpha/lib/memmove.S181
1 files changed, 181 insertions, 0 deletions
diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S
new file mode 100644
index 0000000..eb3b6e0
--- /dev/null
+++ b/arch/alpha/lib/memmove.S
@@ -0,0 +1,181 @@
+/*
+ * arch/alpha/lib/memmove.S
+ *
+ * Barely optimized memmove routine for Alpha EV5.
+ *
+ * This is hand-massaged output from the original memcpy.c. We defer to
+ * memcpy whenever possible; the backwards copy loops are not unrolled.
+ */
+
+ .set noat
+ .set noreorder
+ .text
+
+ .align 4
+ .globl memmove
+ .ent memmove
+memmove:
+ ldgp $29, 0($27)
+ unop
+ nop
+ .prologue 1
+
+ addq $16,$18,$4
+ addq $17,$18,$5
+ cmpule $4,$17,$1 /* dest + n <= src */
+ cmpule $5,$16,$2 /* dest >= src + n */
+
+ bis $1,$2,$1
+ mov $16,$0
+ xor $16,$17,$2
+ bne $1,memcpy !samegp
+
+ and $2,7,$2 /* Test for src/dest co-alignment. */
+ and $16,7,$1
+ cmpule $16,$17,$3
+ bne $3,$memmove_up /* dest < src */
+
+ and $4,7,$1
+ bne $2,$misaligned_dn
+ unop
+ beq $1,$skip_aligned_byte_loop_head_dn
+
+$aligned_byte_loop_head_dn:
+ lda $4,-1($4)
+ lda $5,-1($5)
+ unop
+ ble $18,$egress
+
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+ lda $18,-1($18)
+ extbl $3,$5,$1
+
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+ bis $1,$2,$1
+ and $4,7,$6
+
+ stq_u $1,0($4)
+ bne $6,$aligned_byte_loop_head_dn
+
+$skip_aligned_byte_loop_head_dn:
+ lda $18,-8($18)
+ blt $18,$skip_aligned_word_loop_dn
+
+$aligned_word_loop_dn:
+ ldq $1,-8($5)
+ nop
+ lda $5,-8($5)
+ lda $18,-8($18)
+
+ stq $1,-8($4)
+ nop
+ lda $4,-8($4)
+ bge $18,$aligned_word_loop_dn
+
+$skip_aligned_word_loop_dn:
+ lda $18,8($18)
+ bgt $18,$byte_loop_tail_dn
+ unop
+ ret $31,($26),1
+
+ .align 4
+$misaligned_dn:
+ nop
+ fnop
+ unop
+ beq $18,$egress
+
+$byte_loop_tail_dn:
+ ldq_u $3,-1($5)
+ ldq_u $2,-1($4)
+ lda $5,-1($5)
+ lda $4,-1($4)
+
+ lda $18,-1($18)
+ extbl $3,$5,$1
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+
+ bis $1,$2,$1
+ stq_u $1,0($4)
+ bgt $18,$byte_loop_tail_dn
+ br $egress
+
+$memmove_up:
+ mov $16,$4
+ mov $17,$5
+ bne $2,$misaligned_up
+ beq $1,$skip_aligned_byte_loop_head_up
+
+$aligned_byte_loop_head_up:
+ unop
+ ble $18,$egress
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+
+ lda $18,-1($18)
+ extbl $3,$5,$1
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+
+ bis $1,$2,$1
+ lda $5,1($5)
+ stq_u $1,0($4)
+ lda $4,1($4)
+
+ and $4,7,$6
+ bne $6,$aligned_byte_loop_head_up
+
+$skip_aligned_byte_loop_head_up:
+ lda $18,-8($18)
+ blt $18,$skip_aligned_word_loop_up
+
+$aligned_word_loop_up:
+ ldq $1,0($5)
+ nop
+ lda $5,8($5)
+ lda $18,-8($18)
+
+ stq $1,0($4)
+ nop
+ lda $4,8($4)
+ bge $18,$aligned_word_loop_up
+
+$skip_aligned_word_loop_up:
+ lda $18,8($18)
+ bgt $18,$byte_loop_tail_up
+ unop
+ ret $31,($26),1
+
+ .align 4
+$misaligned_up:
+ nop
+ fnop
+ unop
+ beq $18,$egress
+
+$byte_loop_tail_up:
+ ldq_u $3,0($5)
+ ldq_u $2,0($4)
+ lda $18,-1($18)
+ extbl $3,$5,$1
+
+ insbl $1,$4,$1
+ mskbl $2,$4,$2
+ bis $1,$2,$1
+ stq_u $1,0($4)
+
+ lda $5,1($5)
+ lda $4,1($4)
+ nop
+ bgt $18,$byte_loop_tail_up
+
+$egress:
+ ret $31,($26),1
+ nop
+ nop
+ nop
+
+ .end memmove