diff options
Diffstat (limited to 'arch/alpha/lib/memmove.S')
-rw-r--r-- | arch/alpha/lib/memmove.S | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S new file mode 100644 index 0000000..eb3b6e0 --- /dev/null +++ b/arch/alpha/lib/memmove.S @@ -0,0 +1,181 @@ +/* + * arch/alpha/lib/memmove.S + * + * Barely optimized memmove routine for Alpha EV5. + * + * This is hand-massaged output from the original memcpy.c. We defer to + * memcpy whenever possible; the backwards copy loops are not unrolled. + */ + + .set noat + .set noreorder + .text + + .align 4 + .globl memmove + .ent memmove +memmove: + ldgp $29, 0($27) + unop + nop + .prologue 1 + + addq $16,$18,$4 + addq $17,$18,$5 + cmpule $4,$17,$1 /* dest + n <= src */ + cmpule $5,$16,$2 /* dest >= src + n */ + + bis $1,$2,$1 + mov $16,$0 + xor $16,$17,$2 + bne $1,memcpy !samegp + + and $2,7,$2 /* Test for src/dest co-alignment. */ + and $16,7,$1 + cmpule $16,$17,$3 + bne $3,$memmove_up /* dest < src */ + + and $4,7,$1 + bne $2,$misaligned_dn + unop + beq $1,$skip_aligned_byte_loop_head_dn + +$aligned_byte_loop_head_dn: + lda $4,-1($4) + lda $5,-1($5) + unop + ble $18,$egress + + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + and $4,7,$6 + + stq_u $1,0($4) + bne $6,$aligned_byte_loop_head_dn + +$skip_aligned_byte_loop_head_dn: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_dn + +$aligned_word_loop_dn: + ldq $1,-8($5) + nop + lda $5,-8($5) + lda $18,-8($18) + + stq $1,-8($4) + nop + lda $4,-8($4) + bge $18,$aligned_word_loop_dn + +$skip_aligned_word_loop_dn: + lda $18,8($18) + bgt $18,$byte_loop_tail_dn + unop + ret $31,($26),1 + + .align 4 +$misaligned_dn: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_dn: + ldq_u $3,-1($5) + ldq_u $2,-1($4) + lda $5,-1($5) + lda $4,-1($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + stq_u $1,0($4) + bgt $18,$byte_loop_tail_dn + br $egress + +$memmove_up: + mov $16,$4 + mov $17,$5 + bne $2,$misaligned_up + beq $1,$skip_aligned_byte_loop_head_up + +$aligned_byte_loop_head_up: + unop + ble $18,$egress + ldq_u $3,0($5) + ldq_u $2,0($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + lda $5,1($5) + stq_u $1,0($4) + lda $4,1($4) + + and $4,7,$6 + bne $6,$aligned_byte_loop_head_up + +$skip_aligned_byte_loop_head_up: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_up + +$aligned_word_loop_up: + ldq $1,0($5) + nop + lda $5,8($5) + lda $18,-8($18) + + stq $1,0($4) + nop + lda $4,8($4) + bge $18,$aligned_word_loop_up + +$skip_aligned_word_loop_up: + lda $18,8($18) + bgt $18,$byte_loop_tail_up + unop + ret $31,($26),1 + + .align 4 +$misaligned_up: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_up: + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + stq_u $1,0($4) + + lda $5,1($5) + lda $4,1($4) + nop + bgt $18,$byte_loop_tail_up + +$egress: + ret $31,($26),1 + nop + nop + nop + + .end memmove |