From d7a77156eb13973f7fce5c9db6113bef83bc205b Mon Sep 17 00:00:00 2001
From: Andy Hung <hunga@google.com>
Date: Fri, 6 Feb 2015 14:58:38 -0800
Subject: Fix comments in resampler assembly

Change-Id: Iedbd14a2c6c73dd9631813bd2414bf11cee78c32
---
 .../audioflinger/AudioResamplerFirProcessNeon.h    | 80 +++++++++++-----------
 1 file changed, 40 insertions(+), 40 deletions(-)

(limited to 'services/audioflinger/AudioResamplerFirProcessNeon.h')

diff --git a/services/audioflinger/AudioResamplerFirProcessNeon.h b/services/audioflinger/AudioResamplerFirProcessNeon.h
index d4fa7ad..29ff179 100644
--- a/services/audioflinger/AudioResamplerFirProcessNeon.h
+++ b/services/audioflinger/AudioResamplerFirProcessNeon.h
@@ -115,13 +115,13 @@ inline void ProcessL<2, 16>(int32_t* const out,
 
         "1:                                      \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo frames
         "vld1.16        {q8}, [%[coefsP0]:128]!  \n"// (1) load 8 16-bits coefs
         "vld1.16        {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs
 
-        "vrev64.16      q2, q2                   \n"// (1) reverse 8 frames of the left positive
-        "vrev64.16      q3, q3                   \n"// (0 combines+) reverse right positive
+        "vrev64.16      q2, q2                   \n"// (1) reverse 8 samples of positive left
+        "vrev64.16      q3, q3                   \n"// (0 combines+) reverse positive right
 
         "vmlal.s16      q0, d4, d17              \n"// (1) multiply (reversed) samples left
         "vmlal.s16      q0, d5, d16              \n"// (1) multiply (reversed) samples left
@@ -247,8 +247,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "1:                                      \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo frames
         "vld1.16        {q8}, [%[coefsP0]:128]!  \n"// (1) load 8 16-bits coefs
         "vld1.16        {q9}, [%[coefsP1]:128]!  \n"// (1) load 8 16-bits coefs for interpolation
         "vld1.16        {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs
@@ -260,8 +260,8 @@ inline void Process<2, 16>(int32_t* const out,
         "vqrdmulh.s16   q9, q9, d2[0]            \n"// (2) interpolate (step2) 1st set of coefs
         "vqrdmulh.s16   q11, q11, d2[0]          \n"// (2) interpolate (step2) 2nd set of coefs
 
-        "vrev64.16      q2, q2                   \n"// (1) reverse 8 frames of the left positive
-        "vrev64.16      q3, q3                   \n"// (1) reverse 8 frames of the right positive
+        "vrev64.16      q2, q2                   \n"// (1) reverse 8 samples of positive left
+        "vrev64.16      q3, q3                   \n"// (1) reverse 8 samples of positive right
 
         "vadd.s16       q8, q8, q9               \n"// (1+1d) interpolate (step3) 1st set
         "vadd.s16       q10, q10, q11            \n"// (1+1d) interpolate (step3) 2nd set
@@ -323,7 +323,7 @@ inline void ProcessL<1, 16>(int32_t* const out,
         "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
         "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of the positive side
 
         "vshll.s16      q12, d4, #15                  \n"// extend samples to 31 bits
         "vshll.s16      q13, d5, #15                  \n"// extend samples to 31 bits
@@ -331,10 +331,10 @@ inline void ProcessL<1, 16>(int32_t* const out,
         "vshll.s16      q14, d6, #15                  \n"// extend samples to 31 bits
         "vshll.s16      q15, d7, #15                  \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
@@ -380,13 +380,13 @@ inline void ProcessL<2, 16>(int32_t* const out,
 
         "1:                                           \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 4 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 4 16-bits stereo samples
-        "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 4 32-bits coefs
-        "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs
+        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 8 16-bits stereo frames
+        "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
+        "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
-        "vrev64.16      q3, q3                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of positive left
+        "vrev64.16      q3, q3                        \n"// reverse 8 samples of positive right
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -394,15 +394,15 @@ inline void ProcessL<2, 16>(int32_t* const out,
         "vshll.s16      q14,  d10, #15                \n"// extend samples to 31 bits
         "vshll.s16      q15,  d11, #15                \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by coef
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by coef
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q0, q0, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q0, q0, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q0, q0, q15                   \n"// accumulate result
+        "vadd.s32       q0, q0, q13                   \n"// accumulate result
 
         "vshll.s16      q12,  d6, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d7, #15                 \n"// extend samples to 31 bits
@@ -410,15 +410,15 @@ inline void ProcessL<2, 16>(int32_t* const out,
         "vshll.s16      q14,  d12, #15                \n"// extend samples to 31 bits
         "vshll.s16      q15,  d13, #15                \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by coef
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by coef
 
         "vadd.s32       q4, q4, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q4, q4, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q4, q4, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q4, q4, q15                   \n"// accumulate result
+        "vadd.s32       q4, q4, q13                   \n"// accumulate result
 
         "subs           %[count], %[count], #8        \n"// update loop counter
         "sub            %[sP], %[sP], #32             \n"// move pointer to next set of samples
@@ -485,7 +485,7 @@ inline void Process<1, 16>(int32_t* const out,
         "vadd.s32       q10, q10, q14                 \n"// interpolate (step3)
         "vadd.s32       q11, q11, q15                 \n"// interpolate (step3)
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of the positive side
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -549,8 +549,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "1:                                           \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 4 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 4 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 8 16-bits stereo frames
         "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
         "vld1.32        {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs
         "vld1.32        {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs
@@ -571,8 +571,8 @@ inline void Process<2, 16>(int32_t* const out,
         "vadd.s32       q10, q10, q14                 \n"// interpolate (step3)
         "vadd.s32       q11, q11, q15                 \n"// interpolate (step3)
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
-        "vrev64.16      q3, q3                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of positive left
+        "vrev64.16      q3, q3                        \n"// reverse 8 samples of positive right
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -587,8 +587,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q0, q0, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q0, q0, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q0, q0, q15                   \n"// accumulate result
+        "vadd.s32       q0, q0, q13                   \n"// accumulate result
 
         "vshll.s16      q12,  d6, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d7, #15                 \n"// extend samples to 31 bits
@@ -603,8 +603,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "vadd.s32       q4, q4, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q4, q4, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q4, q4, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q4, q4, q15                   \n"// accumulate result
+        "vadd.s32       q4, q4, q13                   \n"// accumulate result
 
         "subs           %[count], %[count], #8        \n"// update loop counter
         "sub            %[sP], %[sP], #32             \n"// move pointer to next set of samples
-- 
cgit v1.1