@ NOTE(recovery): this file was reconstructed from a cgit diff listing of
@ src/libFLAC/arm/lpc_asm.s (1 file changed, 678 insertions, 0 deletions).
@ libFLAC - Free Lossless Audio Codec library
@ Copyright (C) 2001,2002,2003,2004,2005,2006 Josh Coalson
@
@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions
@ are met:
@
@ - Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@
@ - Redistributions in binary form must reproduce the above copyright
@ notice, this list of conditions and the following disclaimer in the
@ documentation and/or other materials provided with the distribution.
@
@ - Neither the name of the Xiph.org Foundation nor the names of its
@ contributors may be used to endorse or promote products derived from
@ this software without specific prior written permission.
@
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
@ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES@ LOSS OF USE, DATA, OR
@ PROFITS@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ TODO: special cases for order 9, 10, 11, 12 may be further optimizable...
+ + .text + .align 2 + .global FLAC__lpc_restore_signal_asm_arm + .type FLAC__lpc_restore_signal_asm_arm, %function +FLAC__lpc_restore_signal_asm_arm: + stmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + @ r0 = residual; + @ r1 = data_len; + @ r2 = qlp_coeff; + @ r3 = order; + @ r5 = lp_quantization + @ r14 = &data + ldr r5, [r13, #40] @ lp_quantization + ldr r14, [r13, #44] @ &data + + + @ Special case each of the common LPC order levels used by encoders + @ switch (order) { ... + sub r6, r3, #1 + cmp r6, #11 + ldrls r15, [r15, r6, asl #2] + b .Lgeneric_restore_signal + + .align 2 + .word .Lorder1 + .word .Lorder2 + .word .Lorder3 + .word .Lorder4 + .word .Lorder5 + .word .Lorder6 + .word .Lorder7 + .word .Lorder8 + .word .Lorder9 + .word .Lorder10 + .word .Lorder11 + .word .Lorder12 +.Lorder12: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder12a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r11} @ qlp_coeff[8 - 11] + ldmdb r14, {r3, r4, r6, r7} + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + ldmdb r14, {r3, r4, r6, r7} @ data[-12, -11, -10, -9], &data[-8] + mul r12, r11, r3 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, 
{r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder12a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder11: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder11a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r10} @ qlp_coeff[8 - 11] + ldmda r14, {r3, r4, r6, r7} @ we'll reuse r4, r6, and r7 + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r10, r4 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder11a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder10: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder10a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ 
qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8, r9} + ldmda r14, {r4, r6, r7} @ we'll reuse r4, r6, and r7 + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r9, r6 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder10a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder9: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder9a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r8, [r2] + ldmda r14, {r6, r7} @ we'll reuse r7 + mla r12, r8, r6, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r8, r7 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb 
r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder9a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder8: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 +.Lorder8a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r11} @ qlp_coeff [4 - 7] + ldmdb r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + ldmdb r14, {r3, r4, r6, r7} @ data[-8, -7, -6, -5] + mul r12, r11, r3 @ q[7] * d[-8] + mla r12, r10, r4, r12 @ q[6] * d[-7] + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder8a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder7: + ldmia r2!, {r8 - r11} + sub r14, r14, #16 +.Lorder7a: + ldmia r14, {r3, r4, r6, r7} + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r10} + ldmda r14, {r3, r4, r6, r7} @ we reuse r4, r6, r7 below, too: + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, 
r11, r12, r15} + add r14, r14, #4 + + mul r12, r10, r4 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} + ldmia r14, {r3, r4, r6, r7} + + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 + + bne .Lorder7a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder6: + ldmia r2, {r6 - r11} @ qlp_coeff[0 - 5] + sub r14, r14, #12 @ data[-3] +.Lorder6a: + ldmia r14, {r2 - r4} + mul r12, r6, r4 + mla r12, r7, r3, r12 + mla r12, r8, r2, r12 + + ldmdb r14, {r2 - r4} + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + mla r12, r11, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #12] + add r14, r14, #4 + subs r1, r1, #1 + bne .Lorder6a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder5: + ldmia r2, {r6 - r10} @ qlp_coeff[0 - 4] + ldr r12, [r14, #-4]! @ &data[-1] +.Lorder5a: + ldmdb r14, {r2, r3, r4, r11} + mul r12, r6, r12 + mla r12, r7, r11, r12 + mla r12, r8, r4, r12 + mla r12, r9, r3, r12 + mla r11, r10, r2, r12 + + ldr r3, [r0], #4 + add r12, r3, r11, asr r5 + str r12, [r14, #4]! 
+ + subs r1, r1, #1 + bne .Lorder5a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder4: + ldmia r2, {r6 - r9} @ qlp_coeff[0 - 3] +.Lorder4a: + ldmdb r14, {r2 - r4, r11} + mul r12, r6, r11 + mla r12, r7, r4, r12 + mla r12, r8, r3, r12 + mla r12, r9, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder4a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder3: + ldmia r2, {r6 - r8} @ qlp_coeff[0 - 2] +.Lorder3a: + ldmdb r14, {r2 - r4} + mul r12, r6, r4 + mla r12, r7, r3, r12 + mla r12, r8, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder3a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder2: + ldmia r2, {r6, r7} @ qlp_coeff[0, 1] + ldmdb r14, {r2, r3} +.Lorder2a: + mul r12, r6, r3 + mla r12, r7, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + ldr r2, [r14, #-4] + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder2a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder1: + ldr r6, [r2] + ldr r3, [r14, #-4] +.Lorder1a: + mul r12, r6, r3 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder1a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +@ this part started out as a Duff's Device in C, but now it's +@ optimized to take advantage of the ldm instructions: +.Lgeneric_restore_signal: + add r2, r2, r3, asl #2 @ qlp0 = &qlp_coeff[order] + add r7, r3, #7 @ order + 7 + mov r8, r7, lsr #3 @ n = (order + 7) / (2^3) +.Lduffs_device_outer: + sub r14, r14, r3, asl #2 @ r8 = history = &data[-order] + + mov r7, r8 + mov r9, r2 @ qlp = qlp0 + mov r12, #0 @ sum = 0 + + @ switch (order % 8) + and r4, r3, #7 + cmp r4, #7 + ldrls r15, [r15, r4, asl #2] + b .Lduffs_end + + .align 2 + .word .Lduffs_case0 + .word .Lduffs_case1 + .word .Lduffs_case2 + .word .Lduffs_case3 + .word .Lduffs_case4 + .word .Lduffs_case5 + .word .Lduffs_case6 + .word 
.Lduffs_case7 + +.Lduffs_case7: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case5: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case3: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case1: + ldr r10, [r9, #-4]! + ldr r11, [r14], #4 + mla r12, r10, r11, r12 + + subs r7, r7, #1 + bne .Lduffs_case0 + +.Lduffs_end: + ldr r10, [r0], #4 + add r11, r10, r12, asr r5 + str r11, [r14], #4 + subs r1, r1, #1 + bne .Lduffs_device_outer + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +.Lduffs_case0: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case6: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case4: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case2: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 + + subs r7, r7, #1 + bne .Lduffs_case0 + + ldr r10, [r0], #4 + add r11, r10, r12, asr r5 + str r11, [r14], #4 + subs r1, r1, #1 + bne .Lduffs_device_outer + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + .size FLAC__lpc_restore_signal_asm_arm, .-FLAC__lpc_restore_signal_asm_arm + .align 2 + .global FLAC__lpc_restore_signal_asm_arm_wide + .type FLAC__lpc_restore_signal_asm_arm_wide, %function +FLAC__lpc_restore_signal_asm_arm_wide: + stmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + @ r0 = residual; + @ r1 = data_len; + @ r2 = qlp_coeff; + @ r3 = order; + + .macro finish_loop + @ 64-bit arithmetic shift right: + mov r7, r7, lsr r2 @ shift lo register r2 bits right + rsb r10, r2, #32 + mov r11, r12, lsl r10 @ shift hi register (32 - r2) bits left + orr r7, r7, r11 @ combine new-hi and new-lo in one word + ands r6, r12, #0x80000000 @ mask sign bit from 
original hi word + orrne r7, r7, r6 @ restore sign bit from the original + + ldr r10, [r0], #4 @ residual + add r12, r10, r7 + str r12, [r8], #4 + subs r1, r1, #1 + bne .Lwide_duffs_device_outer + .endm + + add r5, r2, r3, asl #2 @ qlp0 = &qlp_coeff[order] + + ldr r2, [r13, #40] @ lp_quantization + ldr r8, [r13, #44] @ &data +.Lwide_duffs_device_outer: + sub r8, r8, r3, asl #2 @ r8 = data = &data[-order] + add r14, r3, #7 @ order + 7 + mov r14, r14, lsr #3 @ n = (order + 7) / (2^3) + + mov r9, r5 @ qlp = qlp0 + mov r7, #0 @ sum = 0 + mov r12, #0 @ sum = 0 + + @ switch (order % 8) + and r4, r3, #7 + cmp r4, #7 + ldrls r15, [r15, r4, asl #2] + b .Lwide_duffs_end + + .align 2 + .word .Lwide_duffs_case0 + .word .Lwide_duffs_case1 + .word .Lwide_duffs_case2 + .word .Lwide_duffs_case3 + .word .Lwide_duffs_case4 + .word .Lwide_duffs_case5 + .word .Lwide_duffs_case6 + .word .Lwide_duffs_case7 + +.Lwide_duffs_case7: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case5: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case3: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case1: + ldr r10, [r9, #-4]! 
+ ldr r11, [r8], #4 + smlal r7, r12, r10, r11 + + subs r14, r14, #1 @ --n + bne .Lwide_duffs_case0 + +.Lwide_duffs_end: + finish_loop + + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +.Lwide_duffs_case0: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case6: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case4: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case2: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 + + subs r14, r14, #1 @ --n + bne .Lwide_duffs_case0 + + finish_loop + + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + .size FLAC__lpc_restore_signal_asm_arm_wide, .-FLAC__lpc_restore_signal_asm_arm_wide + |