about summary refs log tree commit
path: root/src/libFLAC/arm/lpc_asm.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/libFLAC/arm/lpc_asm.s')
-rw-r--r-- src/libFLAC/arm/lpc_asm.s 678
1 files changed, 678 insertions, 0 deletions
diff --git a/src/libFLAC/arm/lpc_asm.s b/src/libFLAC/arm/lpc_asm.s
new file mode 100644
index 00000000..3629a7b3
--- /dev/null
+++ b/src/libFLAC/arm/lpc_asm.s
@@ -0,0 +1,678 @@
+@  libFLAC - Free Lossless Audio Codec library
+@  Copyright (C) 2001,2002,2003,2004,2005,2006  Josh Coalson
+@
+@  Redistribution and use in source and binary forms, with or without
+@  modification, are permitted provided that the following conditions
+@  are met:
+@
+@  - Redistributions of source code must retain the above copyright
+@  notice, this list of conditions and the following disclaimer.
+@
+@  - Redistributions in binary form must reproduce the above copyright
+@  notice, this list of conditions and the following disclaimer in the
+@  documentation and/or other materials provided with the distribution.
+@
+@  - Neither the name of the Xiph.org Foundation nor the names of its
+@  contributors may be used to endorse or promote products derived from
+@  this software without specific prior written permission.
+@
+@  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+@  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+@ TODO: special cases for order 9, 10, 11, 12 may be further optimizable...
+
+        .text
+        .align 2
+        .global        FLAC__lpc_restore_signal_asm_arm
+        .type        FLAC__lpc_restore_signal_asm_arm, %function
+FLAC__lpc_restore_signal_asm_arm:
+        stmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}        @ 10 registers = 40 bytes
+        @ LPC signal restoration with a 32-bit accumulator: every output sample is
+        @ residual[i] + ((sum of qlp_coeff[j] * data[i-1-j]) >> lp_quantization).
+        @        r0 = residual;   r1 = data_len;
+        @        r2 = qlp_coeff;  r3 = order;
+        @        r5 = lp_quantization        (first stack argument, now at r13 + 40)
+        @        r14 = &data                (second stack argument, now at r13 + 44)
+        ldr        r5, [r13, #40]                @ lp_quantization
+        ldr        r14, [r13, #44]                @ &data
+        cmp        r1, #0                        @ data_len == 0: every loop below is a subs/bne countdown
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that would wrap around, so return at once
+        @ Special case each of the common LPC order levels used by encoders
+        @ switch (order) { ...
+        sub        r6, r3, #1                @ r6 = order - 1 (order 0 wraps to 0xffffffff)
+        cmp        r6, #11
+        ldrls        r15, [r15, r6, asl #2]        @ orders 1..12: r15 reads as the table address below, so jump to table[r6]
+        b        .Lgeneric_restore_signal        @ every other order
+
+        .align  2
+        .word        .Lorder1
+        .word        .Lorder2
+        .word        .Lorder3
+        .word        .Lorder4
+        .word        .Lorder5
+        .word        .Lorder6
+        .word        .Lorder7
+        .word        .Lorder8
+        .word        .Lorder9
+        .word        .Lorder10
+        .word        .Lorder11
+        .word        .Lorder12
+.Lorder12:                                @ order == 12: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder12a:                                @ sample A: taps taken in the order q[0] .. q[11]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[8]
+        ldmdb        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4] => &data[-8]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+
+        ldmia        r2, {r8 - r11}                @ qlp_coeff[8 - 11]; no writeback, r2 = &qlp_coeff[8]
+        ldmdb        r14, {r3, r4, r6, r7}        @ d[-12 .. -9]; no writeback, r14 = &data[-8]
+        mla        r12, r8, r7, r12        @ sum += q[8] * d[-9]
+        mla        r12, r9, r6, r12        @ sum += q[9] * d[-10]
+        mla        r12, r10, r4, r12        @ sum += q[10] * d[-11]
+        mla        r12, r11, r3, r12        @ sum += q[11] * d[-12]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #32]                @ data[0] = r3 (r14 = &data[-8])
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-8] of the new sample
+
+        ldmdb        r14, {r3, r4, r6, r7}        @ sample B: data[-12, -11, -10, -9], r14 = &data[-8]
+        mul        r12, r11, r3                @ sum  = q[11] * d[-12] (r8 - r11 still hold q[8 - 11])
+        mla        r12, r10, r4, r12        @ sum += q[10] * d[-11]
+        mla        r12, r9, r6, r12        @ sum += q[9] * d[-10]
+        mla        r12, r8, r7, r12        @ sum += q[8] * d[-9]
+
+        ldmdb        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[4]
+        ldmia        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+        bne        .Lorder12a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder11:                                @ order == 11: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder11a:                                @ sample A: taps taken in the order q[0] .. q[10]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[8]
+        ldmdb        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4] => &data[-8]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+
+        ldmia        r2, {r8 - r10}                @ qlp_coeff[8 - 10]; no writeback, r2 = &qlp_coeff[8]
+        ldmda        r14, {r3, r4, r6, r7}        @ d[-11 .. -8]; r4, r6 and r7 are reused by sample B
+        mla        r12, r8, r6, r12        @ sum += q[8] * d[-9]
+        mla        r12, r9, r4, r12        @ sum += q[9] * d[-10]
+        mla        r12, r10, r3, r12        @ sum += q[10] * d[-11]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #32]                @ data[0] = r3 (r14 = &data[-8])
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-8] of the new sample
+
+        mul        r12, r10, r4                @ sample B: sum = q[10] * d[-11] (r4 was d[-10] one sample ago)
+        mla        r12, r9, r6, r12        @ sum += q[9] * d[-10] (r6 was d[-9])
+        mla        r12, r8, r7, r12        @ sum += q[8] * d[-9] (r7 was d[-8])
+
+        ldmdb        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[4]
+        ldmia        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+        bne        .Lorder11a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder10:                                @ order == 10: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder10a:                                @ sample A: taps taken in the order q[0] .. q[9]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[8]
+        ldmdb        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4] => &data[-8]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+
+        ldmia        r2, {r8, r9}                @ qlp_coeff[8, 9]; no writeback, r2 = &qlp_coeff[8]
+        ldmda        r14, {r4, r6, r7}        @ d[-10 .. -8]; r6 and r7 are reused by sample B
+        mla        r12, r8, r6, r12        @ sum += q[8] * d[-9]
+        mla        r12, r9, r4, r12        @ sum += q[9] * d[-10]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #32]                @ data[0] = r3 (r14 = &data[-8])
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-8] of the new sample
+
+        mul        r12, r9, r6                @ sample B: sum = q[9] * d[-10] (r6 was d[-9] one sample ago)
+        mla        r12, r8, r7, r12        @ sum += q[8] * d[-9] (r7 was d[-8])
+
+        ldmdb        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[4]
+        ldmia        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+        bne        .Lorder10a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder9:                                @ order == 9: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder9a:                                @ sample A: taps taken in the order q[0] .. q[8]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[8]
+        ldmdb        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4] => &data[-8]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+
+        ldr        r8, [r2]                @ qlp_coeff[8]; r2 = &qlp_coeff[8]
+        ldmda        r14, {r6, r7}        @ d[-9], d[-8]; r7 is reused by sample B
+        mla        r12, r8, r6, r12        @ sum += q[8] * d[-9]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #32]                @ data[0] = r3 (r14 = &data[-8])
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-8] of the new sample
+
+        mul        r12, r8, r7                @ sample B: sum = q[8] * d[-9] (r7 was d[-8] one sample ago)
+
+        ldmdb        r2!, {r8 - r11}                @ qlp_coeff[4 - 7]; r2 = &qlp_coeff[4]
+        ldmia        r14!, {r3, r4, r6, r7}        @ d[-8 .. -5]; r14 = &data[-4]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+        bne        .Lorder9a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder8:                                @ order == 8: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder8a:                                @ sample A: taps taken in the order q[0] .. q[7]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2, {r8 - r11}                @ qlp_coeff [4 - 7]; no writeback, r2 = &qlp_coeff[4]
+        ldmdb        r14, {r3, r4, r6, r7}        @ d[-8 .. -5]; no writeback, r14 = &data[-4]
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r4, r12        @ sum += q[6] * d[-7]
+        mla        r12, r11, r3, r12        @ sum += q[7] * d[-8]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-4]
+
+        ldmdb        r14, {r3, r4, r6, r7}        @ sample B: data[-8, -7, -6, -5]
+        mul        r12, r11, r3                @ q[7] * d[-8] (r8 - r11 still hold q[4 - 7])
+        mla        r12, r10, r4, r12        @ q[6] * d[-7]
+        mla        r12, r9, r6, r12        @ q[5] * d[-6]
+        mla        r12, r8, r7, r12        @ q[4] * d[-5]
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+        bne        .Lorder8a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder7:                                @ order == 7: unrolled, two samples per loop pass
+        ldmia        r2!, {r8 - r11}                @ qlp_coeff[0 - 3]; r2 = &qlp_coeff[4]
+        sub        r14, r14, #16                @ &data[-4]
+.Lorder7a:                                @ sample A: taps taken in the order q[0] .. q[6]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]; r14 = &data[-4]
+        mul        r12, r8, r7                @ sum  = q[0] * d[-1]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+
+        ldmia        r2, {r8 - r10}                @ qlp_coeff[4 - 6]; no writeback, r2 = &qlp_coeff[4]
+        ldmda        r14, {r3, r4, r6, r7} @ d[-7 .. -4]; we reuse r4, r6, r7 below, too:
+        mla        r12, r8, r6, r12        @ sum += q[4] * d[-5]
+        mla        r12, r9, r4, r12        @ sum += q[5] * d[-6]
+        mla        r12, r10, r3, r12        @ sum += q[6] * d[-7]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization (arithmetic shift)
+        str        r3, [r14, #16]                @ data[0] = r3 (r14 = &data[-4])
+
+        subs        r1, r1, #1                @ --data_len
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ that was the last sample: return
+        add        r14, r14, #4                @ data++, r14 = &data[-4]
+
+        mul        r12, r10, r4                @ sample B: sum = q[6] * d[-7] (r4 was d[-6] one sample ago)
+        mla        r12, r9, r6, r12        @ sum += q[5] * d[-6] (r6 was d[-5])
+        mla        r12, r8, r7, r12        @ sum += q[4] * d[-5] (r7 was d[-4])
+
+        ldmdb        r2, {r8 - r11}                @ qlp_coeff[0 - 3]; no writeback, r2 = &qlp_coeff[4]
+        ldmia        r14, {r3, r4, r6, r7}        @ d[-4 .. -1]
+
+        mla        r12, r11, r3, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r6, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r7, r12        @ sum += q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #16]                @ data[0] = r3
+        subs        r1, r1, #1                @ --data_len
+        add        r14, r14, #4                @ r14 = &data[-4]
+
+        bne        .Lorder7a                @ r8 - r11 = q[0 - 3] and r2 = &q[4], as sample A expects
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+.Lorder6:                                @ order == 6: one sample per pass, all coefficients in registers
+        sub        r14, r14, #12                @ r14 = &data[-3]
+        ldmia        r2, {r6, r7, r8, r9, r10, r11}        @ q[0 .. 5]; r2 is scratch from here on
+.Lorder6a:
+        ldmia        r14, {r2, r3, r4}        @ d[-3], d[-2], d[-1]
+        mul        r12, r6, r4                @ sum  = q[0] * d[-1]
+        mla        r12, r7, r3, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r2, r12        @ sum += q[2] * d[-3]
+
+        ldmdb        r14, {r2, r3, r4}        @ d[-6], d[-5], d[-4]
+        mla        r12, r9, r4, r12        @ sum += q[3] * d[-4]
+        mla        r12, r10, r3, r12        @ sum += q[4] * d[-5]
+        mla        r12, r11, r2, r12        @ sum += q[5] * d[-6]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+        str        r3, [r14, #12]                @ data[0] = r3
+        add        r14, r14, #4                @ r14 = &data[-3] of the next sample
+        bne        .Lorder6a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+.Lorder5:                                @ order == 5: one sample per pass
+        ldr        r12, [r14, #-4]!        @ r14 = &data[-1], r12 = d[-1]
+        ldmia        r2, {r6, r7, r8, r9, r10}        @ q[0 .. 4]; r2 is scratch from here on
+.Lorder5a:                                @ here r14 = &data[-1] and r12 = d[-1]
+        ldmdb        r14, {r2 - r4, r11}        @ d[-5], d[-4], d[-3], d[-2]
+        mul        r12, r6, r12                @ sum  = q[0] * d[-1]
+        mla        r12, r7, r11, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r4, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r3, r12        @ sum += q[3] * d[-4]
+        mla        r11, r10, r2, r12        @ r11 = sum + q[4] * d[-5]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r12, r3, r11, asr r5        @ new sample, kept in r12 as the next d[-1]
+
+        str        r12, [r14, #4]!                @ data[0] = r12; r14 = &data[-1] of the next sample
+        bne        .Lorder5a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+.Lorder4:                                @ order == 4: one sample per pass
+        ldmia        r2, {r6, r7, r8, r9}        @ q[0 .. 3]; r2 is scratch from here on
+.Lorder4a:                                @ here r14 = &data[0]
+        ldmdb        r14, {r2, r3, r4, r11}        @ d[-4], d[-3], d[-2], d[-1]
+        mul        r12, r6, r11                @ sum  = q[0] * d[-1]
+        mla        r12, r7, r4, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r3, r12        @ sum += q[2] * d[-3]
+        mla        r12, r9, r2, r12        @ sum += q[3] * d[-4]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+
+        str        r3, [r14], #4                @ *data++ = r3
+        bne        .Lorder4a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+.Lorder3:                                @ order == 3: one sample per pass
+        ldmia        r2, {r6, r7, r8}        @ q[0 .. 2]; r2 is scratch from here on
+.Lorder3a:                                @ here r14 = &data[0]
+        ldmdb        r14, {r2, r3, r4}        @ d[-3], d[-2], d[-1]
+        mul        r12, r6, r4                @ sum  = q[0] * d[-1]
+        mla        r12, r7, r3, r12        @ sum += q[1] * d[-2]
+        mla        r12, r8, r2, r12        @ sum += q[2] * d[-3]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r3, r3, r12, asr r5        @ r3 += sum >> lp_quantization
+
+        str        r3, [r14], #4                @ *data++ = r3
+        bne        .Lorder3a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+.Lorder2:                                @ order == 2: both history samples live in registers
+        ldmia        r2, {r6 - r7}                @ q[0], q[1]; must come first, the next load overwrites r2
+        ldmdb        r14, {r2 - r3}                @ r2 = d[-2], r3 = d[-1]
+.Lorder2a:                                @ here r14 = &data[0], r2 = d[-2], r3 = d[-1]
+        mul        r12, r6, r3                @ sum  = q[0] * d[-1]
+        mla        r12, r7, r2, r12        @ sum += q[1] * d[-2]
+
+        ldr        r2, [r14, #-4]                @ current d[-1] becomes d[-2] of the next sample
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r3, r3, r12, asr r5        @ new sample; stays in r3 as the next d[-1]
+
+        str        r3, [r14], #4                @ *data++ = r3
+        bne        .Lorder2a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+.Lorder1:                                @ order == 1: previous sample lives in r3
+        ldr        r3, [r14, #-4]                @ r3 = d[-1]
+        ldr        r6, [r2]                @ r6 = q[0]
+.Lorder1a:                                @ here r14 = &data[0], r3 = d[-1]
+        mul        r12, r6, r3                @ sum = q[0] * d[-1]
+
+        ldr        r3, [r0], #4                @ r3 = *residual++
+        subs        r1, r1, #1                @ --data_len; nothing before the bne touches the flags
+        add        r3, r3, r12, asr r5        @ new sample; stays in r3 as the next d[-1]
+
+        str        r3, [r14], #4                @ *data++ = r3
+        bne        .Lorder1a
+        ldmfd        sp!, {r4 - r12, pc}        @ restore saved registers and return
+
+@ this part started out as a Duff's Device in C, but now it's
+@ optimized to take advantage of the ldm instructions:
+.Lgeneric_restore_signal:                @ reached for every order outside 1..12
+        add        r2, r2, r3, asl #2        @ qlp0 = &qlp_coeff[order]
+        add        r7, r3, #7                @ order + 7
+        mov        r8, r7, lsr #3                @ n = (order + 7) / (2^3); n == 0 means order == 0
+.Lduffs_device_outer:                        @ one pass per output sample; r14 = &data[0] here
+        sub        r14, r14, r3, asl #2        @ r14 = history = &data[-order]
+
+        mov        r7, r8                        @ r7 = n, passes left through the tap loop
+        mov        r9, r2                        @ qlp = qlp0
+        mov        r12, #0                        @ sum = 0
+
+        @ switch (order % 8), entered only when there is at least one tap
+        and        r4, r3, #7
+        cmp        r8, #0                        @ no taps at all (order == 0)?
+        ldrne        r15, [r15, r4, asl #2]        @ r15 reads as the table address below: jump to table[order % 8]
+        b        .Lduffs_end                @ order == 0: sum stays 0, so data[i] = residual[i]
+
+        .align 2
+        .word .Lduffs_case0
+        .word .Lduffs_case1
+        .word .Lduffs_case2
+        .word .Lduffs_case3
+        .word .Lduffs_case4
+        .word .Lduffs_case5
+        .word .Lduffs_case6
+        .word .Lduffs_case7
+
+.Lduffs_case7:                                @ odd remainders: 7, 5 or 3 taps, then the single tap of case1
+        ldmia        r14!, {r4, r6}                @ next two history samples, oldest first
+        ldmdb        r9!, {r10, r11}                @ next two coefficients, walking down from qlp0
+        mla        r12, r4, r11, r12        @ sum += older sample * higher-indexed coefficient
+        mla        r12, r6, r10, r12        @ sum += newer sample * lower-indexed coefficient
+.Lduffs_case5:
+        ldmia        r14!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+.Lduffs_case3:
+        ldmia        r14!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+.Lduffs_case1:
+        ldr        r10, [r9, #-4]!                @ one coefficient
+        ldr        r11, [r14], #4                @ one history sample
+        mla        r12, r10, r11, r12
+
+        subs        r7, r7, #1                @ --n: the partial pass counts as one
+        bne        .Lduffs_case0                @ remaining passes are full 8-tap ones
+
+.Lduffs_end:                                @ r14 = &data[0] again, r12 = sum
+        ldr        r10, [r0], #4                @ r10 = *residual++
+        add        r11, r10, r12, asr r5        @ r11 = residual + (sum >> lp_quantization)
+        str        r11, [r14], #4                @ *data++ = r11
+        subs        r1, r1, #1                @ --data_len
+        bne        .Lduffs_device_outer
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+
+.Lduffs_case0:                                @ even remainders: 8, 6, 4 or 2 taps
+        ldmia        r14!, {r4, r6}                @ next two history samples, oldest first
+        ldmdb        r9!, {r10, r11}                @ next two coefficients, walking down from qlp0
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+.Lduffs_case6:
+        ldmia        r14!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+.Lduffs_case4:
+        ldmia        r14!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+.Lduffs_case2:
+        ldmia        r14!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        mla        r12, r4, r11, r12
+        mla        r12, r6, r10, r12
+
+        subs        r7, r7, #1                @ --n
+        bne        .Lduffs_case0
+
+        ldr        r10, [r0], #4                @ same sample epilogue as .Lduffs_end
+        add        r11, r10, r12, asr r5
+        str        r11, [r14], #4
+        subs        r1, r1, #1
+        bne        .Lduffs_device_outer
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+
+        .size        FLAC__lpc_restore_signal_asm_arm, .-FLAC__lpc_restore_signal_asm_arm
+        .align 2
+        .global        FLAC__lpc_restore_signal_asm_arm_wide
+        .type        FLAC__lpc_restore_signal_asm_arm_wide, %function
+FLAC__lpc_restore_signal_asm_arm_wide:
+        stmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}        @ 10 registers = 40 bytes
+        @ LPC signal restoration with a 64-bit accumulator (smlal), one sample per outer pass.
+        @ r0 = residual;   r1 = data_len;   r3 = order;
+        @ r2 = qlp_coeff on entry, reloaded with lp_quantization once qlp0 is formed;
+        @ r5 = qlp0 = &qlp_coeff[order];   r8 = data cursor;   r12:r7 = sum (hi:lo)
+
+        .macro finish_loop
+        @ low word of the 64-bit sum r12:r7 shifted right by r2 bits:
+        mov        r7, r7, lsr r2                @ shift lo register r2 bits right
+        rsb        r10, r2, #32
+        mov        r11, r12, lsl r10        @ shift hi register (32 - r2) bits left (gives 0 when r2 == 0)
+        orr        r7, r7, r11                @ combine new-hi and new-lo in one word
+        @ Bit 31 is NOT forced from the sign of the hi word: the bits shifted in from hi already
+        @ carry it, and forcing it would differ from plain truncation when the sum overflows 32 bits.
+
+        ldr        r10, [r0], #4                @ residual
+        add        r12, r10, r7                @ restored sample
+        str        r12, [r8], #4                @ *data++ = sample
+        subs        r1, r1, #1                @ --data_len
+        bne        .Lwide_duffs_device_outer
+        .endm
+        cmp        r1, #0                        @ data_len == 0: the subs/bne countdown in finish_loop
+        ldmeqfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ would wrap around, so return at once
+        add        r5, r2, r3, asl #2        @ qlp0 = &qlp_coeff[order]
+        ldr        r2, [r13, #40]                @ lp_quantization
+        ldr        r8, [r13, #44]                @ &data
+.Lwide_duffs_device_outer:                @ one pass per output sample; r8 = &data[0] here
+        sub        r8, r8, r3, asl #2        @ r8 = data = &data[-order]
+        add        r14, r3, #7                @ order + 7
+        mov        r14, r14, lsr #3        @ n = (order + 7) / (2^3); n == 0 means order == 0
+
+        mov        r9, r5                        @ qlp = qlp0
+        mov        r7, #0                        @ sum (low word) = 0
+        mov        r12, #0                        @ sum (high word) = 0
+
+        @ switch (order % 8), entered only when there is at least one tap
+        and        r4, r3, #7
+        cmp        r14, #0                        @ no taps at all (order == 0)?
+        ldrne        r15, [r15, r4, asl #2]        @ r15 reads as the table address below: jump to table[order % 8]
+        b        .Lwide_duffs_end        @ order == 0: sum stays 0, so data[i] = residual[i]
+
+        .align 2
+        .word .Lwide_duffs_case0
+        .word .Lwide_duffs_case1
+        .word .Lwide_duffs_case2
+        .word .Lwide_duffs_case3
+        .word .Lwide_duffs_case4
+        .word .Lwide_duffs_case5
+        .word .Lwide_duffs_case6
+        .word .Lwide_duffs_case7
+
+.Lwide_duffs_case7:                        @ odd remainders: 7, 5 or 3 taps, then the single tap of case1
+        ldmia        r8!, {r4, r6}                @ next two history samples, oldest first
+        ldmdb        r9!, {r10, r11}                @ next two coefficients, walking down from qlp0
+        smlal        r7, r12, r4, r11        @ sum += older sample * higher-indexed coefficient
+        smlal        r7, r12, r6, r10        @ sum += newer sample * lower-indexed coefficient
+.Lwide_duffs_case5:
+        ldmia        r8!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+.Lwide_duffs_case3:
+        ldmia        r8!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+.Lwide_duffs_case1:
+        ldr        r10, [r9, #-4]!                @ one coefficient
+        ldr        r11, [r8], #4                @ one history sample
+        smlal        r7, r12, r10, r11
+
+        subs        r14, r14, #1                @ --n
+        bne        .Lwide_duffs_case0        @ remaining passes are full 8-tap ones
+
+.Lwide_duffs_end:                        @ r8 = &data[0] again, r12:r7 = sum
+        finish_loop
+
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+
+.Lwide_duffs_case0:                        @ even remainders: 8, 6, 4 or 2 taps
+        ldmia        r8!, {r4, r6}                @ next two history samples, oldest first
+        ldmdb        r9!, {r10, r11}                @ next two coefficients, walking down from qlp0
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+.Lwide_duffs_case6:
+        ldmia        r8!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+.Lwide_duffs_case4:
+        ldmia        r8!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+.Lwide_duffs_case2:
+        ldmia        r8!, {r4, r6}
+        ldmdb        r9!, {r10, r11}
+        smlal        r7, r12, r4, r11
+        smlal        r7, r12, r6, r10
+
+        subs        r14, r14, #1                @ --n
+        bne        .Lwide_duffs_case0
+
+        finish_loop
+
+        ldmfd        r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15}        @ restore saved registers and return
+
+        .size        FLAC__lpc_restore_signal_asm_arm_wide, .-FLAC__lpc_restore_signal_asm_arm_wide
+