diff options
author | Eric Wong <normalperson@yhbt.net> | 2006-09-01 02:49:49 -0700 |
---|---|---|
committer | Eric Wong <normalperson@yhbt.net> | 2006-09-09 18:57:40 -0700 |
commit | 3df8e38af6da6c0699da8ef040ffc80cf7dab810 (patch) | |
tree | f931e7aa499850db2992262f38aa3c1ccbd9d332 | |
parent | f345a3435dde249874fde205ef4b793184e1e7c4 (diff) | |
download | flac-arm-1.1.3-3df8e38af6da6c0699da8ef040ffc80cf7dab810.tar.gz |
Add ARM assembly optimizations
This was my first time writing ARM assembly, so it may not be the best; but they're still significantly faster than the optimized C versions[1] of the functions they replace. A knowledgeable ARM coder should be able to make better optimizations, but nobody has done so for FLAC yet (at least not publicly, to my knowledge), so I decided to take a stab at it. Also tweaked configure.in a bit to better support cross-compilation. We no longer enable 3dnow, sse, or altivec optimizations unless our CPU supports them. (note: sse and 3dnow are supported by 64-bit chips, but the code is currently only optimized for 32-bit sse and 3dnow). FLAC__fixed_restore_signal_asm_arm and FLAC__lpc_restore_signal_asm_arm are pretty well-tested from tracks in my music collection. I do not think I have any music that exercises FLAC__lpc_restore_signal_asm_arm_wide (especially the 64-bit ASR macro). These optimization changes result in an approximately 20% reduction in decoding time on my 3rd generation ipod (running ipodlinux). [1] - I still have them at hand, but they're probably not worth it for the non-ASM optimized architectures.
-rw-r--r-- | configure.in | 19 | ||||
-rw-r--r-- | src/libFLAC/Makefile.am | 4 | ||||
-rw-r--r-- | src/libFLAC/arm/fixed_asm.s | 236 | ||||
-rw-r--r-- | src/libFLAC/arm/lpc_asm.s | 678 | ||||
-rw-r--r-- | src/libFLAC/cpu.c | 2 | ||||
-rw-r--r-- | src/libFLAC/include/private/cpu.h | 5 | ||||
-rw-r--r-- | src/libFLAC/include/private/fixed.h | 4 | ||||
-rw-r--r-- | src/libFLAC/include/private/lpc.h | 3 | ||||
-rw-r--r-- | src/libFLAC/stream_decoder.c | 14 |
9 files changed, 959 insertions, 6 deletions
diff --git a/configure.in b/configure.in index 7dce4e77..bf48c4d7 100644 --- a/configure.in +++ b/configure.in @@ -61,6 +61,11 @@ case "$host_cpu" in AC_DEFINE(FLAC__CPU_PPC) AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC]) ;; + arm) + cpu_arm=true + AC_DEFINE(FLAC__CPU_ARM) + AH_TEMPLATE(FLAC__CPU_ARM, [define if building for ARM]) + ;; sparc) cpu_sparc=true AC_DEFINE(FLAC__CPU_SPARC) @@ -69,6 +74,7 @@ case "$host_cpu" in esac AM_CONDITIONAL(FLaC__CPU_IA32, test "x$cpu_ia32" = xtrue) AM_CONDITIONAL(FLaC__CPU_PPC, test "x$cpu_ppc" = xtrue) +AM_CONDITIONAL(FLaC__CPU_ARM, test "x$cpu_arm" = xtrue) AM_CONDITIONAL(FLaC__CPU_SPARC, test "x$cpu_sparc" = xtrue) case "$host" in i386-*-openbsd3.[[0-3]]) OBJ_FORMAT=aoutb ;; @@ -119,8 +125,8 @@ AC_HELP_STRING([--enable-sse], [Enable SSE support by asserting that the OS supp no) sse_os=false ;; *) AC_MSG_ERROR(bad value ${enableval} for --enable-sse) ;; esac],[sse_os=false]) -AM_CONDITIONAL(FLaC__SSE_OS, test "x$sse_os" = xtrue) -if test "x$sse_os" = xtrue ; then +AM_CONDITIONAL(FLaC__SSE_OS, test "x$sse_os" = xtrue && test "x$cpu_ia32" = xtrue) +if test "x$sse_os" = xtrue && test "x$cpu_ia32" = xtrue; then AC_DEFINE(FLAC__SSE_OS) AH_TEMPLATE(FLAC__SSE_OS, [define if your operating system supports SSE instructions]) fi @@ -132,8 +138,8 @@ AC_HELP_STRING([--disable-3dnow], [Disable 3DNOW! optimizations]), no) use_3dnow=false ;; *) AC_MSG_ERROR(bad value ${enableval} for --enable-3dnow) ;; esac],[use_3dnow=true]) -AM_CONDITIONAL(FLaC__USE_3DNOW, test "x$use_3dnow" = xtrue) -if test "x$use_3dnow" = xtrue ; then +AM_CONDITIONAL(FLaC__USE_3DNOW, test "x$use_3dnow" = xtrue && test "x$cpu_ia32" = xtrue) +if test "x$use_3dnow" = xtrue && test "x$cpu_ia32" = xtrue; then AC_DEFINE(FLAC__USE_3DNOW) AH_TEMPLATE(FLAC__USE_3DNOW, [define to enable use of 3Dnow! 
instructions]) fi @@ -145,8 +151,8 @@ AC_HELP_STRING([--disable-altivec], [Disable Altivec optimizations]), no) use_altivec=false ;; *) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;; esac],[use_altivec=true]) -AM_CONDITIONAL(FLaC__USE_ALTIVEC, test "x$use_altivec" = xtrue) -if test "x$use_altivec" = xtrue ; then +AM_CONDITIONAL(FLaC__USE_ALTIVEC, test "x$use_altivec" = xtrue && test "x$cpu_ppc" = xtrue) +if test "x$use_altivec" = xtrue && test "x$cpu_ppc" = xtrue; then AC_DEFINE(FLAC__USE_ALTIVEC) AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions]) fi @@ -281,6 +287,7 @@ AC_CONFIG_FILES([ \ Makefile \ src/Makefile \ src/libFLAC/Makefile \ + src/libFLAC/arm/Makefile \ src/libFLAC/ia32/Makefile \ src/libFLAC/ppc/Makefile \ src/libFLAC/ppc/as/Makefile \ diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index 395308fb..67154470 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -49,6 +49,10 @@ AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) if FLaC__NO_ASM else +if FLaC__CPU_ARM +ARCH_SUBDIRS = arm +libFLAC_la_LIBADD = arm/libFLAC-asm.la +endif if FLaC__CPU_IA32 if FLaC__HAS_NASM ARCH_SUBDIRS = ia32 diff --git a/src/libFLAC/arm/fixed_asm.s b/src/libFLAC/arm/fixed_asm.s new file mode 100644 index 00000000..bffb2a99 --- /dev/null +++ b/src/libFLAC/arm/fixed_asm.s @@ -0,0 +1,236 @@ +@ libFLAC - Free Lossless Audio Codec library +@ Copyright (C) 2001,2002,2003,2004,2005,2006 Josh Coalson +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions +@ are met: +@ +@ - Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ +@ - Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. 
+@ +@ - Neither the name of the Xiph.org Foundation nor the names of its +@ contributors may be used to endorse or promote products derived from +@ this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +@ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES@ LOSS OF USE, DATA, OR +@ PROFITS@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + .text + .align 2 + .global FLAC__fixed_restore_signal_asm_arm + .type FLAC__fixed_restore_signal_asm_arm, %function +FLAC__fixed_restore_signal_asm_arm: + stmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + @ r0 = residual; + @ r1 = data_len; + @ r2 = order; + @ r3 = data; + + cmp r2, #4 + ldrls r15, [r15, r2, asl #2] + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + .align 2 + .word .Lorder0 + .word .Lorder1 + .word .Lorder2 + .word .Lorder3 + .word .Lorder4 + + @ data[i] = residual[i]; +.Lorder0: + tst r1, #15 + beq .Lorder0b +.Lorder0a: + @ start off slow until we get to (data_len % 16) == 0 + ldr r2, [r0], #4 + str r2, [r3], #4 + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + tst r1, #15 + bne .Lorder0a +.Lorder0b: + @ WHEEEEEEEEEEEEEE!!!!!!!!!!!!!!!!!!! 
+ ldmia r0!, {r5 - r12} + stmia r3!, {r5 - r12} + ldmia r0!, {r5 - r12} + stmia r3!, {r5 - r12} + subs r1, r1, #16 + bne .Lorder0b + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + @ data[i] = residual[i] + data[i-1]; +.Lorder1: + ldr r14, [r3,#-4] + tst r1, #7 + beq .Lorder1b +.Lorder1a: + ldr r2, [r0], #4 + add r14, r2, r14 + str r14, [r3], #4 + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + tst r1, #7 + bne .Lorder1a +.Lorder1b: + ldmia r0!, {r2, r4 - r10} + add r2, r2, r14 + add r4, r4, r2 + add r5, r5, r4 + add r6, r6, r5 + add r7, r7, r6 + add r8, r8, r7 + add r9, r9, r8 + add r14, r10, r9 + stmia r3!, {r2, r4 - r9, r14} + subs r1, r1, #8 + bne .Lorder1b + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + @ data[i] = residual[i] + (data[i-1]<<1) - data[i-2]; +.Lorder2: + @ r12 = data[i-2], r14 = data[i-1] + ldmdb r3, {r12, r14} + tst r1, #7 + beq .Lorder2b +.Lorder2a: + ldr r2, [r0], #4 + add r2, r2, r14, asl #1 + sub r2, r2, r12 + str r2, [r3], #4 + mov r12, r14 + mov r14, r2 + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + tst r1, #7 + bne .Lorder2a +.Lorder2b: + ldmia r0!, {r2, r4 - r10} + + @ r12 = data[i-2], r14 = data[i-1] + add r2, r2, r14, asl #1 + sub r2, r2, r12 + + add r4, r4, r2, asl #1 + sub r4, r4, r14 + + add r5, r5, r4, asl #1 + sub r5, r5, r2 + + add r6, r6, r5, asl #1 + sub r6, r6, r4 + + add r7, r7, r6, asl #1 + sub r7, r7, r5 + + add r8, r8, r7, asl #1 + sub r8, r8, r6 + + add r9, r9, r8, asl #1 + sub r12, r9, r7 + + add r10, r10, r12, asl #1 + sub r14, r10, r8 + + stmia r3!, {r2, r4 - r8, r12, r14} + subs r1, r1, #8 + bne .Lorder2b + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + @ data[i] = residual[i] + (((data[i-1]-data[i-2])<<1) + @ + (data[i-1]-data[i-2])) + data[i-3]; + .macro do_order_3, dest, res_i, b3, b2, b1, tmp + add \res_i, \res_i, \b3 + sub \tmp, \b1, \b2 + add \tmp, \tmp, \tmp, asl #1 + add \dest, \res_i, \tmp + .endm 
+.Lorder3: + ldmdb r3, { r11, r12, r14 } + tst r1, #7 + beq .Lorder3b +.Lorder3a: + ldr r2, [r0], #4 + + do_order_3 r2, r2, r11, r12, r14, r11 + str r2, [r3], #4 + + ldmdb r3, { r11, r12, r14 } + + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + tst r1, #7 + bne .Lorder3a +.Lorder3b: + ldmia r0!, {r2, r4 - r10} + + do_order_3 r2, r2, r11, r12, r14, r11 + do_order_3 r4, r4, r12, r14, r2, r12 + do_order_3 r5, r5, r14, r2, r4, r14 + do_order_3 r6, r6, r2, r4, r5, r11 + + do_order_3 r7, r7, r4, r5, r6, r12 + do_order_3 r11, r8, r5, r6, r7, r11 + do_order_3 r12, r9, r6, r7, r11, r12 + do_order_3 r14, r10, r7, r11, r12, r14 + + stmia r3!, {r2, r4 - r7, r11, r12, r14} + subs r1, r1, #8 + bne .Lorder3b + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + @ data[i] = residual[i] + ((data[i-1]+data[i-3])<<2) + @ - ((data[i-2]<<2) + (data[i-2]<<1)) - data[i-4]; + .macro do_order_4, dest, res_i, b4, b3, b2, b1, tmp + sub \res_i, \res_i, \b4 + add \tmp, \b1, \b3 + add \res_i, \res_i, \tmp, asl #2 + sub \res_i, \res_i, \b2, asl #2 + sub \dest, \res_i, \b2, asl #1 + .endm +.Lorder4: + ldmdb r3, {r7, r11, r12, r14} + tst r1, #7 + beq .Lorder4b +.Lorder4a: + ldr r2, [r0], #4 + do_order_4 r2, r2, r7, r11, r12, r14, r7 + str r2, [r3], #4 + ldmdb r3, {r7, r11, r12, r14} + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + tst r1, #7 + bne .Lorder4a +.Lorder4b: + ldr r2, [r0], #4 + do_order_4 r2, r2, r7, r11, r12, r14, r10 + + ldmia r0!, {r4 - r10} + + do_order_4 r4, r4, r11, r12, r14, r2, r11 + do_order_4 r5, r5, r12, r14, r2, r4, r12 + do_order_4 r6, r6, r14, r2, r4, r5, r14 + + do_order_4 r7, r7, r2, r4, r5, r6, r11 + do_order_4 r11, r8, r4, r5, r6, r7, r11 + do_order_4 r12, r9, r5, r6, r7, r11, r12 + do_order_4 r14, r10, r6, r7, r11, r12, r14 + + stmia r3!, {r2, r4, r5, r6, r7, r11, r12, r14} + subs r1, r1, #8 + bne .Lorder4b + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + .size 
FLAC__fixed_restore_signal_asm_arm, .-FLAC__fixed_restore_signal_asm_arm diff --git a/src/libFLAC/arm/lpc_asm.s b/src/libFLAC/arm/lpc_asm.s new file mode 100644 index 00000000..3629a7b3 --- /dev/null +++ b/src/libFLAC/arm/lpc_asm.s @@ -0,0 +1,678 @@ +@ libFLAC - Free Lossless Audio Codec library +@ Copyright (C) 2001,2002,2003,2004,2005,2006 Josh Coalson +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions +@ are met: +@ +@ - Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ +@ - Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ +@ - Neither the name of the Xiph.org Foundation nor the names of its +@ contributors may be used to endorse or promote products derived from +@ this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +@ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES@ LOSS OF USE, DATA, OR +@ PROFITS@ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +@ TODO: special cases for order 9, 10, 11, 12 may be further optimizable... 
+ + .text + .align 2 + .global FLAC__lpc_restore_signal_asm_arm + .type FLAC__lpc_restore_signal_asm_arm, %function +FLAC__lpc_restore_signal_asm_arm: + stmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + @ r0 = residual; + @ r1 = data_len; + @ r2 = qlp_coeff; + @ r3 = order; + @ r5 = lp_quantization + @ r14 = &data + ldr r5, [r13, #40] @ lp_quantization + ldr r14, [r13, #44] @ &data + + + @ Special case each of the common LPC order levels used by encoders + @ switch (order) { ... + sub r6, r3, #1 + cmp r6, #11 + ldrls r15, [r15, r6, asl #2] + b .Lgeneric_restore_signal + + .align 2 + .word .Lorder1 + .word .Lorder2 + .word .Lorder3 + .word .Lorder4 + .word .Lorder5 + .word .Lorder6 + .word .Lorder7 + .word .Lorder8 + .word .Lorder9 + .word .Lorder10 + .word .Lorder11 + .word .Lorder12 +.Lorder12: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder12a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r11} @ qlp_coeff[8 - 11] + ldmdb r14, {r3, r4, r6, r7} + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + ldmdb r14, {r3, r4, r6, r7} @ data[-12, -11, -10, -9], &data[-8] + mul r12, r11, r3 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, 
{r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder12a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder11: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder11a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r10} @ qlp_coeff[8 - 11] + ldmda r14, {r3, r4, r6, r7} @ we'll reuse r4, r6, and r7 + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r10, r4 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder11a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder10: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder10a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ 
qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8, r9} + ldmda r14, {r4, r6, r7} @ we'll reuse r4, r6, and r7 + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r9, r6 + mla r12, r8, r7, r12 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder10a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder9: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 @ &data[-4] +.Lorder9a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmdb r14!, {r3, r4, r6, r7} @ r14 = &data[-4] => &data[-8] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r8, [r2] + ldmda r14, {r6, r7} @ we'll reuse r7 + mla r12, r8, r6, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #32] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + mul r12, r8, r7 + + ldmdb r2!, {r8 - r11} @ qlp_coeff[4 - 7] + ldmia r14!, {r3, r4, r6, r7} @ &data[-4] + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb 
r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder9a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder8: + ldmia r2!, {r8 - r11} @ qlp_coeff[0 - 3] + sub r14, r14, #16 +.Lorder8a: + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r11} @ qlp_coeff [4 - 7] + ldmdb r14, {r3, r4, r6, r7} @ r14 = &data[-4] + mla r12, r8, r7, r12 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + add r14, r14, #4 @ data++, r14 = &data[-4] + + ldmdb r14, {r3, r4, r6, r7} @ data[-8, -7, -6, -5] + mul r12, r11, r3 @ q[7] * d[-8] + mla r12, r10, r4, r12 @ q[6] * d[-7] + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} @ qlp_coeff[0 - 3] + ldmia r14, {r3, r4, r6, r7} @ r14 = &data[-4] + + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 @ r14 = &data[-4] + bne .Lorder8a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder7: + ldmia r2!, {r8 - r11} + sub r14, r14, #16 +.Lorder7a: + ldmia r14, {r3, r4, r6, r7} + mul r12, r8, r7 + mla r12, r9, r6, r12 + mla r12, r10, r4, r12 + mla r12, r11, r3, r12 + + ldmia r2, {r8 - r10} + ldmda r14, {r3, r4, r6, r7} @ we reuse r4, r6, r7 below, too: + mla r12, r8, r6, r12 + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + + subs r1, r1, #1 + ldmeqfd r13!, {r4, r5, r6, r7, r8, r9, r10, 
r11, r12, r15} + add r14, r14, #4 + + mul r12, r10, r4 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldmdb r2, {r8 - r11} + ldmia r14, {r3, r4, r6, r7} + + mla r12, r11, r3, r12 + mla r12, r10, r4, r12 + mla r12, r9, r6, r12 + mla r12, r8, r7, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #16] + subs r1, r1, #1 + add r14, r14, #4 + + bne .Lorder7a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder6: + ldmia r2, {r6 - r11} @ qlp_coeff[0 - 5] + sub r14, r14, #12 @ data[-3] +.Lorder6a: + ldmia r14, {r2 - r4} + mul r12, r6, r4 + mla r12, r7, r3, r12 + mla r12, r8, r2, r12 + + ldmdb r14, {r2 - r4} + mla r12, r9, r4, r12 + mla r12, r10, r3, r12 + mla r12, r11, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14, #12] + add r14, r14, #4 + subs r1, r1, #1 + bne .Lorder6a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder5: + ldmia r2, {r6 - r10} @ qlp_coeff[0 - 4] + ldr r12, [r14, #-4]! @ &data[-1] +.Lorder5a: + ldmdb r14, {r2, r3, r4, r11} + mul r12, r6, r12 + mla r12, r7, r11, r12 + mla r12, r8, r4, r12 + mla r12, r9, r3, r12 + mla r11, r10, r2, r12 + + ldr r3, [r0], #4 + add r12, r3, r11, asr r5 + str r12, [r14, #4]! 
+ + subs r1, r1, #1 + bne .Lorder5a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder4: + ldmia r2, {r6 - r9} @ qlp_coeff[0 - 3] +.Lorder4a: + ldmdb r14, {r2 - r4, r11} + mul r12, r6, r11 + mla r12, r7, r4, r12 + mla r12, r8, r3, r12 + mla r12, r9, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder4a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder3: + ldmia r2, {r6 - r8} @ qlp_coeff[0 - 2] +.Lorder3a: + ldmdb r14, {r2 - r4} + mul r12, r6, r4 + mla r12, r7, r3, r12 + mla r12, r8, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder3a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder2: + ldmia r2, {r6, r7} @ qlp_coeff[0, 1] + ldmdb r14, {r2, r3} +.Lorder2a: + mul r12, r6, r3 + mla r12, r7, r2, r12 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + ldr r2, [r14, #-4] + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder2a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} +.Lorder1: + ldr r6, [r2] + ldr r3, [r14, #-4] +.Lorder1a: + mul r12, r6, r3 + + ldr r3, [r0], #4 + add r3, r3, r12, asr r5 + str r3, [r14], #4 + + subs r1, r1, #1 + bne .Lorder1a + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +@ this part started out as a Duff's Device in C, but now it's +@ optimized to take advantage of the ldm instructions: +.Lgeneric_restore_signal: + add r2, r2, r3, asl #2 @ qlp0 = &qlp_coeff[order] + add r7, r3, #7 @ order + 7 + mov r8, r7, lsr #3 @ n = (order + 7) / (2^3) +.Lduffs_device_outer: + sub r14, r14, r3, asl #2 @ r8 = history = &data[-order] + + mov r7, r8 + mov r9, r2 @ qlp = qlp0 + mov r12, #0 @ sum = 0 + + @ switch (order % 8) + and r4, r3, #7 + cmp r4, #7 + ldrls r15, [r15, r4, asl #2] + b .Lduffs_end + + .align 2 + .word .Lduffs_case0 + .word .Lduffs_case1 + .word .Lduffs_case2 + .word .Lduffs_case3 + .word .Lduffs_case4 + .word .Lduffs_case5 + .word .Lduffs_case6 + .word 
.Lduffs_case7 + +.Lduffs_case7: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case5: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case3: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case1: + ldr r10, [r9, #-4]! + ldr r11, [r14], #4 + mla r12, r10, r11, r12 + + subs r7, r7, #1 + bne .Lduffs_case0 + +.Lduffs_end: + ldr r10, [r0], #4 + add r11, r10, r12, asr r5 + str r11, [r14], #4 + subs r1, r1, #1 + bne .Lduffs_device_outer + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +.Lduffs_case0: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case6: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case4: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 +.Lduffs_case2: + ldmia r14!, {r4, r6} + ldmdb r9!, {r10, r11} + mla r12, r4, r11, r12 + mla r12, r6, r10, r12 + + subs r7, r7, #1 + bne .Lduffs_case0 + + ldr r10, [r0], #4 + add r11, r10, r12, asr r5 + str r11, [r14], #4 + subs r1, r1, #1 + bne .Lduffs_device_outer + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + .size FLAC__lpc_restore_signal_asm_arm, .-FLAC__lpc_restore_signal_asm_arm + .align 2 + .global FLAC__lpc_restore_signal_asm_arm_wide + .type FLAC__lpc_restore_signal_asm_arm_wide, %function +FLAC__lpc_restore_signal_asm_arm_wide: + stmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + @ r0 = residual; + @ r1 = data_len; + @ r2 = qlp_coeff; + @ r3 = order; + + .macro finish_loop + @ 64-bit arithmetic shift right: + mov r7, r7, lsr r2 @ shift lo register r2 bits right + rsb r10, r2, #32 + mov r11, r12, lsl r10 @ shift hi register (32 - r2) bits left + orr r7, r7, r11 @ combine new-hi and new-lo in one word + ands r6, r12, #0x80000000 @ mask sign bit from 
original hi word + orrne r7, r7, r6 @ restore sign bit from the original + + ldr r10, [r0], #4 @ residual + add r12, r10, r7 + str r12, [r8], #4 + subs r1, r1, #1 + bne .Lwide_duffs_device_outer + .endm + + add r5, r2, r3, asl #2 @ qlp0 = &qlp_coeff[order] + + ldr r2, [r13, #40] @ lp_quantization + ldr r8, [r13, #44] @ &data +.Lwide_duffs_device_outer: + sub r8, r8, r3, asl #2 @ r8 = data = &data[-order] + add r14, r3, #7 @ order + 7 + mov r14, r14, lsr #3 @ n = (order + 7) / (2^3) + + mov r9, r5 @ qlp = qlp0 + mov r7, #0 @ sum = 0 + mov r12, #0 @ sum = 0 + + @ switch (order % 8) + and r4, r3, #7 + cmp r4, #7 + ldrls r15, [r15, r4, asl #2] + b .Lwide_duffs_end + + .align 2 + .word .Lwide_duffs_case0 + .word .Lwide_duffs_case1 + .word .Lwide_duffs_case2 + .word .Lwide_duffs_case3 + .word .Lwide_duffs_case4 + .word .Lwide_duffs_case5 + .word .Lwide_duffs_case6 + .word .Lwide_duffs_case7 + +.Lwide_duffs_case7: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case5: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case3: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case1: + ldr r10, [r9, #-4]! 
+ ldr r11, [r8], #4 + smlal r7, r12, r10, r11 + + subs r14, r14, #1 @ --n + bne .Lwide_duffs_case0 + +.Lwide_duffs_end: + finish_loop + + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + +.Lwide_duffs_case0: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case6: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case4: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 +.Lwide_duffs_case2: + ldmia r8!, {r4, r6} + ldmdb r9!, {r10, r11} + smlal r7, r12, r4, r11 + smlal r7, r12, r6, r10 + + subs r14, r14, #1 @ --n + bne .Lwide_duffs_case0 + + finish_loop + + ldmfd r13!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r15} + + .size FLAC__lpc_restore_signal_asm_arm_wide, .-FLAC__lpc_restore_signal_asm_arm_wide + diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index de2bb2a3..977cb90c 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -125,6 +125,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) #endif #elif defined FLAC__CPU_PPC info->type = FLAC__CPUINFO_TYPE_PPC; +#elif defined FLAC__CPU_ARM + info->type = FLAC__CPUINFO_TYPE_ARM; #if !defined FLAC__NO_ASM info->use_asm = true; #ifdef FLAC__USE_ALTIVEC diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index d59c779e..adda1b8a 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -41,6 +41,7 @@ typedef enum { FLAC__CPUINFO_TYPE_IA32, FLAC__CPUINFO_TYPE_PPC, + FLAC__CPUINFO_TYPE_ARM, FLAC__CPUINFO_TYPE_UNKNOWN } FLAC__CPUInfo_Type; @@ -60,6 +61,9 @@ typedef struct { FLAC__bool ppc64; } FLAC__CPUInfo_PPC; +/* just generic ARM support for now */ +typedef FLAC__bool FLAC__CPUInfo_ARM; + extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV; extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX; extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR; @@ -76,6 +80,7 @@ typedef struct { union { 
FLAC__CPUInfo_IA32 ia32; FLAC__CPUInfo_PPC ppc; + FLAC__CPUInfo_ARM arm; } data; } FLAC__CPUInfo; diff --git a/src/libFLAC/include/private/fixed.h b/src/libFLAC/include/private/fixed.h index bb71b202..7dd40c9b 100644 --- a/src/libFLAC/include/private/fixed.h +++ b/src/libFLAC/include/private/fixed.h @@ -92,6 +92,10 @@ void FLAC__fixed_compute_residual(const FLAC__int32 data[], unsigned data_len, u * IN data[-order,-1] previously-reconstructed historical samples * OUT data[0,data_len-1] original signal */ +#if (!defined(FLAC__NO_ASM) && defined(FLAC__CPU_ARM)) +void FLAC__fixed_restore_signal_asm_arm(const FLAC__int32 residual[], unsigned data_len, unsigned order, FLAC__int32 data[]); +#else void FLAC__fixed_restore_signal(const FLAC__int32 residual[], unsigned data_len, unsigned order, FLAC__int32 data[]); +#endif #endif diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index 970db8a8..b79422c2 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -176,6 +176,9 @@ void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigne # elif defined FLAC__CPU_PPC void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); +# elif defined FLAC__CPU_ARM +void FLAC__lpc_restore_signal_asm_arm(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); +void FLAC__lpc_restore_signal_asm_arm_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); # endif/* FLAC__CPU_IA32 || FLAC__CPU_PPC */ #endif /* FLAC__NO_ASM */ 
diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c index b3ac1a87..e10de5f5 100644 --- a/src/libFLAC/stream_decoder.c +++ b/src/libFLAC/stream_decoder.c @@ -322,6 +322,8 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16; decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8; } +#elif defined FLAC__CPU_ARM + FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_ARM); #endif } #endif @@ -1961,7 +1963,11 @@ FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, unsigned channel, /* decode the subframe */ if(do_full_decode) { memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order); +#if (!defined(FLAC__NO_ASM) && defined(FLAC__CPU_ARM)) + FLAC__fixed_restore_signal_asm_arm(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order); +#else FLAC__fixed_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order); +#endif } return true; @@ -2039,6 +2045,9 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un if(do_full_decode) { memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order); if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32) +#ifdef FLAC__CPU_ARM + FLAC__lpc_restore_signal_asm_arm(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); +#else /* ! 
FLAC__CPU_ARM */ if(bps <= 16 && subframe->qlp_coeff_precision <= 16) { if(order <= 8) decoder->private_->local_lpc_restore_signal_16bit_order8(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); @@ -2047,8 +2056,13 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un } else decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); +#endif /* ! FLAC__CPU_ARM */ else +#ifdef FLAC__CPU_ARM + FLAC__lpc_restore_signal_asm_arm_wide(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); +#else /* ! FLAC__CPU_ARM */ decoder->private_->local_lpc_restore_signal_64bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); +#endif /* ! FLAC__CPU_ARM */ } return true; |