diff options
-rw-r--r-- | audio/mpg123/Makefile | 5 | ||||
-rw-r--r-- | audio/mpg123/files/decode_i586_.s | 321 | ||||
-rw-r--r-- | audio/mpg123/patches/patch-aa | 19 |
3 files changed, 337 insertions, 8 deletions
diff --git a/audio/mpg123/Makefile b/audio/mpg123/Makefile index 80967e80d20..2d92fac396a 100644 --- a/audio/mpg123/Makefile +++ b/audio/mpg123/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.8 1998/08/20 15:16:39 tsarna Exp $ +# $NetBSD: Makefile,v 1.9 1998/10/30 03:11:51 mycroft Exp $ # DISTNAME= mpg123-0.59o @@ -16,4 +16,7 @@ ALL_TARGET= netbsd MAKE_FLAGS+= PREFIX="${PREFIX}" +post-patch: + cp files/decode_i586_.s ${WRKSRC}/ + .include "../../mk/bsd.pkg.mk" diff --git a/audio/mpg123/files/decode_i586_.s b/audio/mpg123/files/decode_i586_.s new file mode 100644 index 00000000000..d89b094a95f --- /dev/null +++ b/audio/mpg123/files/decode_i586_.s @@ -0,0 +1,321 @@ +/ +/ synth_1to1 works the same way as the c version of this +/ file. only two types of changes have been made: +/ - reordered floating point instructions to +/ prevent pipeline stalls +/ - made WRITE_SAMPLE use integer instead of +/ (slower) floating point +/ all kinds of x86 processors should benefit from these +/ modifications. 
+/ +/ useful sources of information on optimizing x86 code include: +/ +/ Intel Architecture Optimization Manual +/ http://www.intel.com/design/pentium/manuals/242816.htm +/ +/ Cyrix 6x86 Instruction Set Summary +/ ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf +/ +/ AMD-K5 Processor Software Development +/ http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf +/ +/ Stefan Bieschewski <stb@acm.org> +/ +/ $Id: decode_i586_.s,v 1.1 1998/10/30 03:11:52 mycroft Exp $ +/ +.bss + .comm buffs,4352 +.data + .align 2 +bo: + .long 1 +.text + .align 3 +.LC0: + .long 0x0,0x40dfffc0 + .align 3 +.LC1: + .long 0x0,0xc0e00000 +.text + .align 3 +.globl _synth_1to1_pent +_synth_1to1_pent: + subl $12,%esp + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + movl 32(%esp),%eax + movl 40(%esp),%esi + xorl %edi,%edi + movl bo,%ebp + cmpl %edi,36(%esp) + jne .L48 + decl %ebp + andl $15,%ebp + movl %ebp,bo + movl $buffs,%ecx + jmp .L49 +.L48: + addl $2,%esi + movl $buffs+2176,%ecx +.L49: + testl $1,%ebp + je .L50 + movl %ecx,%ebx + movl %ebp,16(%esp) + pushl %eax + movl 20(%esp),%edx + leal (%ebx,%edx,4),%eax + pushl %eax + movl 24(%esp),%eax + incl %eax + andl $15,%eax + leal 1088(,%eax,4),%eax + addl %ebx,%eax + jmp .L74 +.L50: + leal 1088(%ecx),%ebx + leal 1(%ebp),%edx + movl %edx,16(%esp) + pushl %eax + leal 1092(%ecx,%ebp,4),%eax + pushl %eax + leal (%ecx,%ebp,4),%eax +.L74: + pushl %eax + call _dct64 + addl $12,%esp + movl 16(%esp),%edx + leal 0(,%edx,4),%edx + movl $_decwin+64,%eax + movl %eax,%ecx + subl %edx,%ecx + movl $16,%ebp +.L55: + flds (%ecx) + fmuls (%ebx) + flds 4(%ecx) + fmuls 4(%ebx) + fxch %st(1) + flds 8(%ecx) + fmuls 8(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 12(%ecx) + fmuls 12(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 16(%ecx) + fmuls 16(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 20(%ecx) + fmuls 20(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 24(%ecx) + fmuls 24(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 28(%ecx) + fmuls 28(%ebx) + fxch %st(2) + 
faddp %st,%st(1) + flds 32(%ecx) + fmuls 32(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 36(%ecx) + fmuls 36(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 40(%ecx) + fmuls 40(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 44(%ecx) + fmuls 44(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 48(%ecx) + fmuls 48(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 52(%ecx) + fmuls 52(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 56(%ecx) + fmuls 56(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 60(%ecx) + fmuls 60(%ebx) + fxch %st(2) + subl $4,%esp + faddp %st,%st(1) + fxch %st(1) + fsubrp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L54: + addl $64,%ebx + subl $-128,%ecx + addl $4,%esi + decl %ebp + jnz .L55 + flds (%ecx) + fmuls (%ebx) + flds 8(%ecx) + fmuls 8(%ebx) + flds 16(%ecx) + fmuls 16(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 24(%ecx) + fmuls 24(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 32(%ecx) + fmuls 32(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 40(%ecx) + fmuls 40(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 48(%ecx) + fmuls 48(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 56(%ecx) + fmuls 56(%ebx) + fxch %st(2) + subl $4,%esp + faddp %st,%st(1) + fxch %st(1) + faddp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L62: + addl $-64,%ebx + addl $4,%esi + movl 16(%esp),%edx + leal -128(%ecx,%edx,8),%ecx + movl $15,%ebp +.L68: + flds -4(%ecx) + fchs + fmuls (%ebx) + flds -8(%ecx) + fmuls 4(%ebx) + fxch %st(1) + flds -12(%ecx) + fmuls 8(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -16(%ecx) + fmuls 12(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -20(%ecx) + fmuls 16(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -24(%ecx) + fmuls 20(%ebx) + fxch %st(2) + 
fsubrp %st,%st(1) + flds -28(%ecx) + fmuls 24(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -32(%ecx) + fmuls 28(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -36(%ecx) + fmuls 32(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -40(%ecx) + fmuls 36(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -44(%ecx) + fmuls 40(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -48(%ecx) + fmuls 44(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -52(%ecx) + fmuls 48(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -56(%ecx) + fmuls 52(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -60(%ecx) + fmuls 56(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds (%ecx) + fmuls 60(%ebx) + fxch %st(2) + subl $4,%esp + fsubrp %st,%st(1) + fxch %st(1) + fsubrp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L67: + addl $-64,%ebx + addl $-128,%ecx + addl $4,%esi + decl %ebp + jnz .L68 + movl %edi,%eax + popl %ebx + popl %esi + popl %edi + popl %ebp + addl $12,%esp + ret + diff --git a/audio/mpg123/patches/patch-aa b/audio/mpg123/patches/patch-aa index 1c0a7042b17..28fe8a37ac2 100644 --- a/audio/mpg123/patches/patch-aa +++ b/audio/mpg123/patches/patch-aa @@ -1,7 +1,7 @@ -$NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $ +$NetBSD: patch-aa,v 1.8 1998/10/30 03:11:52 mycroft Exp $ ---- Makefile.orig Sun Feb 8 19:23:04 1998 -+++ Makefile Sat Aug 15 12:18:14 1998 +--- Makefile.orig Sun Feb 8 13:23:04 1998 ++++ Makefile Thu Oct 29 22:01:30 1998 @@ -32,6 +32,7 @@ @echo "make aix IBM AIX (tested: 4.2)" @echo "make os2 IBM OS/2" @@ -10,13 +10,18 @@ $NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $ @echo "make bsdos BSDI BSD/OS" @echo "make generic try this one if your system isn't listed above" @echo "" -@@ -221,7 +222,8 @@ - OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \ +@@ -218,10 +219,12 @@ + + netbsd-i386: + $(MAKE) CC=cc LDFLAGS= 
\ +- OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \ ++ OBJECTS='decode_i386.o dct64_i386.o getbits_.o decode_i586_.o \ ++ audio_sun.o' \ CFLAGS='-Wall -ansi -pedantic -O4 -m486 -fomit-frame-pointer \ -funroll-all-loops -ffast-math -DROT_I386 \ - -DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DNETBSD' \ -+ -DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DREAD_MMAP \ -+ -DNETBSD' \ ++ -DI386_ASSEM -DPENTIUM_OPT -DREAL_IS_FLOAT -DUSE_MMAP \ ++ -DREAD_MMAP -DNETBSD' \ mpg123-make bsdos: |