summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormycroft <mycroft@pkgsrc.org>1998-10-30 03:11:51 +0000
committermycroft <mycroft@pkgsrc.org>1998-10-30 03:11:51 +0000
commitbd2f0f1de82c2d7f2b52b174766c925147e3ab4e (patch)
tree97d7e3476978381f7942902e8005e97635d6b5c8
parent8b66b876e1405d8f2debca3c5cd4d34654fb62e6 (diff)
downloadpkgsrc-bd2f0f1de82c2d7f2b52b174766c925147e3ab4e.tar.gz
Use the Pentium-optimized decoding routines.
-rw-r--r--audio/mpg123/Makefile5
-rw-r--r--audio/mpg123/files/decode_i586_.s321
-rw-r--r--audio/mpg123/patches/patch-aa19
3 files changed, 337 insertions, 8 deletions
diff --git a/audio/mpg123/Makefile b/audio/mpg123/Makefile
index 80967e80d20..2d92fac396a 100644
--- a/audio/mpg123/Makefile
+++ b/audio/mpg123/Makefile
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.8 1998/08/20 15:16:39 tsarna Exp $
+# $NetBSD: Makefile,v 1.9 1998/10/30 03:11:51 mycroft Exp $
#
DISTNAME= mpg123-0.59o
@@ -16,4 +16,7 @@ ALL_TARGET= netbsd
MAKE_FLAGS+= PREFIX="${PREFIX}"
+post-patch:
+ cp files/decode_i586_.s ${WRKSRC}/
+
.include "../../mk/bsd.pkg.mk"
diff --git a/audio/mpg123/files/decode_i586_.s b/audio/mpg123/files/decode_i586_.s
new file mode 100644
index 00000000000..d89b094a95f
--- /dev/null
+++ b/audio/mpg123/files/decode_i586_.s
@@ -0,0 +1,321 @@
+/
+/ synth_1to1 works the same way as the c version of this
+/ file. only two types of changes have been made:
+/ - reordered floating point instructions to
+/ prevent pipeline stalls
+/ - made WRITE_SAMPLE use integer instead of
+/ (slower) floating point
+/ all kinds of x86 processors should benefit from these
+/ modifications.
+/
+/ useful sources of information on optimizing x86 code include:
+/
+/ Intel Architecture Optimization Manual
+/ http://www.intel.com/design/pentium/manuals/242816.htm
+/
+/ Cyrix 6x86 Instruction Set Summary
+/ ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
+/
+/ AMD-K5 Processor Software Development
+/ http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
+/
+/ Stefan Bieschewski <stb@acm.org>
+/
+/ $Id: decode_i586_.s,v 1.1 1998/10/30 03:11:52 mycroft Exp $
+/
+/
+/ Static storage used by _synth_1to1_pent.
+/
+/ buffs: 4352 bytes of common storage. The code below addresses it
+/ from two bases, buffs and buffs+2176, and within each base at
+/ offset 0 and offset 1088.
+/
+.bss
+ .comm buffs,4352
+.data
+ .align 2
+/ bo: position counter, initial value 1. _synth_1to1_pent decrements
+/ it modulo 16 and stores it back only on the path where its second
+/ argument is zero.
+bo:
+ .long 1
+.text
+ .align 3
+/ .LC0/.LC1: two 8-byte constants. Read as little-endian IEEE-754
+/ doubles they are 32767.0 and -32768.0. Neither label is referenced
+/ by the code visible in this file (the clamp is done with integer
+/ compares instead).
+.LC0:
+ .long 0x0,0x40dfffc0
+ .align 3
+.LC1:
+ .long 0x0,0xc0e00000
+.text
+ .align 3
+/
+/ _synth_1to1_pent
+/
+/ Takes three 32-bit arguments on the stack:
+/   arg1  passed through unchanged to _dct64
+/   arg2  compared against zero to pick which half of buffs is used
+/         (presumably the channel number; confirm against the C
+/         version of synth_1to1)
+/   arg3  output pointer; 16-bit samples are stored through it at a
+/         stride of 4 bytes
+/ Writes 16 + 1 + 15 = 32 samples and returns in %eax the number of
+/ samples that had to be clamped to the range -32768..32767.
+/ %ebx, %esi, %edi and %ebp are saved on entry and restored on exit.
+/
+/ Register roles after the prologue:
+/   %ebx  pointer into buffs (one operand of every multiply)
+/   %ecx  pointer into _decwin (other operand of every multiply)
+/   %esi  output pointer
+/   %edi  count of clamped samples
+/   %ebp  bo value during setup, then loop counter
+/
+.globl _synth_1to1_pent
+_synth_1to1_pent:
+/ 12 bytes of locals plus four saved registers: with the return
+/ address the arguments are at 32, 36 and 40(%esp).
+ subl $12,%esp
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 32(%esp),%eax
+ movl 40(%esp),%esi
+ xorl %edi,%edi
+ movl bo,%ebp
+/ %edi is zero here, so this tests arg2 against 0.
+ cmpl %edi,36(%esp)
+ jne .L48
+/ arg2 == 0: step bo backwards modulo 16, store it, use the first
+/ half of buffs.
+ decl %ebp
+ andl $15,%ebp
+ movl %ebp,bo
+ movl $buffs,%ecx
+ jmp .L49
+.L48:
+/ arg2 != 0: bo is used as loaded (not updated), output starts two
+/ bytes further on, and the second half of buffs is used.
+ addl $2,%esi
+ movl $buffs+2176,%ecx
+.L49:
+/ Choose the _dct64 destinations and the read base (%ebx) from the
+/ parity of bo. The local at 16(%esp) keeps the index used later to
+/ offset the window pointer.
+ testl $1,%ebp
+ je .L50
+/ bo odd: read from offset 0 of the selected half, local = bo.
+ movl %ecx,%ebx
+ movl %ebp,16(%esp)
+ pushl %eax
+/ Each push moves %esp down by 4, so 20(%esp) here and 24(%esp) below
+/ are the same local that was written through 16(%esp) above.
+ movl 20(%esp),%edx
+ leal (%ebx,%edx,4),%eax
+ pushl %eax
+ movl 24(%esp),%eax
+ incl %eax
+ andl $15,%eax
+ leal 1088(,%eax,4),%eax
+ addl %ebx,%eax
+ jmp .L74
+.L50:
+/ bo even: read from offset 1088 of the selected half, local = bo+1.
+ leal 1088(%ecx),%ebx
+ leal 1(%ebp),%edx
+ movl %edx,16(%esp)
+ pushl %eax
+ leal 1092(%ecx,%ebp,4),%eax
+ pushl %eax
+ leal (%ecx,%ebp,4),%eax
+.L74:
+/ Third argument push is shared by both paths; the 12 bytes of
+/ arguments are removed again right after the call.
+ pushl %eax
+ call _dct64
+ addl $12,%esp
+/ %ecx = _decwin + 64 - 4*local
+ movl 16(%esp),%edx
+ leal 0(,%edx,4),%edx
+ movl $_decwin+64,%eax
+ movl %eax,%ecx
+ subl %edx,%ecx
+ movl $16,%ebp
+/
+/ First loop, 16 iterations: one output sample per pass, built from
+/ the 16 products (%ecx)[k] * (%ebx)[k], k = 0..15, folded together
+/ with alternating fsubrp/faddp. Each fxch brings an already finished
+/ product to the top so the next combine does not wait on the
+/ multiply just issued.
+/
+.L55:
+ flds (%ecx)
+ fmuls (%ebx)
+ flds 4(%ecx)
+ fmuls 4(%ebx)
+ fxch %st(1)
+ flds 8(%ecx)
+ fmuls 8(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 12(%ecx)
+ fmuls 12(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 16(%ecx)
+ fmuls 16(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 20(%ecx)
+ fmuls 20(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 24(%ecx)
+ fmuls 24(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 28(%ecx)
+ fmuls 28(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 32(%ecx)
+ fmuls 32(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 36(%ecx)
+ fmuls 36(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 40(%ecx)
+ fmuls 40(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 44(%ecx)
+ fmuls 44(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 48(%ecx)
+ fmuls 48(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 52(%ecx)
+ fmuls 52(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 56(%ecx)
+ fmuls 56(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds 60(%ecx)
+ fmuls 60(%ebx)
+ fxch %st(2)
+/ Reserve a 4-byte stack slot for the integer result while the FPU
+/ is still busy with the last combines.
+ subl $4,%esp
+ faddp %st,%st(1)
+ fxch %st(1)
+ fsubrp %st,%st(1)
+/ Convert to a 32-bit integer through the stack slot, then clamp to
+/ the signed 16-bit range with integer compares. %edi is incremented
+/ only on the two clamping paths.
+ fistpl (%esp)
+ popl %eax
+ cmpl $32767,%eax
+ jg 1f
+ cmpl $-32768,%eax
+ jl 2f
+ movw %ax,(%esi)
+ jmp 4f
+1: movw $32767,(%esi)
+ jmp 3f
+2: movw $-32768,(%esi)
+3: incl %edi
+4:
+.L54:
+/ Next pass: buffer +64 bytes, window +128 bytes (written as
+/ subtracting -128), output +4 bytes.
+ addl $64,%ebx
+ subl $-128,%ecx
+ addl $4,%esi
+ decl %ebp
+ jnz .L55
+/
+/ Middle sample: only the eight even-indexed products (byte offsets
+/ 0, 8, ..., 56) are used, and all of them are added.
+/
+ flds (%ecx)
+ fmuls (%ebx)
+ flds 8(%ecx)
+ fmuls 8(%ebx)
+ flds 16(%ecx)
+ fmuls 16(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 24(%ecx)
+ fmuls 24(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 32(%ecx)
+ fmuls 32(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 40(%ecx)
+ fmuls 40(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 48(%ecx)
+ fmuls 48(%ebx)
+ fxch %st(2)
+ faddp %st,%st(1)
+ flds 56(%ecx)
+ fmuls 56(%ebx)
+ fxch %st(2)
+ subl $4,%esp
+ faddp %st,%st(1)
+ fxch %st(1)
+ faddp %st,%st(1)
+/ Same convert-and-clamp sequence as in the first loop.
+ fistpl (%esp)
+ popl %eax
+ cmpl $32767,%eax
+ jg 1f
+ cmpl $-32768,%eax
+ jl 2f
+ movw %ax,(%esi)
+ jmp 4f
+1: movw $32767,(%esi)
+ jmp 3f
+2: movw $-32768,(%esi)
+3: incl %edi
+4:
+.L62:
+/ Set up the second loop: the buffer pointer now steps backwards,
+/ and the window pointer is moved to %ecx - 128 + 8*local.
+ addl $-64,%ebx
+ addl $4,%esi
+ movl 16(%esp),%edx
+ leal -128(%ecx,%edx,8),%ecx
+ movl $15,%ebp
+/
+/ Second loop, 15 iterations: the window is read at descending
+/ offsets -4, -8, ..., -60 and finally 0 from %ecx while the buffer
+/ is read at ascending offsets 0..60 from %ebx. The first window
+/ value is negated (fchs) and every combine is fsubrp.
+/
+.L68:
+ flds -4(%ecx)
+ fchs
+ fmuls (%ebx)
+ flds -8(%ecx)
+ fmuls 4(%ebx)
+ fxch %st(1)
+ flds -12(%ecx)
+ fmuls 8(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -16(%ecx)
+ fmuls 12(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -20(%ecx)
+ fmuls 16(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -24(%ecx)
+ fmuls 20(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -28(%ecx)
+ fmuls 24(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -32(%ecx)
+ fmuls 28(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -36(%ecx)
+ fmuls 32(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -40(%ecx)
+ fmuls 36(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -44(%ecx)
+ fmuls 40(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -48(%ecx)
+ fmuls 44(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -52(%ecx)
+ fmuls 48(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -56(%ecx)
+ fmuls 52(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds -60(%ecx)
+ fmuls 56(%ebx)
+ fxch %st(2)
+ fsubrp %st,%st(1)
+ flds (%ecx)
+ fmuls 60(%ebx)
+ fxch %st(2)
+ subl $4,%esp
+ fsubrp %st,%st(1)
+ fxch %st(1)
+ fsubrp %st,%st(1)
+/ Same convert-and-clamp sequence as in the first loop.
+ fistpl (%esp)
+ popl %eax
+ cmpl $32767,%eax
+ jg 1f
+ cmpl $-32768,%eax
+ jl 2f
+ movw %ax,(%esi)
+ jmp 4f
+1: movw $32767,(%esi)
+ jmp 3f
+2: movw $-32768,(%esi)
+3: incl %edi
+4:
+.L67:
+/ Next pass: buffer -64 bytes, window -128 bytes, output +4 bytes.
+ addl $-64,%ebx
+ addl $-128,%ecx
+ addl $4,%esi
+ decl %ebp
+ jnz .L68
+/ Return the clamp count and unwind the frame in reverse order of
+/ the prologue.
+ movl %edi,%eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ addl $12,%esp
+ ret
+
diff --git a/audio/mpg123/patches/patch-aa b/audio/mpg123/patches/patch-aa
index 1c0a7042b17..28fe8a37ac2 100644
--- a/audio/mpg123/patches/patch-aa
+++ b/audio/mpg123/patches/patch-aa
@@ -1,7 +1,7 @@
-$NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $
+$NetBSD: patch-aa,v 1.8 1998/10/30 03:11:52 mycroft Exp $
---- Makefile.orig Sun Feb 8 19:23:04 1998
-+++ Makefile Sat Aug 15 12:18:14 1998
+--- Makefile.orig Sun Feb 8 13:23:04 1998
++++ Makefile Thu Oct 29 22:01:30 1998
@@ -32,6 +32,7 @@
@echo "make aix IBM AIX (tested: 4.2)"
@echo "make os2 IBM OS/2"
@@ -10,13 +10,18 @@ $NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $
@echo "make bsdos BSDI BSD/OS"
@echo "make generic try this one if your system isn't listed above"
@echo ""
-@@ -221,7 +222,8 @@
- OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \
+@@ -218,10 +219,12 @@
+
+ netbsd-i386:
+ $(MAKE) CC=cc LDFLAGS= \
+- OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \
++ OBJECTS='decode_i386.o dct64_i386.o getbits_.o decode_i586_.o \
++ audio_sun.o' \
CFLAGS='-Wall -ansi -pedantic -O4 -m486 -fomit-frame-pointer \
-funroll-all-loops -ffast-math -DROT_I386 \
- -DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DNETBSD' \
-+ -DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DREAD_MMAP \
-+ -DNETBSD' \
++ -DI386_ASSEM -DPENTIUM_OPT -DREAL_IS_FLOAT -DUSE_MMAP \
++ -DREAD_MMAP -DNETBSD' \
mpg123-make
bsdos: