3 files changed, 337 insertions, 8 deletions
diff --git a/audio/mpg123/Makefile b/audio/mpg123/Makefile
index 80967e80d20..2d92fac396a 100644
--- a/audio/mpg123/Makefile
+++ b/audio/mpg123/Makefile
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.8 1998/08/20 15:16:39 tsarna Exp $
+# $NetBSD: Makefile,v 1.9 1998/10/30 03:11:51 mycroft Exp $
 #
 
 DISTNAME=       mpg123-0.59o
@@ -16,4 +16,7 @@ ALL_TARGET=	netbsd
 
 MAKE_FLAGS+=	PREFIX="${PREFIX}"
 
+post-patch:	# stage decode_i586_.s in the work tree so the patched Makefile can build decode_i586_.o
+	cp ${FILESDIR}/decode_i586_.s ${WRKSRC}/	# FILESDIR, not a relative files/ path: do not depend on the current directory
+
 .include "../../mk/bsd.pkg.mk"
diff --git a/audio/mpg123/files/decode_i586_.s b/audio/mpg123/files/decode_i586_.s
new file mode 100644
index 00000000000..d89b094a95f
--- /dev/null
+++ b/audio/mpg123/files/decode_i586_.s
@@ -0,0 +1,321 @@
+/ 
+/ synth_1to1 works the same way as the c version of this
+/ file.  only two types of changes have been made:
+/ - reordered floating point instructions to
+/   prevent pipeline stalls
+/ - made WRITE_SAMPLE use integer instead of
+/   (slower) floating point
+/ all kinds of x86 processors should benefit from these
+/ modifications.
+/
+/ useful sources of information on optimizing x86 code include:
+/
+/     Intel Architecture Optimization Manual
+/     http://www.intel.com/design/pentium/manuals/242816.htm
+/
+/     Cyrix 6x86 Instruction Set Summary
+/     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
+/
+/     AMD-K5 Processor Software Development
+/     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
+/
+/ Stefan Bieschewski <stb@acm.org>
+/
+/ $Id: decode_i586_.s,v 1.1 1998/10/30 03:11:52 mycroft Exp $
+/
+.bss
+	.comm	buffs,4352	# 4352 bytes of work storage; the routine below uses it as two 2176-byte halves (buffs and buffs+2176), each addressed at offsets 0 and 1088
+.data
+	.align 2
+bo:	# ring position kept between calls; the routine decrements it modulo 16 when its 2nd argument is zero
+	.long 1	# initial ring position
+.text
+	.align 3
+.LC0:	# not referenced by any instruction in the code shown
+	.long 0x0,0x40dfffc0	# read as a little-endian IEEE-754 double this is 32767.0
+	.align 3
+.LC1:	# not referenced by any instruction in the code shown
+	.long 0x0,0xc0e00000	# read as a little-endian IEEE-754 double this is -32768.0
+.text
+	.align 3
+.globl _synth_1to1_pent	# exported entry point (symbol spelled with a leading underscore)
+_synth_1to1_pent:	# stores 32 16-bit samples through the 3rd stack argument and returns the number of clipped samples in %eax
+	subl $12,%esp	# reserve 12 bytes of locals; only the dword at 16(%esp) (after the pushes below) is used, as bo1
+	pushl %ebp	# save the four registers this routine overwrites; restored in reverse order before ret
+	pushl %edi
+	pushl %esi
+	pushl %ebx
+	movl 32(%esp),%eax	# eax = 1st stack argument (12 locals + 16 saved + 4 return address = 32); later handed to _dct64
+	movl 40(%esp),%esi	# esi = 3rd stack argument: output pointer for the 16-bit samples
+	xorl %edi,%edi	# edi = 0: clip counter
+	movl bo,%ebp	# ebp = current ring position
+	cmpl %edi,36(%esp)	# is the 2nd stack argument zero?
+	jne .L48
+	decl %ebp	# 2nd argument zero: step bo back by one ...
+	andl $15,%ebp	# ... modulo 16 ...
+	movl %ebp,bo	# ... and store it back
+	movl $buffs,%ecx	# ecx = first half of buffs
+	jmp .L49
+.L48:
+	addl $2,%esi	# 2nd argument non-zero: write to the second 16-bit slot of each 4-byte output frame (presumably the other stereo channel - confirm against caller)
+	movl $buffs+2176,%ecx	# ecx = second half of buffs; bo is used unchanged
+.L49:
+	testl $1,%ebp	# odd or even ring position?
+	je .L50
+	movl %ecx,%ebx	# odd: ebx (buffer read by the loops below) = half + 0
+	movl %ebp,16(%esp)	# bo1 = bo
+	pushl %eax	# push 1 of 3 for _dct64: our 1st argument
+	movl 20(%esp),%edx	# reload bo1 (its slot is 4 bytes further away after the push)
+	leal (%ebx,%edx,4),%eax
+	pushl %eax	# push 2 of 3: half + bo1*4
+	movl 24(%esp),%eax	# bo1 again (two pushes, so +8)
+	incl %eax
+	andl $15,%eax
+	leal 1088(,%eax,4),%eax
+	addl %ebx,%eax	# eax = half + 1088 + ((bo1+1)&15)*4, pushed at .L74
+	jmp .L74
+.L50:
+	leal 1088(%ecx),%ebx	# even: ebx (buffer read by the loops below) = half + 1088
+	leal 1(%ebp),%edx
+	movl %edx,16(%esp)	# bo1 = bo + 1
+	pushl %eax	# push 1 of 3 for _dct64: our 1st argument
+	leal 1092(%ecx,%ebp,4),%eax
+	pushl %eax	# push 2 of 3: half + 1088 + (bo+1)*4
+	leal (%ecx,%ebp,4),%eax	# eax = half + bo*4, pushed at .L74
+.L74:
+	pushl %eax	# push 3 of 3 (ends up as the first stack argument of the callee)
+	call _dct64	# external routine, defined outside this file
+	addl $12,%esp	# drop the three pushed arguments
+	movl 16(%esp),%edx
+	leal 0(,%edx,4),%edx	# edx = bo1*4
+	movl $_decwin+64,%eax	# _decwin is an external table; the entries are read as 32-bit floats below
+	movl %eax,%ecx
+	subl %edx,%ecx	# ecx (window pointer) = _decwin + 64 - bo1*4
+	movl $16,%ebp	# first loop: 16 output samples
+.L55:
+	flds (%ecx)	# one sample = 16 products of 32-bit floats window[i]*buf[i], i = 0..15, combined with alternating fsubrp/faddp
+	fmuls (%ebx)
+	flds 4(%ecx)
+	fmuls 4(%ebx)
+	fxch %st(1)	# bring the older product to the top; presumably so the add/sub need not wait for the multiply just issued
+	flds 8(%ecx)
+	fmuls 8(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)	# NOTE(review): as GAS assembles the AT&T form this yields st(1)-st(0), i.e. product 0 minus product 1 - confirm
+	flds 12(%ecx)
+	fmuls 12(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)	# fold in product 2
+	flds 16(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 20(%ecx)
+	fmuls 20(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 24(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 28(%ecx)
+	fmuls 28(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 32(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 36(%ecx)
+	fmuls 36(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 40(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 44(%ecx)
+	fmuls 44(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 48(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 52(%ecx)
+	fmuls 52(%ebx)
+	fxch %st(2)         
+	faddp %st,%st(1)
+	flds 56(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds 60(%ecx)
+	fmuls 60(%ebx)
+	fxch %st(2)
+	subl $4,%esp	# scratch dword on the stack for the integer conversion
+	faddp %st,%st(1)	# fold in product 14
+	fxch %st(1)
+	fsubrp %st,%st(1)	# fold in product 15; one value is left on the FPU stack
+	fistpl (%esp)	# store the sum as a 32-bit integer (current FPU rounding mode applies) and pop it
+	popl %eax	# eax = integer sample; also releases the scratch dword
+	cmpl $32767,%eax
+	jg 1f	# above the 16-bit maximum
+	cmpl $-32768,%eax
+	jl 2f	# below the 16-bit minimum
+	movw %ax,(%esi)	# in range: store unchanged
+	jmp 4f
+1:	movw $32767,(%esi)	# saturate high
+	jmp 3f
+2:	movw $-32768,(%esi)	# saturate low
+3:	incl %edi	# count the clipped sample
+4:
+.L54:	# label not referenced in the code shown
+	addl $64,%ebx	# buffer pointer forward 64 bytes (16 floats)
+	subl $-128,%ecx	# window pointer forward 128 bytes; written as a subtraction so the constant fits a sign-extended 8-bit immediate
+	addl $4,%esi	# next output frame (samples are written 4 bytes apart)
+	decl %ebp
+	jnz .L55
+	flds (%ecx)	# middle sample: 8 products at the even float indices 0,2,..,14 (byte offsets 0,8,..,56), all added
+	fmuls (%ebx)
+	flds 8(%ecx)
+	fmuls 8(%ebx)
+	flds 16(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 24(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 32(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 40(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 48(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	faddp %st,%st(1)
+	flds 56(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	subl $4,%esp	# scratch dword for the integer conversion
+	faddp %st,%st(1)
+	fxch %st(1)
+	faddp %st,%st(1)	# last product folded in; one value is left on the FPU stack
+	fistpl (%esp)	# store the sum as a 32-bit integer and pop it
+	popl %eax
+	cmpl $32767,%eax	# same saturating 16-bit store as in the first loop
+	jg 1f
+	cmpl $-32768,%eax
+	jl 2f
+	movw %ax,(%esi)
+	jmp 4f
+1:	movw $32767,(%esi)
+	jmp 3f
+2:	movw $-32768,(%esi)
+3:	incl %edi	# count the clipped sample
+4:
+.L62:	# label not referenced in the code shown
+	addl $-64,%ebx	# buffer pointer back 64 bytes (16 floats)
+	addl $4,%esi	# next output frame
+	movl 16(%esp),%edx	# edx = bo1
+	leal -128(%ecx,%edx,8),%ecx	# window pointer = window - 128 + bo1*8
+	movl $15,%ebp	# last loop: 15 output samples
+.L68:
+	flds -4(%ecx)	# one sample: window is read backwards (-4, -8, .., -60, then 0) against buf[0..15]
+	fchs	# the first product enters negated
+	fmuls (%ebx)
+	flds -8(%ecx)
+	fmuls 4(%ebx)
+	fxch %st(1)
+	flds -12(%ecx)
+	fmuls 8(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)	# every later product is combined with fsubrp; NOTE(review): net effect should be minus the sum of all 16 products - confirm against the C version
+	flds -16(%ecx)
+	fmuls 12(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -20(%ecx)
+	fmuls 16(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -24(%ecx)
+	fmuls 20(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -28(%ecx)
+	fmuls 24(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -32(%ecx)
+	fmuls 28(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -36(%ecx)
+	fmuls 32(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -40(%ecx)
+	fmuls 36(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -44(%ecx)
+	fmuls 40(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -48(%ecx)
+	fmuls 44(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -52(%ecx)
+	fmuls 48(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -56(%ecx)
+	fmuls 52(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds -60(%ecx)
+	fmuls 56(%ebx)
+	fxch %st(2)
+	fsubrp %st,%st(1)
+	flds (%ecx)	# the final term pairs the window entry at offset 0 with the buffer entry at offset 60
+	fmuls 60(%ebx)
+	fxch %st(2)
+	subl $4,%esp	# scratch dword for the integer conversion
+	fsubrp %st,%st(1)
+	fxch %st(1)
+	fsubrp %st,%st(1)	# last product folded in; one value is left on the FPU stack
+	fistpl (%esp)	# store the sum as a 32-bit integer and pop it
+	popl %eax
+	cmpl $32767,%eax	# same saturating 16-bit store as in the first loop
+	jg 1f
+	cmpl $-32768,%eax
+	jl 2f
+	movw %ax,(%esi)
+	jmp 4f
+1:	movw $32767,(%esi)
+	jmp 3f
+2:	movw $-32768,(%esi)
+3:	incl %edi	# count the clipped sample
+4:
+.L67:	# label not referenced in the code shown
+	addl $-64,%ebx	# buffer pointer back 64 bytes
+	addl $-128,%ecx	# window pointer back 128 bytes
+	addl $4,%esi	# next output frame
+	decl %ebp
+	jnz .L68
+	movl %edi,%eax	# return value = number of clipped samples
+	popl %ebx	# restore the saved registers in reverse push order
+	popl %esi
+	popl %edi
+	popl %ebp
+	addl $12,%esp	# release the locals reserved at entry
+	ret
+
diff --git a/audio/mpg123/patches/patch-aa b/audio/mpg123/patches/patch-aa
index 1c0a7042b17..28fe8a37ac2 100644
--- a/audio/mpg123/patches/patch-aa
+++ b/audio/mpg123/patches/patch-aa
@@ -1,7 +1,7 @@
-$NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $
+$NetBSD: patch-aa,v 1.8 1998/10/30 03:11:52 mycroft Exp $
 
---- Makefile.orig	Sun Feb  8 19:23:04 1998
-+++ Makefile	Sat Aug 15 12:18:14 1998
+--- Makefile.orig	Sun Feb  8 13:23:04 1998
++++ Makefile	Thu Oct 29 22:01:30 1998
 @@ -32,6 +32,7 @@
  	@echo "make aix            IBM AIX (tested: 4.2)"
  	@echo "make os2            IBM OS/2"
@@ -10,13 +10,18 @@ $NetBSD: patch-aa,v 1.7 1998/08/15 10:58:41 frueauf Exp $
  	@echo "make bsdos          BSDI BSD/OS"
  	@echo "make generic        try this one if your system isn't listed above"
  	@echo ""
-@@ -221,7 +222,8 @@
- 		OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \
+@@ -218,10 +219,12 @@
+ 
+ netbsd-i386:
+ 	$(MAKE) CC=cc LDFLAGS= \
+-		OBJECTS='decode_i386.o dct64_i386.o getbits_.o audio_sun.o' \
++		OBJECTS='decode_i386.o dct64_i386.o getbits_.o decode_i586_.o \
++			audio_sun.o' \
  		CFLAGS='-Wall -ansi -pedantic -O4 -m486 -fomit-frame-pointer \
  			-funroll-all-loops -ffast-math -DROT_I386 \
 -			-DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DNETBSD' \
-+			-DI386_ASSEM -DREAL_IS_FLOAT -DUSE_MMAP -DREAD_MMAP \
-+			-DNETBSD' \
++			-DI386_ASSEM -DPENTIUM_OPT -DREAL_IS_FLOAT -DUSE_MMAP \
++			-DREAD_MMAP -DNETBSD' \
  		mpg123-make
  
  bsdos: