diff options
author | tron <tron@pkgsrc.org> | 2000-03-27 22:39:28 +0000 |
---|---|---|
committer | tron <tron@pkgsrc.org> | 2000-03-27 22:39:28 +0000 |
commit | 659f0a8c0a8e6348fdceb185b221ca0c0c216efe (patch) | |
tree | 8f411080e8735f479c3871a30734ccd1b4a0f45b | |
parent | bc20f6bc0e596bff5f02ba66ed20100ae7fccfe7 (diff) | |
download | pkgsrc-659f0a8c0a8e6348fdceb185b221ca0c0c216efe.tar.gz |
Add "a.out" version of "i586" decoder taken from "mpg123" package.
-rw-r--r-- | audio/xmms/Makefile | 7 | ||||
-rw-r--r-- | audio/xmms/files/decode_i586_.s | 321 |
2 files changed, 327 insertions, 1 deletions
diff --git a/audio/xmms/Makefile b/audio/xmms/Makefile index ce8b14642da..363b7e8a4eb 100644 --- a/audio/xmms/Makefile +++ b/audio/xmms/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.6 2000/03/12 22:31:54 tron Exp $ +# $NetBSD: Makefile,v 1.7 2000/03/27 22:39:28 tron Exp $ # FreeBSD: ports/audio/xmms/Makefile,v 1.9 2000/02/11 01:46:12 cpiazza Exp DISTNAME= xmms-1.0.1 @@ -32,10 +32,15 @@ CONFIGURE_ENV+= CPPFLAGS="${CPPFLAGS}" \ .if (${OPSYS} == NetBSD) CPPFLAGS+= -I${WRKDIR} +.if (${MACHINE_ARCH} == i386) post-patch: +.if (${OBJECT_FMT} == a.out) + ${CP} ${FILESDIR}/decode_i586_.s ${WRKSRC}/Input/mpg123/decode_i586.s +.endif cd ${WRKSRC}/Input/mpg123 && \ ${MV} decode_i586.s decode_i586.s.orig && \ ${SED} -e 's@^/@#/@g' <decode_i586.s.orig >decode_i586.s +.endif pre-configure: ${MKDIR} ${WRKDIR}/sys diff --git a/audio/xmms/files/decode_i586_.s b/audio/xmms/files/decode_i586_.s new file mode 100644 index 00000000000..40e6901be76 --- /dev/null +++ b/audio/xmms/files/decode_i586_.s @@ -0,0 +1,321 @@ +/ +/ synth_1to1 works the same way as the c version of this +/ file. only two types of changes have been made: +/ - reordered floating point instructions to +/ prevent pipeline stalls +/ - made WRITE_SAMPLE use integer instead of +/ (slower) floating point +/ all kinds of x86 processors should benefit from these +/ modifications. 
+/ +/ useful sources of information on optimizing x86 code include: +/ +/ Intel Architecture Optimization Manual +/ http://www.intel.com/design/pentium/manuals/242816.htm +/ +/ Cyrix 6x86 Instruction Set Summary +/ ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf +/ +/ AMD-K5 Processor Software Development +/ http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf +/ +/ Stefan Bieschewski <stb@acm.org> +/ +/ $Id: decode_i586_.s,v 1.1 2000/03/27 22:39:29 tron Exp $ +/ +.bss + .comm buffs,4352 +.data + .align 2 +bo: + .long 1 +.text + .align 3 +.LC0: + .long 0x0,0x40dfffc0 + .align 3 +.LC1: + .long 0x0,0xc0e00000 +.text + .align 3 +.globl _mpg123_synth_1to1_pent +_mpg123_synth_1to1_pent: + subl $12,%esp + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + movl 32(%esp),%eax + movl 40(%esp),%esi + xorl %edi,%edi + movl bo,%ebp + cmpl %edi,36(%esp) + jne .L48 + decl %ebp + andl $15,%ebp + movl %ebp,bo + movl $buffs,%ecx + jmp .L49 +.L48: + addl $2,%esi + movl $buffs+2176,%ecx +.L49: + testl $1,%ebp + je .L50 + movl %ecx,%ebx + movl %ebp,16(%esp) + pushl %eax + movl 20(%esp),%edx + leal (%ebx,%edx,4),%eax + pushl %eax + movl 24(%esp),%eax + incl %eax + andl $15,%eax + leal 1088(,%eax,4),%eax + addl %ebx,%eax + jmp .L74 +.L50: + leal 1088(%ecx),%ebx + leal 1(%ebp),%edx + movl %edx,16(%esp) + pushl %eax + leal 1092(%ecx,%ebp,4),%eax + pushl %eax + leal (%ecx,%ebp,4),%eax +.L74: + pushl %eax + call _mpg123_dct64 + addl $12,%esp + movl 16(%esp),%edx + leal 0(,%edx,4),%edx + movl $_mpg123_decwin+64,%eax + movl %eax,%ecx + subl %edx,%ecx + movl $16,%ebp +.L55: + flds (%ecx) + fmuls (%ebx) + flds 4(%ecx) + fmuls 4(%ebx) + fxch %st(1) + flds 8(%ecx) + fmuls 8(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 12(%ecx) + fmuls 12(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 16(%ecx) + fmuls 16(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 20(%ecx) + fmuls 20(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 24(%ecx) + fmuls 24(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 28(%ecx) + fmuls 
28(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 32(%ecx) + fmuls 32(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 36(%ecx) + fmuls 36(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 40(%ecx) + fmuls 40(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 44(%ecx) + fmuls 44(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 48(%ecx) + fmuls 48(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 52(%ecx) + fmuls 52(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 56(%ecx) + fmuls 56(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds 60(%ecx) + fmuls 60(%ebx) + fxch %st(2) + subl $4,%esp + faddp %st,%st(1) + fxch %st(1) + fsubrp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L54: + addl $64,%ebx + subl $-128,%ecx + addl $4,%esi + decl %ebp + jnz .L55 + flds (%ecx) + fmuls (%ebx) + flds 8(%ecx) + fmuls 8(%ebx) + flds 16(%ecx) + fmuls 16(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 24(%ecx) + fmuls 24(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 32(%ecx) + fmuls 32(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 40(%ecx) + fmuls 40(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 48(%ecx) + fmuls 48(%ebx) + fxch %st(2) + faddp %st,%st(1) + flds 56(%ecx) + fmuls 56(%ebx) + fxch %st(2) + subl $4,%esp + faddp %st,%st(1) + fxch %st(1) + faddp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L62: + addl $-64,%ebx + addl $4,%esi + movl 16(%esp),%edx + leal -128(%ecx,%edx,8),%ecx + movl $15,%ebp +.L68: + flds -4(%ecx) + fchs + fmuls (%ebx) + flds -8(%ecx) + fmuls 4(%ebx) + fxch %st(1) + flds -12(%ecx) + fmuls 8(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -16(%ecx) + fmuls 12(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -20(%ecx) + fmuls 16(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -24(%ecx) + fmuls 
20(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -28(%ecx) + fmuls 24(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -32(%ecx) + fmuls 28(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -36(%ecx) + fmuls 32(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -40(%ecx) + fmuls 36(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -44(%ecx) + fmuls 40(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -48(%ecx) + fmuls 44(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -52(%ecx) + fmuls 48(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -56(%ecx) + fmuls 52(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds -60(%ecx) + fmuls 56(%ebx) + fxch %st(2) + fsubrp %st,%st(1) + flds (%ecx) + fmuls 60(%ebx) + fxch %st(2) + subl $4,%esp + fsubrp %st,%st(1) + fxch %st(1) + fsubrp %st,%st(1) + fistpl (%esp) + popl %eax + cmpl $32767,%eax + jg 1f + cmpl $-32768,%eax + jl 2f + movw %ax,(%esi) + jmp 4f +1: movw $32767,(%esi) + jmp 3f +2: movw $-32768,(%esi) +3: incl %edi +4: +.L67: + addl $-64,%ebx + addl $-128,%ecx + addl $4,%esi + decl %ebp + jnz .L68 + movl %edi,%eax + popl %ebx + popl %esi + popl %edi + popl %ebp + addl $12,%esp + ret + |