diff options
Diffstat (limited to 'multimedia/libmpeg3/patches/patch-ah')
-rw-r--r-- | multimedia/libmpeg3/patches/patch-ah | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/multimedia/libmpeg3/patches/patch-ah b/multimedia/libmpeg3/patches/patch-ah new file mode 100644 index 00000000000..12035bb0083 --- /dev/null +++ b/multimedia/libmpeg3/patches/patch-ah @@ -0,0 +1,310 @@ +$NetBSD: patch-ah,v 1.1 2004/06/26 18:00:21 grant Exp $ + +--- video/output.c.orig 2001-05-20 13:05:26.000000000 +1000 ++++ video/output.c 2003-07-29 00:53:45.000000000 +1000 +@@ -207,50 +207,50 @@ + ); + } + +-static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000; +-static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080; +-static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000; +-static long long mpeg3_MMX_V_COEF = 0x00000000ffea006f; +-static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048; +-static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010; ++static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000LL; ++static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080LL; ++static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; ++static long long mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; ++static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; ++static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; + + inline void mpeg3_bgra32_mmx(unsigned long y, + unsigned long u, + unsigned long v, + unsigned long *output) + { +-asm(" +-/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ +-/* for bgr24. */ +- movd (%0), %%mm0; /* Load y 0x00000000000000yy */ +- movd (%1), %%mm1; /* Load u 0x00000000000000cr */ +- movq %%mm0, %%mm3; /* Copy y to temp */ +- psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ +- movd (%2), %%mm2; /* Load v 0x00000000000000cb */ +- psllq $16, %%mm3; /* Shift y */ +- movq %%mm1, %%mm4; /* Copy u to temp */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ +- psllq $16, %%mm4; /* Shift u */ +- movq %%mm2, %%mm5; /* Copy v to temp */ +- psllq $16, %%mm3; /* Shift y */ +- por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ +- psllq $16, %%mm5; /* Shift v */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ +- por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ +- psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ +- pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ +- psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ +- psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ +- pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ +- paddsw %%mm1, %%mm0; /* Add u to result */ +- paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ +- psraw $6, %%mm0; /* Demote precision */ +- packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ +- movd %%mm0, (%3); /* Store output */ ++asm("\n\ ++/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\ ++/* for bgr24. */\n\ ++ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ ++ movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\ ++ movq %%mm0, %%mm3; /* Copy y to temp */\n\ ++ psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\ ++ movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ movq %%mm1, %%mm4; /* Copy u to temp */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ ++ psllq $16, %%mm4; /* Shift u */\n\ ++ movq %%mm2, %%mm5; /* Copy v to temp */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\ ++ psllq $16, %%mm5; /* Shift v */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ ++ por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\ ++ psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\ ++ pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\ ++ psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\ ++ psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\ ++ pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\ ++ paddsw %%mm1, %%mm0; /* Add u to result */\n\ ++ paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\ ++ psraw $6, %%mm0; /* Demote precision */\n\ ++ packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\ ++ movd %%mm0, (%3); /* Store output */\n\ + " + : + : "r" (&y), "r" (&u), "r" (&v), "r" (output)); +@@ -261,39 +261,39 @@ + unsigned long v, + unsigned long *output) + { +-asm(" +-/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ +-/* for bgr24. */ +- movd (%0), %%mm0; /* Load y 0x00000000000000yy */ +- psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ +- movd (%1), %%mm1; /* Load u 0x00000000000000cr */ +- movq %%mm0, %%mm3; /* Copy y to temp */ +- psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ +- movd (%2), %%mm2; /* Load v 0x00000000000000cb */ +- psllq $16, %%mm3; /* Shift y */ +- movq %%mm1, %%mm4; /* Copy u to temp */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ +- psllq $16, %%mm4; /* Shift u */ +- movq %%mm2, %%mm5; /* Copy v to temp */ +- psllq $16, %%mm3; /* Shift y */ +- por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ +- psllq $16, %%mm5; /* Shift v */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ +- por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ +- pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ +- psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ +- pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ +- psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ +- pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ +- paddsw %%mm1, %%mm0; /* Add u to result */ +- paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ +- psraw $6, %%mm0; /* Demote precision */ +- packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ +- movd %%mm0, (%3); /* Store output */ ++asm("\n\ ++/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\ ++/* for bgr24. */\n\ ++ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ ++ psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\ ++ movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\ ++ movq %%mm0, %%mm3; /* Copy y to temp */\n\ ++ psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\ ++ movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ movq %%mm1, %%mm4; /* Copy u to temp */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ ++ psllq $16, %%mm4; /* Shift u */\n\ ++ movq %%mm2, %%mm5; /* Copy v to temp */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\ ++ psllq $16, %%mm5; /* Shift v */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ ++ por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\ ++ pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */\n\ ++ psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\ ++ pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\ ++ psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\ ++ pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\ ++ paddsw %%mm1, %%mm0; /* Add u to result */\n\ ++ paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\ ++ psraw $6, %%mm0; /* Demote precision */\n\ ++ packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\ ++ movd %%mm0, (%3); /* Store output */\n\ + " + : + : "r" (&y), "r" (&u), "r" (&v), "r" (output)); +@@ -309,38 +309,38 @@ + unsigned long v, + unsigned long *output) + { +-asm(" +-/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ +-/* for rgb24. */ +- movd (%0), %%mm0; /* Load y 0x00000000000000yy */ +- movd (%1), %%mm1; /* Load v 0x00000000000000vv */ +- movq %%mm0, %%mm3; /* Copy y to temp */ +- psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ +- movd (%2), %%mm2; /* Load u 0x00000000000000uu */ +- psllq $16, %%mm3; /* Shift y */ +- movq %%mm1, %%mm4; /* Copy v to temp */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ +- psllq $16, %%mm4; /* Shift v */ +- movq %%mm2, %%mm5; /* Copy u to temp */ +- psllq $16, %%mm3; /* Shift y */ +- por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ +- psllq $16, %%mm5; /* Shift u */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ +- por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ +- psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ +- pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ +- psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ +- psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ +- pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ +- paddsw %%mm1, %%mm0; /* Add v to result */ +- paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ +- psraw $6, %%mm0; /* Demote precision */ +- packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ +- movd %%mm0, (%3); /* Store output */ ++asm("\n\ ++/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\ ++/* for rgb24. */\n\ ++ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ ++ movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\ ++ movq %%mm0, %%mm3; /* Copy y to temp */\n\ ++ psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\ ++ movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ movq %%mm1, %%mm4; /* Copy v to temp */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ ++ psllq $16, %%mm4; /* Shift v */\n\ ++ movq %%mm2, %%mm5; /* Copy u to temp */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\ ++ psllq $16, %%mm5; /* Shift u */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ ++ por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\ ++ psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\ ++ pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\ ++ psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\ ++ psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\ ++ pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\ ++ paddsw %%mm1, %%mm0; /* Add v to result */\n\ ++ paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\ ++ psraw $6, %%mm0; /* Demote precision */\n\ ++ packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\ ++ movd %%mm0, (%3); /* Store output */\n\ + " + : + : "r" (&y), "r" (&v), "r" (&u), "r" (output)); +@@ -351,39 +351,39 @@ + unsigned long v, + unsigned long *output) + { +-asm(" +-/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ +-/* for rgb24. */ +- movd (%0), %%mm0; /* Load y 0x00000000000000yy */ +- psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ +- movd (%1), %%mm1; /* Load v 0x00000000000000vv */ +- movq %%mm0, %%mm3; /* Copy y to temp */ +- psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ +- movd (%2), %%mm2; /* Load u 0x00000000000000uu */ +- psllq $16, %%mm3; /* Shift y */ +- movq %%mm1, %%mm4; /* Copy v to temp */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ +- psllq $16, %%mm4; /* Shift v */ +- movq %%mm2, %%mm5; /* Copy u to temp */ +- psllq $16, %%mm3; /* Shift y */ +- por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ +- psllq $16, %%mm5; /* Shift u */ +- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ +- por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ +- pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ +- psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ +- pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ +- psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ +- pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ +- +-/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ +- paddsw %%mm1, %%mm0; /* Add v to result */ +- paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ +- psraw $6, %%mm0; /* Demote precision */ +- packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ +- movd %%mm0, (%3); /* Store output */ ++asm("\n\ ++/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\ ++/* for rgb24. */\n\ ++ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\ ++ psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\ ++ movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\ ++ movq %%mm0, %%mm3; /* Copy y to temp */\n\ ++ psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\ ++ movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ movq %%mm1, %%mm4; /* Copy v to temp */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\ ++ psllq $16, %%mm4; /* Shift v */\n\ ++ movq %%mm2, %%mm5; /* Copy u to temp */\n\ ++ psllq $16, %%mm3; /* Shift y */\n\ ++ por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\ ++ psllq $16, %%mm5; /* Shift u */\n\ ++ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\ ++ por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\ ++ pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */\n\ ++ psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\ ++ pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\ ++ psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\ ++ pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\ ++\n\ ++/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\ ++ paddsw %%mm1, %%mm0; /* Add v to result */\n\ ++ paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\ ++ psraw $6, %%mm0; /* Demote precision */\n\ ++ packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\ ++ movd %%mm0, (%3); /* Store output */\n\ + " + : + : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |