summaryrefslogtreecommitdiff
path: root/multimedia/ffmpeg2/patches/patch-libavcodec_x86_ac3dsp__init.c
blob: bce9a7bc2ebe7a7d1fe8b1cbd984529b8265a65c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
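$NetBSD: patch-libavcodec_x86_ac3dsp__init.c,v 1.1 2019/01/03 11:56:09 bsiegert Exp $

Use unaligned SSE moves (movups) instead of aligned ones (movaps) in the
AC-3 downmix inline assembly, and load the memory operands of mulps into
a register with movups first. Presumably this is needed because the
sample and matrix buffers are not guaranteed to be 16-byte aligned on
all platforms; movaps and SSE memory operands fault on unaligned
addresses.
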
--- libavcodec/x86/ac3dsp_init.c.orig	2018-02-19 00:50:25.000000000 +0000
+++ libavcodec/x86/ac3dsp_init.c
@@ -83,11 +83,11 @@ void ff_apply_window_int16_ssse3_atom(in
         "shufps     $0, %%xmm6, %%xmm6          \n"             \
         "shufps     $0, %%xmm7, %%xmm7          \n"             \
         "1:                                     \n"             \
-        "movaps       (%0, %2), %%xmm0          \n"             \
-        "movaps       (%0, %3), %%xmm1          \n"             \
-        "movaps       (%0, %4), %%xmm2          \n"             \
-        "movaps       (%0, %5), %%xmm3          \n"             \
-        "movaps       (%0, %6), %%xmm4          \n"             \
+        "movups       (%0, %2), %%xmm0          \n"             \
+        "movups       (%0, %3), %%xmm1          \n"             \
+        "movups       (%0, %4), %%xmm2          \n"             \
+        "movups       (%0, %5), %%xmm3          \n"             \
+        "movups       (%0, %6), %%xmm4          \n"             \
         "mulps          %%xmm5, %%xmm0          \n"             \
         "mulps          %%xmm6, %%xmm1          \n"             \
         "mulps          %%xmm5, %%xmm2          \n"             \
@@ -98,8 +98,8 @@ void ff_apply_window_int16_ssse3_atom(in
         "addps          %%xmm3, %%xmm0          \n"             \
         "addps          %%xmm4, %%xmm2          \n"             \
    mono("addps          %%xmm2, %%xmm0          \n")            \
-        "movaps         %%xmm0, (%0, %2)        \n"             \
- stereo("movaps         %%xmm2, (%0, %3)        \n")            \
+        "movups         %%xmm0, (%0, %2)        \n"             \
+ stereo("movups         %%xmm2, (%0, %3)        \n")            \
         "add               $16, %0              \n"             \
         "jl                 1b                  \n"             \
         : "+&r"(i)                                              \
@@ -119,24 +119,26 @@ void ff_apply_window_int16_ssse3_atom(in
         "mov              %5, %2            \n"                 \
         "1:                                 \n"                 \
         "mov -%c7(%6, %2, %c8), %3          \n"                 \
-        "movaps     (%3, %0), %%xmm0        \n"                 \
+        "movups     (%3, %0), %%xmm0        \n"                 \
  stereo("movaps       %%xmm0, %%xmm1        \n")                \
         "mulps        %%xmm4, %%xmm0        \n"                 \
  stereo("mulps        %%xmm5, %%xmm1        \n")                \
         "2:                                 \n"                 \
         "mov   (%6, %2, %c8), %1            \n"                 \
-        "movaps     (%1, %0), %%xmm2        \n"                 \
+        "movups     (%1, %0), %%xmm2        \n"                 \
  stereo("movaps       %%xmm2, %%xmm3        \n")                \
-        "mulps   (%4, %2, 8), %%xmm2        \n"                 \
- stereo("mulps 16(%4, %2, 8), %%xmm3        \n")                \
+        "movups  (%4, %2, 8), %%xmm6        \n"                 \
+        "mulps        %%xmm6, %%xmm2        \n"                 \
+ stereo("movups 16(%4, %2, 8), %%xmm6       \n")                \
+ stereo("mulps        %%xmm6, %%xmm3        \n")                \
         "addps        %%xmm2, %%xmm0        \n"                 \
  stereo("addps        %%xmm3, %%xmm1        \n")                \
         "add              $4, %2            \n"                 \
         "jl               2b                \n"                 \
         "mov              %5, %2            \n"                 \
  stereo("mov   (%6, %2, %c8), %1            \n")                \
-        "movaps       %%xmm0, (%3, %0)      \n"                 \
- stereo("movaps       %%xmm1, (%1, %0)      \n")                \
+        "movups       %%xmm0, (%3, %0)      \n"                 \
+ stereo("movups       %%xmm1, (%1, %0)      \n")                \
         "add             $16, %0            \n"                 \
         "jl               1b                \n"                 \
         : "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m)                \
@@ -179,8 +181,8 @@ static void ac3_downmix_sse(float **samp
             "movss    4(%2, %0), %%xmm5         \n"
             "shufps          $0, %%xmm4, %%xmm4 \n"
             "shufps          $0, %%xmm5, %%xmm5 \n"
-            "movaps      %%xmm4,   (%1, %0, 4)  \n"
-            "movaps      %%xmm5, 16(%1, %0, 4)  \n"
+            "movups      %%xmm4,   (%1, %0, 4)  \n"
+            "movups      %%xmm5, 16(%1, %0, 4)  \n"
             "jg              1b                 \n"
             : "+&r"(j)
             : "r"(matrix_simd), "r"(matrix)