summaryrefslogtreecommitdiff
path: root/fpcsrc/rtl/x86_64/strings.inc
blob: ff7331cb877faa256fb93c81094f1907367d01d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2003 by Florian Klaempfl, member of the
    Free Pascal development team

    Processor dependent part of strings.pp, that can be shared with
    sysutils unit.

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{$ASMMODE GAS}

{$ifndef FPC_UNIT_HAS_STRCOPY}
{$define FPC_UNIT_HAS_STRCOPY}
{ Created from glibc: libc/sysdeps/x86_64/strcpy.S Version 1.2

  strcopy copies the NUL-terminated string at source, including the
  terminating NUL, to dest and returns dest.

  Register roles (after the optional win64 prologue):
    %rdi     = dest, never modified so it can be returned at the end
    %rsi     = read cursor in source
    %rdx     = write cursor in dest
    %ecx     = number of leading bytes to copy until %rsi is 8-byte aligned
    %r8      = magic constant used by the NUL detection
    %rax,%r9 = current 8-byte word and a scratch copy of it

  The source is read in aligned 8-byte words once the leading bytes are
  done; the destination is written with whatever alignment it has. }
function strcopy(dest,source : pchar) : pchar;assembler;
{$ifdef win64}
var
  rdi,rsi : int64;
{$endif win64}
asm
{$ifdef win64}
        { Keep the incoming %rsi/%rdi in the locals rsi/rdi (they are
          reloaded before returning), then move the arguments from
          %rcx/%rdx into the registers the code below works with. }
        movq %rsi,rsi
        movq %rdi,rdi
        movq %rdx, %rsi
        movq %rcx, %rdi
{$endif win64}
        movq %rsi, %rcx                { %rcx = source address }
        andl $7, %ecx                  { mask alignment bits }
        movq %rdi, %rdx                { Duplicate destination pointer.  }

        { movq leaves the flags alone, so this still tests the andl result. }
        jz .LFPC_STRCOPY_5              { aligned => start loop }

        neg %ecx                       { We need to align to 8 bytes:  }
        addl $8,%ecx                   { %ecx = 8 - (source and 7) }

        { Copy the leading bytes one at a time until the source is aligned.  }
.LFPC_STRCOPY_0:
        movb        (%rsi), %al        { Fetch a byte }
        testb       %al, %al           { Is it NUL? }
        movb        %al, (%rdx)        { Store it (does not touch the flags) }
        jz          .LFPC_STRCOPY_4     { If it was NUL, done! }
        incq        %rsi
        incq        %rdx
        decl        %ecx
        jnz         .LFPC_STRCOPY_0

.LFPC_STRCOPY_5:
        movq        $0xfefefefefefefeff,%r8   { magic value for the NUL search }

        { Now the source is aligned.  Unfortunately we cannot force
           both source and destination to be aligned, so the alignment
           of the destination is ignored.  }
        .p2align 4
.LFPC_STRCOPY_1:
        { 1st unroll.  }
        movq        (%rsi), %rax       { Read quad word (8 bytes).  }
        addq        $8, %rsi           { Adjust pointer for next word.  }
        movq        %rax, %r9          { Save a copy for NUL finding.  }
        addq        %r8, %r9           { add the magic value to the word.  We get
                                         carry bits reported for each byte which
                                         is *not* 0 }
        jnc         .LFPC_STRCOPY_3     { no carry out of the top bit => the word
                                         contains a NUL; copy the tail bytewise }
        xorq        %rax, %r9          { (word+magic)^word }
        orq         %r8, %r9           { set all non-carry bits }
        incq        %r9                { add 1: if one carry bit was *not* set
                                         the addition will not result in 0.  }

        jnz         .LFPC_STRCOPY_3                { found NUL => copy the tail bytewise }

        movq        %rax, (%rdx)        { No NUL inside: write the whole word.  }
        addq        $8, %rdx        { Adjust pointer.  }

        { 2nd unroll: same steps as the 1st one.  }
        movq        (%rsi), %rax        { Read quad word (8 bytes).  }
        addq        $8, %rsi        { Adjust pointer for next word.  }
        movq        %rax, %r9        { Save a copy for NUL finding.  }
        addq        %r8, %r9        { add the magic value to the word.  We get
                                   carry bits reported for each byte which
                                   is *not* 0 }
        jnc         .LFPC_STRCOPY_3                { no carry out => word contains a NUL }
        xorq        %rax, %r9        { (word+magic)^word }
        orq         %r8, %r9        { set all non-carry bits }
        incq        %r9                { add 1: if one carry bit was *not* set
                                   the addition will not result in 0.  }

        jnz         .LFPC_STRCOPY_3                { found NUL => copy the tail bytewise }

        movq        %rax, (%rdx)        { Write value to destination.  }
        addq        $8, %rdx        { Adjust pointer.  }

        { 3rd unroll: same steps as the 1st one.  }
        movq        (%rsi), %rax        { Read quad word (8 bytes).  }
        addq        $8, %rsi        { Adjust pointer for next word.  }
        movq        %rax, %r9        { Save a copy for NUL finding.  }
        addq        %r8, %r9        { add the magic value to the word.  We get
                                   carry bits reported for each byte which
                                   is *not* 0 }
        jnc        .LFPC_STRCOPY_3                { no carry out => word contains a NUL }
        xorq        %rax, %r9        { (word+magic)^word }
        orq        %r8, %r9        { set all non-carry bits }
        incq        %r9                { add 1: if one carry bit was *not* set
                                   the addition will not result in 0.  }

        jnz         .LFPC_STRCOPY_3                { found NUL => copy the tail bytewise }

        movq        %rax, (%rdx)        { Write value to destination.  }
        addq        $8, %rdx        { Adjust pointer.  }

        { 4th unroll: same steps as the 1st one.  }
        movq        (%rsi), %rax        { Read quad word (8 bytes).  }
        addq        $8, %rsi        { Adjust pointer for next word.  }
        movq        %rax, %r9        { Save a copy for NUL finding.  }
        addq        %r8, %r9        { add the magic value to the word.  We get
                                   carry bits reported for each byte which
                                   is *not* 0 }
        jnc         .LFPC_STRCOPY_3                { no carry out => word contains a NUL }
        xorq        %rax, %r9        { (word+magic)^word }
        orq         %r8, %r9        { set all non-carry bits }
        incq        %r9                { add 1: if one carry bit was *not* set
                                   the addition will not result in 0.  }

        jnz         .LFPC_STRCOPY_3                { found NUL => copy the tail bytewise }

        movq        %rax, (%rdx)        { Write value to destination.  }
        addq        $8, %rdx        { Adjust pointer.  }
        jmp         .LFPC_STRCOPY_1                { Next iteration.  }

        { Do the last few bytes. %rax contains the word holding the NUL,
           %rdx still points at the place where that word belongs.
           Two bytes are handled per pass, then %rax is shifted down.  }
        .p2align 4
.LFPC_STRCOPY_3:
        { Each byte is stored before it is tested, so the terminating
           NUL itself is copied as well.  }
        movb        %al, (%rdx)        { 1st byte.  }
        testb       %al, %al        { Is it NUL?  }
        jz          .LFPC_STRCOPY_4                { yes, finish.  }
        incq        %rdx                { Increment destination.  }
        movb        %ah, (%rdx)        { 2nd byte.  }
        testb       %ah, %ah        { Is it NUL?  }
        jz          .LFPC_STRCOPY_4                { yes, finish.  }
        incq        %rdx                { Increment destination.  }
        shrq        $16, %rax        { Shift the next two bytes into %al/%ah...  }
        jmp         .LFPC_STRCOPY_3                { and look at them.  }

.LFPC_STRCOPY_4:
        movq        %rdi, %rax        { Destination (dest) is the return value.  }
{$ifdef win64}
        { Reload the %rsi/%rdi values stored on entry.  }
        movq rsi,%rsi
        movq rdi,%rdi
{$endif win64}
end;
{$endif FPC_UNIT_HAS_STRCOPY}


{$ifndef FPC_UNIT_HAS_STRCOMP}
{$define FPC_UNIT_HAS_STRCOMP}
{ Created from glibc: libc/sysdeps/x86_64/strcmp.S Version 1.2

  StrComp compares two NUL-terminated strings byte by byte, treating the
  bytes as unsigned values.

  Result:
     0  both strings are identical up to and including the NUL
    -1  the first differing byte of Str1 is below the one of Str2
     1  the first differing byte of Str1 is above the one of Str2

  Register roles (after the optional win64 prologue):
    %rdi = cursor in Str1, %rsi = cursor in Str2, %al = current Str1 byte }
function StrComp(Str1, Str2: PChar): SizeInt;assembler;
{$ifdef win64}
var
  saved_rdi,saved_rsi : int64;
{$endif win64}
asm
{$ifdef win64}
        { Park the incoming %rdi/%rsi in locals (reloaded at the end) and
          move the arguments from %rcx/%rdx into them. }
        movq        %rdi, saved_rdi
        movq        %rsi, saved_rsi
        movq        %rcx, %rdi
        movq        %rdx, %rsi
{$endif win64}

.LFPC_STRCMP_NEXT:
        movb        (%rdi), %al             { al = current byte of Str1 }
        cmpb        (%rsi), %al             { al - current byte of Str2 }
        jne         .LFPC_STRCMP_DIFF       { bytes differ => decide the sign }
        incq        %rsi                    { advance both cursors }
        incq        %rdi
        testb       %al, %al                { was the matching byte the NUL? }
        jnz         .LFPC_STRCMP_NEXT       { no => keep comparing }

        { Both strings ended at the same position: they are equal. }
        xorl        %eax, %eax              { writing eax clears all of rax }
        jmp         .LFPC_STRCMP_DONE

.LFPC_STRCMP_DIFF:
        { The flags are still those of the cmpb above: CF is set exactly when
          the Str1 byte is below the Str2 byte (unsigned).
          sbb yields 0 or -1 from CF, or-ing in bit 0 turns that into 1 or -1. }
        sbbq        %rax, %rax
        orq         $1, %rax

.LFPC_STRCMP_DONE:
{$ifdef win64}
        movq        saved_rsi, %rsi
        movq        saved_rdi, %rdi
{$endif win64}
end;
{$endif FPC_UNIT_HAS_STRCOMP}