1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
|
{ CFStringTokenizer.h
Copyright (c) 2006-2009, Apple Inc. All rights reserved.
}
{ Pascal Translation Updated: Jonas Maebe, <jonas@freepascal.org>, September 2010 }
{
Modified for use with Free Pascal
Version 308
Please report any bugs to <gpc@microbizz.nl>
}
{ Standalone-unit preamble. Everything from here to the matching $endc marked
"not MACOSALLINCLUDE" is compiled only when MACOSALLINCLUDE is undefined or
false. NOTE(review): presumably a combined unit that includes this file sets
MACOSALLINCLUDE and supplies the equivalent setup itself - confirm. }
{$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
{ Compiler settings for this unit: MacPas dialect, enumeration packing of 1,
macros and inlining enabled, mwpascal as the calling convention. }
{$mode macpas}
{$packenum 1}
{$macro on}
{$inline on}
{$calling mwpascal}
unit CFStringTokenizer;
interface
{ Interface version markers exposed as compile-time variables. }
{$setc UNIVERSAL_INTERFACES_VERSION := $0400}
{$setc GAP_INTERFACES_VERSION := $0308}
{ Default USE_CFSTR_CONSTANT_MACROS to TRUE unless it was defined beforehand. }
{$ifc not defined USE_CFSTR_CONSTANT_MACROS}
{$setc USE_CFSTR_CONSTANT_MACROS := TRUE}
{$endc}
{ Sanity checks: reject contradictory CPU or endianness defines. }
{$ifc defined CPUPOWERPC and defined CPUI386}
{$error Conflicting initial definitions for CPUPOWERPC and CPUI386}
{$endc}
{$ifc defined FPC_BIG_ENDIAN and defined FPC_LITTLE_ENDIAN}
{$error Conflicting initial definitions for FPC_BIG_ENDIAN and FPC_LITTLE_ENDIAN}
{$endc}
{ Translate the CPUxxx defines into C-compiler-style __xxx__ compile-time
variables holding 1 or 0.
NOTE(review): because each test includes "not defined", a symbol that is
already defined on entry takes the else branch and is set to 0 - confirm that
this is intended. }
{$ifc not defined __ppc__ and defined CPUPOWERPC32}
{$setc __ppc__ := 1}
{$elsec}
{$setc __ppc__ := 0}
{$endc}
{$ifc not defined __ppc64__ and defined CPUPOWERPC64}
{$setc __ppc64__ := 1}
{$elsec}
{$setc __ppc64__ := 0}
{$endc}
{$ifc not defined __i386__ and defined CPUI386}
{$setc __i386__ := 1}
{$elsec}
{$setc __i386__ := 0}
{$endc}
{$ifc not defined __x86_64__ and defined CPUX86_64}
{$setc __x86_64__ := 1}
{$elsec}
{$setc __x86_64__ := 0}
{$endc}
{$ifc not defined __arm__ and defined CPUARM}
{$setc __arm__ := 1}
{$elsec}
{$setc __arm__ := 0}
{$endc}
{ __LP64__ is 1 exactly when cpu64 is defined. }
{$ifc defined cpu64}
{$setc __LP64__ := 1}
{$elsec}
{$setc __LP64__ := 0}
{$endc}
{$ifc defined __ppc__ and __ppc__ and defined __i386__ and __i386__}
{$error Conflicting definitions for __ppc__ and __i386__}
{$endc}
{ Derive the TARGET_CPU_xxx and TARGET_OS_xxx settings from the CPU variables
above. Exactly one TARGET_CPU_xxx is TRUE per branch; compilation stops with an
error when no supported CPU was detected. }
{$ifc defined __ppc__ and __ppc__}
{$setc TARGET_CPU_PPC := TRUE}
{$setc TARGET_CPU_PPC64 := FALSE}
{$setc TARGET_CPU_X86 := FALSE}
{$setc TARGET_CPU_X86_64 := FALSE}
{$setc TARGET_CPU_ARM := FALSE}
{$setc TARGET_OS_MAC := TRUE}
{$setc TARGET_OS_IPHONE := FALSE}
{$setc TARGET_IPHONE_SIMULATOR := FALSE}
{$elifc defined __ppc64__ and __ppc64__}
{$setc TARGET_CPU_PPC := FALSE}
{$setc TARGET_CPU_PPC64 := TRUE}
{$setc TARGET_CPU_X86 := FALSE}
{$setc TARGET_CPU_X86_64 := FALSE}
{$setc TARGET_CPU_ARM := FALSE}
{$setc TARGET_OS_MAC := TRUE}
{$setc TARGET_OS_IPHONE := FALSE}
{$setc TARGET_IPHONE_SIMULATOR := FALSE}
{$elifc defined __i386__ and __i386__}
{$setc TARGET_CPU_PPC := FALSE}
{$setc TARGET_CPU_PPC64 := FALSE}
{$setc TARGET_CPU_X86 := TRUE}
{$setc TARGET_CPU_X86_64 := FALSE}
{$setc TARGET_CPU_ARM := FALSE}
{ On i386 the iphonesim define selects the iPhone simulator target instead of
Mac OS. }
{$ifc defined(iphonesim)}
{$setc TARGET_OS_MAC := FALSE}
{$setc TARGET_OS_IPHONE := TRUE}
{$setc TARGET_IPHONE_SIMULATOR := TRUE}
{$elsec}
{$setc TARGET_OS_MAC := TRUE}
{$setc TARGET_OS_IPHONE := FALSE}
{$setc TARGET_IPHONE_SIMULATOR := FALSE}
{$endc}
{$elifc defined __x86_64__ and __x86_64__}
{$setc TARGET_CPU_PPC := FALSE}
{$setc TARGET_CPU_PPC64 := FALSE}
{$setc TARGET_CPU_X86 := FALSE}
{$setc TARGET_CPU_X86_64 := TRUE}
{$setc TARGET_CPU_ARM := FALSE}
{$setc TARGET_OS_MAC := TRUE}
{$setc TARGET_OS_IPHONE := FALSE}
{$setc TARGET_IPHONE_SIMULATOR := FALSE}
{$elifc defined __arm__ and __arm__}
{$setc TARGET_CPU_PPC := FALSE}
{$setc TARGET_CPU_PPC64 := FALSE}
{$setc TARGET_CPU_X86 := FALSE}
{$setc TARGET_CPU_X86_64 := FALSE}
{$setc TARGET_CPU_ARM := TRUE}
{ ARM is currently treated as iPhone unconditionally; a separate compiler
define will be required when/if other Apple devices with ARM CPUs ship. }
{$setc TARGET_OS_MAC := FALSE}
{$setc TARGET_OS_IPHONE := TRUE}
{$setc TARGET_IPHONE_SIMULATOR := FALSE}
{$elsec}
{$error __ppc__ nor __ppc64__ nor __i386__ nor __x86_64__ nor __arm__ is defined.}
{$endc}
{ TARGET_CPU_64 follows __LP64__. }
{$ifc defined __LP64__ and __LP64__ }
{$setc TARGET_CPU_64 := TRUE}
{$elsec}
{$setc TARGET_CPU_64 := FALSE}
{$endc}
{ Runtime endianness flags follow the FPC_xxx_ENDIAN defines; exactly one of
the two must be defined. }
{$ifc defined FPC_BIG_ENDIAN}
{$setc TARGET_RT_BIG_ENDIAN := TRUE}
{$setc TARGET_RT_LITTLE_ENDIAN := FALSE}
{$elifc defined FPC_LITTLE_ENDIAN}
{$setc TARGET_RT_BIG_ENDIAN := FALSE}
{$setc TARGET_RT_LITTLE_ENDIAN := TRUE}
{$elsec}
{$error Neither FPC_BIG_ENDIAN nor FPC_LITTLE_ENDIAN are defined.}
{$endc}
{ Fixed settings that do not depend on the detected CPU. }
{$setc ACCESSOR_CALLS_ARE_FUNCTIONS := TRUE}
{$setc CALL_NOT_IN_CARBON := FALSE}
{$setc OLDROUTINENAMES := FALSE}
{$setc OPAQUE_TOOLBOX_STRUCTS := TRUE}
{$setc OPAQUE_UPP_TYPES := TRUE}
{$setc OTCARBONAPPLICATION := TRUE}
{$setc OTKERNEL := FALSE}
{$setc PM_USE_SESSION_APIS := TRUE}
{$setc TARGET_API_MAC_CARBON := TRUE}
{$setc TARGET_API_MAC_OS8 := FALSE}
{$setc TARGET_API_MAC_OSX := TRUE}
{$setc TARGET_CARBON := TRUE}
{$setc TARGET_CPU_68K := FALSE}
{$setc TARGET_CPU_MIPS := FALSE}
{$setc TARGET_CPU_SPARC := FALSE}
{$setc TARGET_OS_UNIX := FALSE}
{$setc TARGET_OS_WIN32 := FALSE}
{$setc TARGET_RT_MAC_68881 := FALSE}
{$setc TARGET_RT_MAC_CFM := FALSE}
{$setc TARGET_RT_MAC_MACHO := TRUE}
{$setc TYPED_FUNCTION_POINTERS := TRUE}
{$setc TYPE_BOOL := FALSE}
{$setc TYPE_EXTENDED := FALSE}
{$setc TYPE_LONGLONG := TRUE}
{ Units that supply the Core Foundation types referenced by the declarations
below (CFStringRef, CFRange, CFLocaleRef, CFMutableArrayRef and others). }
uses MacTypes,CFBase,CFLocale,CFArray,CFString;
{$endc} {not MACOSALLINCLUDE}
{ Alignment setting for the declarations that follow. It sits outside the
conditional block above, so it applies whether or not MACOSALLINCLUDE is set. }
{$ALIGN POWER}
{!
@header CFStringTokenizer
@abstract A CFStringTokenizer object provides the means by which you can
tokenize a string. To find the token that includes the character at a
given character index and set it as the current token, you call
CFStringTokenizerGoToTokenAtIndex. To advance to the next token and set
it as the current token, you call CFStringTokenizerAdvanceToNextToken.
To get the range of the current token, you call
CFStringTokenizerGetCurrentTokenRange. You can use
CFStringTokenizerCopyCurrentTokenAttribute to get an attribute of the
current token. If the current token is a compound, you can call
CFStringTokenizerGetCurrentSubTokens to retrieve the subtokens or derived
subtokens contained in the compound token.
To guess the language of a string, you call
CFStringTokenizerCopyBestStringLanguage.
}
{
===================================================================
Language Identifier
===================================================================
}
{!
@function CFStringTokenizerCopyBestStringLanguage
@abstract Guesses the language of a string and returns it as a BCP 47 string.
@param string The string whose language is to be guessed. This parameter is
declared as "strng" in the Pascal declaration below.
@param range The range of characters in string whose language is to be
guessed. The specified range must not exceed the bounds of the string.
If the range is empty (length 0), the first few hundred characters in
the string are used.
@result A language represented as a BCP 47 string, or NULL if the
language of the string cannot be guessed.
@discussion The result is not guaranteed to be accurate. Typically 200-400
characters are required to reliably guess the language of a string.
NOTE(review): the "Copy" in the routine name suggests that the caller owns the
returned string and has to release it - confirm against the Core Foundation
ownership policy.
The declaration binds to the external symbol
_CFStringTokenizerCopyBestStringLanguage.
}
function CFStringTokenizerCopyBestStringLanguage( strng: CFStringRef; range: CFRange ): CFStringRef; external name '_CFStringTokenizerCopyBestStringLanguage';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{
===================================================================
Tokenization
===================================================================
}
type
{ Reference to a tokenizer object, as returned by CFStringTokenizerCreate and
taken by the other CFStringTokenizer routines. It points to an empty record, so
Pascal code sees no fields behind it and is meant to treat it as an opaque
handle. }
CFStringTokenizerRef = ^__CFStringTokenizer; { an opaque type }
{ Empty placeholder record; it exists only to give CFStringTokenizerRef a
distinct pointee type. }
__CFStringTokenizer = record end;
{!
Tokenization options
These constants are passed in the options parameter of CFStringTokenizerCreate.
The tokenization units take the small values 0 through 4, while the attribute
specifiers are single bits (bits 16 and 17), so one unit can be combined with
one or more attribute specifiers in the same options value.
}
const
{!
Tokenization Unit
Use one of the tokenization unit options with CFStringTokenizerCreate to
specify how the string should be tokenized.
}
{ kCFStringTokenizerUnitWord is not locale sensitive. It does not return
the space between words as a token. }
kCFStringTokenizerUnitWord = 0;
kCFStringTokenizerUnitSentence = 1;
kCFStringTokenizerUnitParagraph = 2;
kCFStringTokenizerUnitLineBreak = 3;
{ kCFStringTokenizerUnitWordBoundary can be used in double click detection
and whole word search. It is locale sensitive. If the locale parameter of
CFStringTokenizerCreate is NULL, the default locale is used.
kCFStringTokenizerUnitWordBoundary returns the space between words as a token. }
kCFStringTokenizerUnitWordBoundary = 4;
{!
Attribute Specifier
Use an attribute specifier to tell the tokenizer to prepare the specified
attribute when it tokenizes the given string. The attribute value can be
retrieved by calling CFStringTokenizerCopyCurrentTokenAttribute with one of the
attribute options.
}
{ Latin Transcription. Used with kCFStringTokenizerUnitWord or
kCFStringTokenizerUnitWordBoundary. }
kCFStringTokenizerAttributeLatinTranscription = 1 shl 16;
{ Language as a BCP 47 string. Used with kCFStringTokenizerUnitSentence
or kCFStringTokenizerUnitParagraph. }
kCFStringTokenizerAttributeLanguage = 1 shl 17;
{!
Token type
CFStringTokenizerGoToTokenAtIndex and CFStringTokenizerAdvanceToNextToken return
the type of the current token. Apart from kCFStringTokenizerTokenNone (0), each
value below is a distinct single bit.
}
const
{ No token. }
kCFStringTokenizerTokenNone = 0;
{ Normal token. }
kCFStringTokenizerTokenNormal = 1 shl 0;
{!
Compound token which may contain subtokens but has no derived subtokens.
Its subtokens can be obtained by calling CFStringTokenizerGetCurrentSubTokens.
}
kCFStringTokenizerTokenHasSubTokensMask = 1 shl 1;
{!
Compound token which may contain derived subtokens.
Its subtokens and derived subtokens can be obtained by calling
CFStringTokenizerGetCurrentSubTokens.
}
kCFStringTokenizerTokenHasDerivedSubTokensMask = 1 shl 2;
{ NOTE(review): the three masks below carry no description in this file. Judging
by their names they presumably flag tokens that contain numbers, tokens that
contain non-letter characters, and CJ (presumably Chinese/Japanese) words -
confirm against the Apple header before relying on this.
The doubled "Has" in kCFStringTokenizerTokenHasHasNumbersMask is part of the
declared identifier; renaming it would break existing callers. }
kCFStringTokenizerTokenHasHasNumbersMask = 1 shl 3;
kCFStringTokenizerTokenHasNonLettersMask = 1 shl 4;
kCFStringTokenizerTokenIsCJWordMask = 1 shl 5;
type
{ Result type of CFStringTokenizerGoToTokenAtIndex and
CFStringTokenizerAdvanceToNextToken. It is an alias of CFOptionFlags and holds
the kCFStringTokenizerToken... values declared in this unit. }
CFStringTokenizerTokenType = CFOptionFlags;
{!
@function CFStringTokenizerGetTypeID
@abstract Gets the type identifier of the CFStringTokenizer type.
@result The type identifier shared by all CFStringTokenizer instances.
@discussion Takes no parameters. The declaration binds to the external symbol
_CFStringTokenizerGetTypeID.
}
function CFStringTokenizerGetTypeID: CFTypeID; external name '_CFStringTokenizerGetTypeID';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerCreate
@abstract Creates a tokenizer instance.
@param alloc The CFAllocator which should be used to allocate memory for the
tokenizer and its storage for values. This parameter may be NULL, in which
case the current default CFAllocator is used.
@param string The string to tokenize. This parameter is declared as "strng"
in the Pascal declaration below.
@param range The range of characters within the string to be tokenized. The
specified range must not exceed the length of the string.
@param options Use one of the Tokenization Unit options to specify how the
string should be tokenized. Optionally specify one or more attribute
specifiers to tell the tokenizer to prepare the specified attributes when it
tokenizes the string.
@param locale The locale to specify language or region specific behavior.
If the locale is NULL and the unit is not kCFStringTokenizerUnitWord,
the default locale will be used.
The locale is ignored if the unit is kCFStringTokenizerUnitWord.
@result A reference to the new CFStringTokenizer.
@discussion NOTE(review): the "Create" in the routine name suggests that the
caller owns the returned tokenizer and has to release it - confirm against the
Core Foundation ownership policy.
}
function CFStringTokenizerCreate( alloc: CFAllocatorRef; strng: CFStringRef; range: CFRange; options: CFOptionFlags; locale: CFLocaleRef ): CFStringTokenizerRef; external name '_CFStringTokenizerCreate';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerSetString
@abstract Sets the string to tokenize on an existing tokenizer.
@param tokenizer The reference to the CFStringTokenizer returned by
CFStringTokenizerCreate.
@param string The string to tokenize. This parameter is declared as "strng"
in the Pascal declaration below.
@param range The range of characters within the string to be tokenized. The
specified range must not exceed the length of the string.
@discussion Declared as a procedure, so no result is returned.
}
procedure CFStringTokenizerSetString( tokenizer: CFStringTokenizerRef; strng: CFStringRef; range: CFRange ); external name '_CFStringTokenizerSetString';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerGoToTokenAtIndex
@abstract Random access to a token. Finds the token that includes the character
at the given character index and sets it as the current token.
@param tokenizer The reference to the CFStringTokenizer returned by
CFStringTokenizerCreate.
@param index The index of the Unicode character in the CFString.
@result The type of the token if a token was found and set as the current
token; kCFStringTokenizerTokenNone if no token was found.
@discussion The range and attribute of the token can be obtained by calling
CFStringTokenizerGetCurrentTokenRange and CFStringTokenizerCopyCurrentTokenAttribute.
If the token is a compound (with type kCFStringTokenizerTokenHasSubTokensMask or
kCFStringTokenizerTokenHasDerivedSubTokensMask), its subtokens and/or
derived subtokens can be obtained by calling CFStringTokenizerGetCurrentSubTokens.
}
function CFStringTokenizerGoToTokenAtIndex( tokenizer: CFStringTokenizerRef; index: CFIndex ): CFStringTokenizerTokenType; external name '_CFStringTokenizerGoToTokenAtIndex';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerAdvanceToNextToken
@abstract Token enumerator: moves to the next token and makes it the current
token.
@param tokenizer The reference to the CFStringTokenizer returned by
CFStringTokenizerCreate.
@result The type of the token if a token was found and set as the current
token; kCFStringTokenizerTokenNone if no token was found.
@discussion If there is no preceding call to CFStringTokenizerGoToTokenAtIndex
or CFStringTokenizerAdvanceToNextToken, it finds the first token in the range
specified to CFStringTokenizerCreate. If there is a current token after a
successful call to CFStringTokenizerGoToTokenAtIndex or
CFStringTokenizerAdvanceToNextToken, it proceeds to the next token. If a token
is found, it is set as the current token and its token type is returned.
Otherwise the current token is invalidated and kCFStringTokenizerTokenNone is
returned.
The range and attribute of the token can be obtained by calling
CFStringTokenizerGetCurrentTokenRange and
CFStringTokenizerCopyCurrentTokenAttribute. If the token is a compound
(with type kCFStringTokenizerTokenHasSubTokensMask or
kCFStringTokenizerTokenHasDerivedSubTokensMask), its subtokens and/or
derived subtokens can be obtained by calling CFStringTokenizerGetCurrentSubTokens.
}
function CFStringTokenizerAdvanceToNextToken( tokenizer: CFStringTokenizerRef ): CFStringTokenizerTokenType; external name '_CFStringTokenizerAdvanceToNextToken';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerGetCurrentTokenRange
@abstract Returns the range of the current token.
@param tokenizer The reference to the CFStringTokenizer returned by
CFStringTokenizerCreate.
@result The range of the current token, or (kCFNotFound,0) if there is no
current token. The CFRange is returned by value.
}
function CFStringTokenizerGetCurrentTokenRange( tokenizer: CFStringTokenizerRef ): CFRange; external name '_CFStringTokenizerGetCurrentTokenRange';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerCopyCurrentTokenAttribute
@abstract Copies the specified attribute of the current token.
@param tokenizer The reference to the CFStringTokenizer returned by
CFStringTokenizerCreate.
@param attribute Specifies the token attribute you want to obtain. The value is
one of kCFStringTokenizerAttributeLatinTranscription or
kCFStringTokenizerAttributeLanguage.
@result The token attribute, or NULL if the current token does not have the
specified attribute or if there is no current token. The result is typed as a
generic CFTypeRef.
@discussion NOTE(review): the "Copy" in the routine name suggests that the
caller owns a non-NULL result and has to release it - confirm against the Core
Foundation ownership policy.
}
function CFStringTokenizerCopyCurrentTokenAttribute( tokenizer: CFStringTokenizerRef; attribute: CFOptionFlags ): CFTypeRef; external name '_CFStringTokenizerCopyCurrentTokenAttribute';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{!
@function CFStringTokenizerGetCurrentSubTokens
@abstract Retrieves the subtokens or derived subtokens contained in the compound token.
@param tokenizer The reference to the CFStringTokenizer returned by CFStringTokenizerCreate.
@param ranges An array of CFRange to fill in with the ranges of subtokens. The filled in
ranges are relative to the string specified to CFStringTokenizerCreate. This parameter
can be NULL.
Pascal note: the parameter is declared as a var parameter of type CFRange, so pass
the first element of the array. NOTE(review): a plain nil cannot be passed to a var
parameter; passing NULL presumably requires dereferencing a nil pointer of a
pointer-to-CFRange type - verify which such type is available before relying on this.
@param maxRangeLength The maximum number of ranges to return.
@param derivedSubTokens A CFMutableArray to which the derived subtokens are to
be added. This parameter can be NULL.
@result The number of subtokens.
@discussion If the token type is kCFStringTokenizerTokenNone, the ranges array and
the derivedSubTokens array are untouched and the return value is 0.
If the token type is kCFStringTokenizerTokenNormal, the ranges array has one item
filled in with the entire range of the token (if maxRangeLength >= 1), a string
taken from the entire token range is added to the derivedSubTokens array, and the
return value is 1.
If the token type is kCFStringTokenizerTokenHasSubTokensMask or
kCFStringTokenizerTokenHasDerivedSubTokensMask, the ranges array is filled
in with as many items as there are subtokens (up to a limit of maxRangeLength).
The derivedSubTokens array will have subtokens added even when the subtoken is a
substring of the token. If the token type is kCFStringTokenizerTokenHasSubTokensMask,
the ordinary non-derived subtokens are added to the derivedSubTokens array.
}
function CFStringTokenizerGetCurrentSubTokens( tokenizer: CFStringTokenizerRef; var ranges: CFRange; maxRangeLength: CFIndex; derivedSubTokens: CFMutableArrayRef ): CFIndex; external name '_CFStringTokenizerGetCurrentSubTokens';
(* AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER *)
{$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
end.
{$endc} {not MACOSALLINCLUDE}
|