1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
|
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
*/
/*
* This file implements the 2008 newlocale and friends handling.
*/
#ifndef _LCONV_C99
#define _LCONV_C99
#endif
#include "lint.h"
#include <atomic.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <string.h>
#include "libc.h"
#include "mtlib.h"
#include "tsd.h"
#include "localeimpl.h"
#include "lctype.h"
/*
* Big Theory of Locales:
*
* (It is recommended that readers familiarize themselves with the POSIX
* 2008 (XPG Issue 7) specifications for locales, first.)
*
* Historically, we had a bunch of global variables that stored locale
* data. While this worked well, it limited applications to a single locale
* at a time. This doesn't work well in certain server applications.
*
* In Issue 7, X/Open introduced the concept of a locale_t object, along
* with versions of functions that can take this object as a parameter,
* and functions to clone and manipulate these locale objects. The new
* functions are named with a _l() suffix.
*
* Additionally uselocale() is introduced which can change the locale
* of a single thread. However, setlocale() can still be used to change
* the global locale.
*
* In our implementation, we use libc's TSD to store the locale data that
* was previously global. We still have global data because some applications
* have had those global objects compiled into them. (Such applications will
* be unable to benefit from uselocale(), btw.) The legacy routines are
* reimplemented as wrappers that use the appropriate locale object by
* calling uselocale(). uselocale() when passed a NULL pointer returns the
* thread-specific locale object if one is present, or the global locale
* object otherwise. Note that once the TSD data is set, the only way
* to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
* to uselocale().
*
* We are careful to minimize performance impact of multiple calls to
* uselocale() or setlocale() by using a cache of locale data whenever possible.
* As a consequence of this, applications that iterate over all possible
* locales will burn through a lot of virtual memory, but we find such
* applications rare. (locale -a might be an exception, but it is short lived.)
*
* Category data is never released (although enclosing locale objects might be),
* in order to guarantee thread-safety. Calling freelocale() on an object
* while it is in use by another thread is a programmer error (use-after-free)
* and we don't bother to note it further.
*
* Locale objects (global locales) established by setlocale() are also
* never freed (for MT safety), but we will save previous locale objects
* and reuse them when we can.
*/
/*
 * Signature shared by the per-category loader routines: each is handed a
 * locale name and returns the locdata it produced (callers in this file
 * treat NULL as "could not load").
 */
typedef struct locdata *(*loadfn_t)(const char *);
/*
 * Loader dispatch table, indexed by category number.  locdata_get_cache()
 * invokes loaders[category] when the requested name is neither the most
 * recently used entry nor on the list of already loaded data, so the
 * entries must stay in category-number order.
 */
static const loadfn_t loaders[LC_ALL] = {
__lc_ctype_load,
__lc_numeric_load,
__lc_time_load,
__lc_collate_load,
__lc_monetary_load,
__lc_messages_load,
};
/*
 * Built-in POSIX ("C") category data and rune table.  They are defined
 * elsewhere and referenced by the static locale objects in this file.
 */
extern struct lc_monetary lc_monetary_posix;
extern struct lc_numeric lc_numeric_posix;
extern struct lc_messages lc_messages_posix;
extern struct lc_time lc_time_posix;
extern struct lc_ctype lc_ctype_posix;
extern struct lc_collate lc_collate_posix;
extern struct _RuneLocale _DefaultRuneLocale;
/*
 * The statically allocated "C" locale.  It is the initial value of
 * ___global_locale, it is what locdata_get() hands out for the names
 * "C" and "POSIX", and freelocale() explicitly refuses to free it.
 */
static struct _locale posix_locale = {
/* Per-category locdata, indexed by category number. */
.locdata = {
&__posix_ctype_locdata,
&__posix_numeric_locdata,
&__posix_time_locdata,
&__posix_collate_locdata,
&__posix_monetary_locdata,
&__posix_messages_locdata,
},
.locname = "C",
/* Direct pointers to the per-category data. */
.ctype = &lc_ctype_posix,
.numeric = &lc_numeric_posix,
.collate = &lc_collate_posix,
.monetary = &lc_monetary_posix,
.messages = &lc_messages_posix,
.time = &lc_time_posix,
.runelocale = &_DefaultRuneLocale,
};
/*
 * The process-wide (global) locale.  It starts out as the static "C"
 * locale; uselocale() falls back to it when a thread has no locale of
 * its own, and duplocale()/newlocale() use it as the default source.
 */
locale_t ___global_locale = &posix_locale;
/*
 * Accessor returning the current value of ___global_locale.
 * NOTE(review): presumably this is what LC_GLOBAL_LOCALE expands to --
 * confirm against <locale.h>.
 */
locale_t
__global_locale(void)
{
return (___global_locale);
}
/*
* Locale data for hybrid C.UTF-8 locale having all the characteristics of
* default C/POSIX locale, except for LC_CTYPE data which is retrieved from
* cache/file as for other UTF-8 locales.
*/
/*
 * Indexed by category number.  locdata_get() returns &cutf_locdata[category]
 * for the name "C.UTF-8" in every category except LC_CTYPE, which goes
 * through the normal cache/loader path instead.
 */
static struct locdata cutf_locdata[LC_ALL] = {
{ "C.UTF-8", NULL }, /* LC_CTYPE slot: unused, never handed out */
{ "C.UTF-8", &lc_numeric_posix }, /* LC_NUMERIC */
{ "C.UTF-8", &lc_time_posix }, /* LC_TIME */
{ "C.UTF-8", &lc_collate_posix }, /* LC_COLLATE */
{ "C.UTF-8", &lc_monetary_posix }, /* LC_MONETARY */
{ "C.UTF-8", &lc_messages_posix }, /* LC_MESSAGES */
};
/*
 * Category names for getenv().  Note that this was modified
 * for Solaris.  See <iso/locale_iso.h>.
 *
 * The table is indexed by category number, with "LC_ALL" in the final
 * slot; get_locale_env() looks names up as categories[LC_ALL] and
 * categories[category].  The strings are literals and the table is never
 * modified, so both the pointers and the characters are const.
 */
#define	NUM_CATS	7
static const char *const categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
/*
* Prototypes.
*/
static const char *get_locale_env(int);
static struct locdata *locdata_get(int, const char *);
static struct locdata *locdata_get_cache(int, const char *);
static locale_t mklocname(locale_t);
/*
* Some utility routines.
*/
/*
 * Allocate a locdata record labelled with the locale name `name' and give
 * it a primary data buffer (l_data[0]) of `memsz' bytes.
 *
 * Returns the new record, or NULL if either allocation fails.  errno is
 * set to ENOMEM here only when the data buffer cannot be allocated; when
 * the record itself cannot be allocated this routine does not touch errno.
 */
struct locdata *
__locdata_alloc(const char *name, size_t memsz)
{
	struct locdata *rec = lmalloc(sizeof (*rec));

	if (rec == NULL)
		return (NULL);

	rec->l_data[0] = libc_malloc(memsz);
	if (rec->l_data[0] != NULL) {
		/* Name is truncated if it does not fit l_lname. */
		(void) strlcpy(rec->l_lname, name, sizeof (rec->l_lname));
		return (rec);
	}

	/* No data buffer: give the record back and report ENOMEM. */
	lfree(rec, sizeof (*rec));
	errno = ENOMEM;
	return (NULL);
}
/*
 * Locale data is normally kept forever.  This routine exists for the
 * failure path only: when loading went wrong part way through, it tears
 * down whatever had been set up so far.
 *
 * Every l_data[] slot is handed to libc_free(), the file mapping (if there
 * is one with a non-zero length) is unmapped, and finally the record
 * itself is released.
 */
void
__locdata_free(struct locdata *ldata)
{
	int slot = 0;

	while (slot < NLOCDATA) {
		libc_free(ldata->l_data[slot]);
		slot++;
	}

	if (ldata->l_map_len && ldata->l_map != NULL) {
		(void) munmap(ldata->l_map, ldata->l_map_len);
	}

	lfree(ldata, sizeof (*ldata));
}
/*
* It turns out that for performance reasons we would really like to
* cache the most recently referenced locale data to avoid wasteful
* loading from files.
*/
/* Most recently used locdata for each category; consulted first. */
static struct locdata *cache_data[LC_ALL];
/* Per-category list (linked through l_next) of locdata seen so far. */
static struct locdata *cat_data[LC_ALL];
/* Held by locdata_get_cache() while it reads or updates the two tables. */
static mutex_t cache_lock = DEFAULTMUTEX;
/*
 * Look up (and if necessary load) the locdata for `locname' in `category'.
 *
 * The search order is: the most recently used entry for the category, then
 * the list of everything loaded so far, then the category's loader.
 * Whatever is found becomes the new most recently used entry.
 *
 * Returns NULL if the category is out of range or the loader fails.  No
 * reference count is taken in this routine, so the caller has nothing to
 * release.
 */
static struct locdata *
locdata_get_cache(int category, const char *locname)
{
	struct locdata *hit;

	if (category < 0 || category >= LC_ALL)
		return (NULL);

	lmutex_lock(&cache_lock);

	/* Fast path: same name as the last request for this category. */
	hit = cache_data[category];
	if (hit != NULL && strcmp(hit->l_lname, locname) == 0) {
		lmutex_unlock(&cache_lock);
		return (hit);
	}

	/*
	 * Next, walk the previously loaded locales (linear search) --
	 * this could be optimized to a hash, but it's unlikely that a single
	 * application will ever need to work with more than a few locales.
	 */
	hit = cat_data[category];
	while (hit != NULL && strcmp(locname, hit->l_lname) != 0)
		hit = hit->l_next;

	/*
	 * Still nothing: load the locale data from the actual on-disk data.
	 *
	 * We drop the lock (libc wants to ensure no internal locks
	 * are held when we call other routines required to read from
	 * files, allocate memory, etc.)  There is a small race here,
	 * but the consequences of the race are benign -- if multiple
	 * threads hit this at precisely the same point, we could
	 * wind up with duplicates of the locale data in the cache.
	 *
	 * This wastes the memory for an extra copy of the locale
	 * data, but there is no further harm beyond that.  It's not
	 * worth the effort to recode this to something "safe"
	 * (which would require rescanning the list, etc.), given
	 * that this race will probably never actually occur.
	 */
	if (hit == NULL) {
		lmutex_unlock(&cache_lock);
		hit = (*loaders[category])(locname);
		lmutex_lock(&cache_lock);
		if (hit != NULL) {
			/* Record the name it was requested under. */
			(void) strlcpy(hit->l_lname, locname,
			    sizeof (hit->l_lname));
		}
	}

	if (hit == NULL) {
		lmutex_unlock(&cache_lock);
		return (NULL);
	}

	/*
	 * Remember this as the most recent entry, and link it in at the
	 * head of the loaded list (more recent use is likely to win) unless
	 * it is already on that list.
	 */
	cache_data[category] = hit;
	if (!hit->l_cached) {
		hit->l_cached = 1;
		hit->l_next = cat_data[category];
		cat_data[category] = hit;
	}

	lmutex_unlock(&cache_lock);
	return (hit);
}
/*
 * Charmap aliases, mostly found in Linux.
 *
 * locdata_get() searches a locale name for each "alias" spelling; when the
 * first occurrence found runs to the end of the name, that tail is
 * replaced by the corresponding "name" (provided the result fits in its
 * scratch buffer).  A NULL alias terminates the table.
 */
static const struct {
const char *alias;
const char *name;
} cmalias[] = {
{ "utf8", "UTF-8" },
{ "iso88591", "ISO8859-1" },
{ "iso885915", "ISO8859-15" },
{ "gb18030", "GB18030" },
{ "koi8r", "KOI8-R" },
{ NULL, NULL }
};
/*
 * Resolve the locdata for one category of a (possibly composite) locale
 * name.
 *
 * The name is first normalized: a NULL or empty name is replaced by the
 * environment's choice, a composite "a/b/c/..." name is reduced to the
 * component for this category, and a trailing charmap alias is rewritten
 * to its canonical spelling.  "C" and "POSIX" then map to the static POSIX
 * data, "C.UTF-8" maps to the hybrid table for everything but LC_CTYPE,
 * and any other name is looked up through the cache (which loads it if
 * needed).
 */
static struct locdata *
locdata_get(int category, const char *locname)
{
	char	component[ENCODING_LEN + 1];
	char	canonical[ENCODING_LEN + 1];
	char	*sep, *tail;
	int	skip;
	int	n;
	int	a;

	/* No name given: let the environment decide. */
	if (locname == NULL || *locname == 0)
		locname = get_locale_env(category);

	/*
	 * Composite locale: step over one component per category index,
	 * then copy out the component we landed on.
	 */
	sep = strchr(locname, '/');
	if (sep != NULL) {
		skip = category;
		while (skip && sep != NULL) {
			locname = sep + 1;
			sep = strchr(locname, '/');
			skip--;
		}

		/*
		 * strlcpy() copies at most n - 1 bytes, so n is the
		 * component length plus one, capped at the buffer size.
		 */
		n = sizeof (component);
		if (sep) {
			n = sep - locname + 1;
			if (n >= sizeof (component))
				n = sizeof (component);
		}
		(void) strlcpy(component, locname, n);
		locname = component;
	}

	if (strcmp(locname, "C") == 0 || strcmp(locname, "POSIX") == 0)
		return (posix_locale.locdata[category]);

	/* Handle charmap aliases */
	for (a = 0; cmalias[a].alias != NULL; a++) {
		tail = strstr(locname, cmalias[a].alias);
		if (tail == NULL ||
		    strlen(tail) != strlen(cmalias[a].alias))
			continue;

		/* Keep the prefix and substitute the canonical charmap. */
		n = tail - locname + 1;
		if (n + strlen(cmalias[a].name) < sizeof (canonical)) {
			(void) strlcpy(canonical, locname, n);
			(void) strlcat(canonical, cmalias[a].name,
			    sizeof (canonical));
			locname = canonical;
		}
		/* Only the first matching alias is considered. */
		break;
	}

	if (category != LC_CTYPE && strcmp(locname, "C.UTF-8") == 0)
		return (&cutf_locdata[category]);

	return (locdata_get_cache(category, locname));
}
/* tsd destructor */
static void
freelocptr(void *arg)
{
locale_t *locptr = arg;
if (*locptr != NULL)
freelocale(*locptr);
}
/*
 * Determine the locale name the environment asks for in `category'.
 *
 * The first of these that is set and non-empty wins: LC_ALL, the
 * category's own LC_* variable, LANG.  If none qualifies the answer
 * is "C".
 */
static const char *
get_locale_env(int category)
{
	const char *val;

	/* 1. check LC_ALL. */
	val = getenv(categories[LC_ALL]);
	if (val != NULL && *val != '\0')
		return (val);

	/* 2. check LC_* */
	val = getenv(categories[category]);
	if (val != NULL && *val != '\0')
		return (val);

	/* 3. check LANG */
	val = getenv("LANG");
	if (val != NULL && *val != '\0')
		return (val);

	/* 4. if none is set, fall to "C" */
	return ("C");
}
/*
 * This routine is exposed via the MB_CUR_MAX macro.  Note that legacy
 * code will continue to use _ctype[520], but we prefer this function as
 * it is the only way to get thread-specific information.
 *
 * Returns the lc_max_mblen field of the ctype data of `loc'.  `loc' is
 * dereferenced without a NULL check.
 */
unsigned char
__mb_cur_max_l(locale_t loc)
{
return (loc->ctype->lc_max_mblen);
}
unsigned char
__mb_cur_max(void)
{
return (__mb_cur_max_l(uselocale(NULL)));
}
/*
* Public interfaces.
*/
/*
 * Create a new locale object that refers to the same per-category data
 * as `src'.  A NULL `src' means the global locale.
 *
 * Only the locdata pointers are copied (the category data itself is
 * shared); each loaded[] flag is cleared and the per-category shortcut
 * pointers are rebuilt from the copied locdata.  Returns NULL if the
 * object cannot be allocated.
 */
locale_t
duplocale(locale_t src)
{
	locale_t	copy;
	int		cat;

	copy = lmalloc(sizeof (*copy));
	if (copy == NULL)
		return (NULL);

	/* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
	if (src == NULL)
		src = ___global_locale;

	cat = 0;
	while (cat < LC_ALL) {
		copy->locdata[cat] = src->locdata[cat];
		copy->loaded[cat] = 0;
		cat++;
	}

	/* Rebuild the shortcuts; the rune table is the ctype's l_data[1]. */
	copy->ctype = copy->locdata[LC_CTYPE]->l_data[0];
	copy->runelocale = copy->locdata[LC_CTYPE]->l_data[1];
	copy->numeric = copy->locdata[LC_NUMERIC]->l_data[0];
	copy->time = copy->locdata[LC_TIME]->l_data[0];
	copy->collate = copy->locdata[LC_COLLATE]->l_data[0];
	copy->monetary = copy->locdata[LC_MONETARY]->l_data[0];
	copy->messages = copy->locdata[LC_MESSAGES]->l_data[0];

	return (copy);
}
/*
 * Release a locale object.  NULL, the static POSIX locale, and any object
 * flagged on_list are left alone; everything else is returned to lfree().
 */
void
freelocale(locale_t loc)
{
	if (loc == NULL || loc == &posix_locale)
		return;

	/*
	 * We take extra care never to free a saved locale created by
	 * setlocale().  This shouldn't be strictly necessary, but a little
	 * extra safety doesn't hurt here.
	 */
	if (loc->on_list)
		return;

	lfree(loc, sizeof (*loc));
}
/*
 * Build a locale object from `base' (or from the global locale when
 * `base' is NULL or is the global locale), replacing every category
 * selected in `catmask' with the data for `locname'.
 *
 * On success `base' is passed to freelocale() and the new, named object
 * is returned.  On failure NULL is returned, `base' is left untouched,
 * and errno is EINVAL for a bad mask or otherwise whatever the failing
 * step left behind.
 */
locale_t
newlocale(int catmask, const char *locname, locale_t base)
{
	locale_t	fresh;
	int		cat;

	if ((catmask & ~(LC_ALL_MASK)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/*
	 * Technically passing LC_GLOBAL_LOCALE here is illegal,
	 * but we allow it.
	 */
	fresh = duplocale((base == NULL || base == ___global_locale) ?
	    ___global_locale : base);
	if (fresh == NULL)
		return (NULL);

	for (cat = 0; cat < LC_ALL; cat++) {
		struct locdata *ld;

		fresh->loaded[cat] = 0;

		/* Categories outside the mask keep the base locale's data. */
		if ((catmask & (1 << cat)) != 0) {
			ld = locdata_get(cat, locname);
			if (ld == NULL) {
				/* freelocale() must not clobber errno. */
				int saved = errno;

				freelocale(fresh);
				errno = saved;
				return (NULL);
			}
			fresh->locdata[cat] = ld;
		}
	}

	/* Refresh the shortcuts now that locdata[] is final. */
	fresh->ctype = fresh->locdata[LC_CTYPE]->l_data[0];
	fresh->runelocale = fresh->locdata[LC_CTYPE]->l_data[1];
	fresh->numeric = fresh->locdata[LC_NUMERIC]->l_data[0];
	fresh->time = fresh->locdata[LC_TIME]->l_data[0];
	fresh->collate = fresh->locdata[LC_COLLATE]->l_data[0];
	fresh->monetary = fresh->locdata[LC_MONETARY]->l_data[0];
	fresh->messages = fresh->locdata[LC_MESSAGES]->l_data[0];

	/* The base object has been superseded by the new one. */
	freelocale(base);

	return (mklocname(fresh));
}
/*
 * Query and optionally change the calling thread's locale.
 *
 * Returns the locale that was in effect on entry: the thread's own locale
 * if it had one, otherwise the global locale.  A NULL `loc' only queries.
 * Passing the global locale removes the thread's private setting; any
 * other value is installed as the thread's locale.  If the thread-specific
 * slot cannot be obtained, NULL is returned with errno set to EINVAL.
 */
locale_t
uselocale(locale_t loc)
{
	locale_t	prev = ___global_locale;
	locale_t	*slot;

	slot = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);

	/* Should never occur */
	if (slot == NULL) {
		errno = EINVAL;
		return (NULL);
	}

	if (*slot != NULL)
		prev = *slot;

	/*
	 * Argument loc is NULL if we are just querying.  Otherwise,
	 * LC_GLOBAL_LOCALE returns the thread to using the global locale
	 * (setlocale); anything else is stored as given, with no
	 * validation of the provided locale at present.
	 */
	if (loc != NULL)
		*slot = (loc == ___global_locale) ? NULL : loc;

	/*
	 * The caller is responsible for freeing, of course it would be
	 * gross error to call freelocale() on a locale object that is still
	 * in use.
	 */
	return (prev);
}
/*
 * Fill in loc->locname from the names of the per-category data and return
 * `loc'.  If every category carries the same name that name is used as
 * is; otherwise a composite "a/b/c/d/e/f" name is generated.
 */
static locale_t
mklocname(locale_t loc)
{
	const char	*first = loc->locdata[0]->l_lname;
	int		cat;

	/* Stop at the first category whose name differs from category 0. */
	for (cat = 1; cat < LC_ALL; cat++) {
		if (strcmp(first, loc->locdata[cat]->l_lname) != 0)
			break;
	}

	if (cat >= LC_ALL) {
		/* All categories agree: a simple name suffices. */
		(void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
		    sizeof (loc->locname));
		return (loc);
	}

	/*
	 * Note ordering of these follows the numeric order,
	 * if the order is changed, then setlocale() will need
	 * to be changed as well.
	 */
	(void) snprintf(loc->locname, sizeof (loc->locname),
	    "%s/%s/%s/%s/%s/%s",
	    loc->locdata[LC_CTYPE]->l_lname,
	    loc->locdata[LC_NUMERIC]->l_lname,
	    loc->locdata[LC_TIME]->l_lname,
	    loc->locdata[LC_COLLATE]->l_lname,
	    loc->locdata[LC_MONETARY]->l_lname,
	    loc->locdata[LC_MESSAGES]->l_lname);

	return (loc);
}
|