diff options
author | Yuri Pankov <yuri.pankov@nexenta.com> | 2017-03-26 06:49:30 +0300 |
---|---|---|
committer | Dan McDonald <danmcd@omniti.com> | 2017-03-28 00:13:01 -0400 |
commit | efcfb316ebac90ddc614edc285349bc5c55e52bd (patch) | |
tree | a1b9e169683674dd3460371cbc23a14b8a221dae | |
parent | 0d71d7a6ce03ecffbb6f198e02a72fd4a262f936 (diff) | |
download | illumos-joyent-efcfb316ebac90ddc614edc285349bc5c55e52bd.tar.gz |
6907 strcoll() and strxfrm() don't seem to agree
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
-rw-r--r-- | usr/src/lib/libc/port/locale/wcscoll.c | 120 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-test-libctest.mf | 2 | ||||
-rw-r--r-- | usr/src/test/libc-tests/runfiles/default.run | 3 | ||||
-rw-r--r-- | usr/src/test/libc-tests/tests/Makefile | 1 | ||||
-rw-r--r-- | usr/src/test/libc-tests/tests/strcoll-strxfrm-6907.c | 117 |
5 files changed, 184 insertions, 59 deletions
diff --git a/usr/src/lib/libc/port/locale/wcscoll.c b/usr/src/lib/libc/port/locale/wcscoll.c index 26b39c9da2..4813362c6c 100644 --- a/usr/src/lib/libc/port/locale/wcscoll.c +++ b/usr/src/lib/libc/port/locale/wcscoll.c @@ -1,6 +1,6 @@ /* * Copyright 2013 Garrett D'Amore <garrett@damore.org> - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2002 Tim J. Robbins * All rights reserved. * @@ -38,19 +38,19 @@ int wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) { - int len1, len2, pri1, pri2, ret; + int len1, len2, pri1, pri2; wchar_t *tr1 = NULL, *tr2 = NULL; int direc, pass; const struct lc_collate *lcc = loc->collate; + int ret = wcscmp(ws1, ws2); - if (lcc->lc_is_posix) - /* - * Locale has no special collating order or could not be - * loaded, do a fast binary comparison. - */ - return (wcscmp(ws1, ws2)); + if (lcc->lc_is_posix || ret == 0) + return (ret); - ret = 0; + if (*ws1 == 0 && *ws2 != 0) + return (-1); + if (*ws1 != 0 && *ws2 == 0) + return (1); /* * Once upon a time we had code to try to optimize this, but @@ -65,24 +65,23 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) * up UNDEFINED elements. There is special handling for them. */ for (pass = 0; pass <= lcc->lc_directive_count; pass++) { - const int32_t *st1 = NULL; const int32_t *st2 = NULL; const wchar_t *w1 = ws1; const wchar_t *w2 = ws2; - int check1, check2; /* special pass for UNDEFINED */ if (pass == lcc->lc_directive_count) { - direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; + direc = DIRECTIVE_FORWARD; } else { direc = lcc->lc_directive[pass]; } if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; + free(tr1); if ((tr1 = wcsdup(w1)) == NULL) - goto fail; + goto end; bp = tr1; fp = tr1 + wcslen(tr1) - 1; while (bp < fp) { @@ -90,8 +89,9 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) *bp++ = *fp; *fp-- = c; } + free(tr2); if ((tr2 = wcsdup(w2)) == NULL) - goto fail; + goto end; bp = tr2; fp = tr2 + wcslen(tr2) - 1; while (bp < fp) { @@ -104,6 +104,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) } if (direc & DIRECTIVE_POSITION) { + int check1, check2; while (*w1 && *w2) { pri1 = pri2 = 0; check1 = check2 = 1; @@ -113,7 +114,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) &pri1, pass, &st1); if (pri1 < 0) { errno = EINVAL; - goto fail; + goto end; } if (!pri1) { /*CSTYLED*/ @@ -127,7 +128,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) &pri2, pass, &st2); if (pri2 < 0) { errno = EINVAL; - goto fail; + goto end; } if (!pri2) { /*CSTYLED*/ @@ -144,58 +145,63 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) w1 += len1; w2 += len2; } + if (!*w1) { + if (*w2) { + ret = -(int)*w2; + goto end; + } + } else { + ret = *w1; + goto end; + } } else { - while (*w1 && *w2) { - pri1 = pri2 = 0; - check1 = check2 = 1; - while ((pri1 == pri2) && (check1 || check2)) { - while (check1 && *w1) { - _collate_lookup(lcc, w1, &len1, - &pri1, pass, &st1); - if (pri1 > 0) - break; - if (pri1 < 0) { - errno = EINVAL; - goto fail; - } - st1 = NULL; - w1 += 1; + int vpri1 = 0, vpri2 = 0; + while (*w1 || *w2 || st1 || st2) { + pri1 = 1; + while (*w1 || st1) { + _collate_lookup(lcc, w1, &len1, &pri1, + pass, &st1); + w1 += len1; + if (pri1 > 0) { + vpri1++; + break; } - check1 = (st1 != NULL); - while (check2 && *w2) { - _collate_lookup(lcc, w2, &len2, - &pri2, pass, &st2); - if (pri2 > 0) - break; - if (pri2 < 0) { - errno = EINVAL; - goto fail; - } - st2 = NULL; - w2 += 1; + if (pri1 < 0) { + errno = EINVAL; + goto end; } - check2 = (st2 != NULL); - if (!pri1 || !pri2) + st1 = NULL; + } + pri2 = 1; + while (*w2 || st2) { + _collate_lookup(lcc, w2, &len2, &pri2, + pass, &st2); + w2 += len2; + if (pri2 > 0) { + vpri2++; break; + } + if (pri2 < 0) { + errno = EINVAL; + goto end; + } + st2 = NULL; } - if (!pri1 || !pri2) + if ((!pri1 || !pri2) && (vpri1 == vpri2)) break; if (pri1 != pri2) { ret = pri1 - pri2; goto end; } - w1 += len1; - w2 += len2; } - } - if (!*w1) { - if (*w2) { - ret = -(int)*w2; + if (vpri1 && !vpri2) { + ret = 1; + goto end; + } + if (!vpri1 && vpri2) { + ret = -1; goto end; } - } else { - ret = *w1; - goto end; } } ret = 0; @@ -205,10 +211,6 @@ end: free(tr2); return (ret); - -fail: - ret = wcscmp(ws1, ws2); - goto end; } int diff --git a/usr/src/pkg/manifests/system-test-libctest.mf b/usr/src/pkg/manifests/system-test-libctest.mf index 3412500cff..16ec75f259 100644 --- a/usr/src/pkg/manifests/system-test-libctest.mf +++ b/usr/src/pkg/manifests/system-test-libctest.mf @@ -122,6 +122,8 @@ file path=opt/libc-tests/tests/random/inz_split_vpp mode=0555 file path=opt/libc-tests/tests/random/inz_vpp mode=0555 file path=opt/libc-tests/tests/select/select.sh mode=0555 file path=opt/libc-tests/tests/select/select_test mode=0555 +file path=opt/libc-tests/tests/strcoll-strxfrm-6907.32 mode=0555 +file path=opt/libc-tests/tests/strcoll-strxfrm-6907.64 mode=0555 file path=opt/libc-tests/tests/strerror mode=0555 file path=opt/libc-tests/tests/symbols/setup mode=0555 file path=opt/libc-tests/tests/symbols/symbols_test.$(ARCH) mode=0555 diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run index 9ac4e183cd..dfb4c9b161 100644 --- a/usr/src/test/libc-tests/runfiles/default.run +++ b/usr/src/test/libc-tests/runfiles/default.run @@ -28,6 +28,9 @@ outputdir = /var/tmp/test_results [/opt/libc-tests/tests/nl_langinfo_test] [/opt/libc-tests/tests/wcsrtombs_test] [/opt/libc-tests/tests/wctype_test] + +[/opt/libc-tests/tests/strcoll-strxfrm-6907.32] +[/opt/libc-tests/tests/strcoll-strxfrm-6907.64] [/opt/libc-tests/tests/wcsncasecmp.32] [/opt/libc-tests/tests/wcsncasecmp.64] [/opt/libc-tests/tests/wcsncasecmp-7344.32] diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile index 586ac08383..6c365aefb1 100644 --- a/usr/src/test/libc-tests/tests/Makefile +++ b/usr/src/test/libc-tests/tests/Makefile @@ -38,6 +38,7 @@ PROGS = \ env-7076 \ quick_exit_order \ quick_exit_status \ + strcoll-strxfrm-6907 \ timespec_get \ wcsncasecmp \ wcsncasecmp-7344 \ diff --git a/usr/src/test/libc-tests/tests/strcoll-strxfrm-6907.c b/usr/src/test/libc-tests/tests/strcoll-strxfrm-6907.c new file mode 100644 index 0000000000..3ae1ea6cb5 --- /dev/null +++ b/usr/src/test/libc-tests/tests/strcoll-strxfrm-6907.c @@ -0,0 +1,117 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us> + * Copyright 2017 Nexenta Systems, Inc. + */ + +#include <err.h> +#include <errno.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +/* + * #6907: generate random UTF8 strings, strxfrm'ing them in process. + * Walk through comparing each string with all strings, and checking + * that strcoll() and strcmp() for strxfrm'ed data produce same results. + */ +#define NSTRINGS 2000 +#define MAXSTRLEN 20 +#define MAXXFRMLEN (MAXSTRLEN * 20) + +typedef struct { + char sval[MAXSTRLEN]; + char xval[MAXXFRMLEN]; +} cstr; + +int +main(void) +{ + cstr data[NSTRINGS]; + char *curloc; + int i, j; + + if ((curloc = setlocale(LC_ALL, "")) == NULL) + err(1, "setlocale"); + + /* Ensure new random() values on every run */ + srandom((unsigned int) time(NULL)); + + /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */ + for (i = 0; i < NSTRINGS; i++) { + char *p; + int len; + +again: + p = data[i].sval; + len = 1 + (random() % (MAXSTRLEN - 1)); + while (len > 0) { + int c; + + /* + * Generate random printable char in ISO8859-1 range. + * Bias towards producing a lot of spaces. + */ + if ((random() % 16) < 3) { + c = ' '; + } else { + do { + c = random() & 0xFF; + } while (!((c >= ' ' && c <= 127) || + (c >= 0xA0 && c <= 0xFF))); + } + + if (c <= 127) { + *p++ = c; + len--; + } else { + if (len < 2) + break; + /* Poor man's utf8-ification */ + *p++ = 0xC0 + (c >> 6); + len--; + *p++ = 0x80 + (c & 0x3F); + len--; + } + } + *p = '\0'; + + /* strxfrm() each string as we produce it */ + errno = 0; + if (strxfrm(data[i].xval, data[i].sval, + MAXXFRMLEN) >= MAXXFRMLEN) { + errx(1, "strxfrm() result for %d-length string " + "exceeded %d bytes", (int)strlen(data[i].sval), + MAXXFRMLEN); + } + /* Amend strxfrm() failing for certain characters (#7962) */ + if (errno != 0) + goto again; + } + + for (i = 0; i < NSTRINGS; i++) { + for (j = 0; j < NSTRINGS; j++) { + int sr = strcoll(data[i].sval, data[j].sval); + int sx = strcmp(data[i].xval, data[j].xval); + + if ((sr * sx < 0) || (sr * sx == 0 && sr + sx != 0)) { + errx(1, "%s: diff for \"%s\" and \"%s\"", + curloc, data[i].sval, data[j].sval); + } + } + } + + return (0); +} |