summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuillem Jover <guillem@hadrons.org>2011-02-25 18:48:10 +0100
committerGuillem Jover <guillem@hadrons.org>2011-05-14 13:43:48 +0200
commit4b95e82a32f07ec80445dd4103103ebaa356c89b (patch)
tree236a7e2ba010872c1467ea493e90b91378a483e9
parentc766e58acf3a894644291d21f9c98d322ef8cd11 (diff)
downloadlibbsd-4b95e82a32f07ec80445dd4103103ebaa356c89b.tar.gz
Add new radixsort and sradixsort functions
Taken from FreeBSD.
-rw-r--r--Makefile3
-rw-r--r--Versions2
-rw-r--r--include/bsd/stdlib.h4
-rw-r--r--src/radixsort.3160
-rw-r--r--src/radixsort.c327
-rw-r--r--src/sradixsort.31
6 files changed, 497 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 315ad97..ad73456 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,7 @@ LIB_SRCS := \
fmtcheck.c \
nlist.c \
progname.c \
+ radixsort.c \
vis.c unvis.c \
$(LIB_SRCS_GEN)
LIB_SRCS_GEN := $(patsubst %,src/%,$(LIB_SRCS_GEN))
@@ -93,6 +94,8 @@ LIB_MANS := \
humanize_number.3 \
fmtcheck.3 \
mergesort.3 \
+ radixsort.3 \
+ sradixsort.3 \
nlist.3 \
pidfile.3 \
setmode.3 \
diff --git a/Versions b/Versions
index cac2efe..51dc9ec 100644
--- a/Versions
+++ b/Versions
@@ -70,5 +70,7 @@ LIBBSD_0.3 {
getpeereid;
mergesort;
+ radixsort;
+ sradixsort;
} LIBBSD_0.2;
diff --git a/include/bsd/stdlib.h b/include/bsd/stdlib.h
index bfe3027..ea790bc 100644
--- a/include/bsd/stdlib.h
+++ b/include/bsd/stdlib.h
@@ -54,6 +54,10 @@ void setprogname(const char *);
int heapsort (void *, size_t, size_t, int (*)(const void *, const void *));
int mergesort(void *base, size_t nmemb, size_t size,
int (*cmp)(const void *, const void *));
+int radixsort(const unsigned char **base, int nmemb,
+ const unsigned char *table, unsigned endbyte);
+int sradixsort(const unsigned char **base, int nmemb,
+ const unsigned char *table, unsigned endbyte);
void *reallocf(void *ptr, size_t size);
diff --git a/src/radixsort.3 b/src/radixsort.3
new file mode 100644
index 0000000..dfa65f1
--- /dev/null
+++ b/src/radixsort.3
@@ -0,0 +1,160 @@
+.\" Copyright (c) 1990, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)radixsort.3 8.2 (Berkeley) 1/27/94
+.\" $FreeBSD$
+.\"
+.Dd January 27, 1994
+.Dt RADIXSORT 3
+.Os
+.Sh NAME
+.Nm radixsort , sradixsort
+.Nd radix sort
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In limits.h
+.In stdlib.h
+.Ft int
+.Fn radixsort "const unsigned char **base" "int nmemb" "const unsigned char *table" "unsigned endbyte"
+.Ft int
+.Fn sradixsort "const unsigned char **base" "int nmemb" "const unsigned char *table" "unsigned endbyte"
+.Sh DESCRIPTION
+The
+.Fn radixsort
+and
+.Fn sradixsort
+functions
+are implementations of radix sort.
+.Pp
+These functions sort an array of pointers to byte strings, the initial
+member of which is referenced by
+.Fa base .
+The byte strings may contain any values; the end of each string
+is denoted by the user-specified value
+.Fa endbyte .
+.Pp
+Applications may specify a sort order by providing the
+.Fa table
+argument.
+If
+.Pf non- Dv NULL ,
+.Fa table
+must reference an array of
+.Dv UCHAR_MAX
++ 1 bytes which contains the sort
+weight of each possible byte value.
+The end-of-string byte must have a sort weight of 0 or 255
+(for sorting in reverse order).
+More than one byte may have the same sort weight.
+The
+.Fa table
+argument
+is useful for applications which wish to sort different characters
+equally, for example, providing a table with the same weights
+for A-Z as for a-z will result in a case-insensitive sort.
+If
+.Fa table
+is NULL, the contents of the array are sorted in ascending order
+according to the
+.Tn ASCII
+order of the byte strings they reference and
+.Fa endbyte
+has a sorting weight of 0.
+.Pp
+The
+.Fn sradixsort
+function is stable, that is, if two elements compare as equal, their
+order in the sorted array is unchanged.
+The
+.Fn sradixsort
+function uses additional memory sufficient to hold
+.Fa nmemb
+pointers.
+.Pp
+The
+.Fn radixsort
+function is not stable, but uses no additional memory.
+.Pp
+These functions are variants of most-significant-byte radix sorting; in
+particular, see
+.An "D.E. Knuth" Ns 's
+.%T "Algorithm R"
+and section 5.2.5, exercise 10.
+They take linear time relative to the number of bytes in the strings.
+.Sh RETURN VALUES
+.Rv -std radixsort
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The value of the
+.Fa endbyte
+element of
+.Fa table
+is not 0 or 255.
+.El
+.Pp
+Additionally, the
+.Fn sradixsort
+function
+may fail and set
+.Va errno
+for any of the errors specified for the library routine
+.Xr malloc 3 .
+.Sh SEE ALSO
+.Xr sort 1 ,
+.Xr qsort 3
+.Pp
+.Rs
+.%A Knuth, D.E.
+.%D 1968
+.%B "The Art of Computer Programming"
+.%T "Sorting and Searching"
+.%V Vol. 3
+.%P pp. 170-178
+.Re
+.Rs
+.%A Paige, R.
+.%D 1987
+.%T "Three Partition Refinement Algorithms"
+.%J "SIAM J. Comput."
+.%V Vol. 16
+.%N No. 6
+.Re
+.Rs
+.%A McIlroy, P.
+.%D 1993
+.%T "Engineering Radix Sort"
+.%J "Computing Systems"
+.%V Vol. 6:1
+.%P pp. 5-27
+.Re
+.Sh HISTORY
+The
+.Fn radixsort
+function first appeared in
+.Bx 4.4 .
diff --git a/src/radixsort.c b/src/radixsort.c
new file mode 100644
index 0000000..82ff1bc
--- /dev/null
+++ b/src/radixsort.c
@@ -0,0 +1,327 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy and by Dan Bernstein at New York University,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Radixsort routines.
+ *
+ * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
+ * Use radixsort(a, n, tab, endch) for this case.
+ *
+ * For stable sorting (using N extra pointers) use sradixsort(), which calls
+ * r_sort_b().
+ *
+ * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
+ * "Engineering Radix Sort".
+ */
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+/* One pending unit of work: a sub-array that still has to be sorted. */
+typedef struct {
+	const u_char **sa;	/* First pointer of the sub-array. */
+	int sn;	/* Number of pointers in the sub-array. */
+	int si;	/* Byte offset at which sorting resumes. */
+} stack;
+
+/* Forward declarations of the file-local sorting helpers. */
+static inline void simplesort(const u_char **a, int n, int b,
+    const u_char *tr, u_int endch);
+static void r_sort_a(const u_char **a, int n, int i, const u_char *tr,
+    u_int endch);
+static void r_sort_b(const u_char **a, const u_char **ta, int n, int i,
+    const u_char *tr, u_int endch);
+
+#define THRESHOLD 20 /* Sub-arrays shorter than this go to simplesort(). */
+#define SIZE 512 /* Work-stack capacity tested before bins are pushed. */
+
+#define SETUP { \
+ if (tab == NULL) { \
+ tr = tr0; \
+ for (c = 0; c < endch; c++) \
+ tr0[c] = c + 1; \
+ tr0[c] = 0; \
+ for (c++; c < 256; c++) \
+ tr0[c] = c; \
+ endch = 0; \
+ } else { \
+ endch = tab[endch]; \
+ tr = tab; \
+ if (endch != 0 && endch != 255) { \
+ errno = EINVAL; \
+ return (-1); \
+ } \
+ } \
+}
+
+/*
+ * Sort the n byte-string pointers in a, in place, with the unstable sort.
+ *
+ * SETUP chooses the weight table (tab, or a generated one when tab is NULL)
+ * and may return -1 with errno set to EINVAL.  Otherwise the array is
+ * handed to r_sort_a() starting at byte offset 0 and 0 is returned.
+ */
+int
+radixsort(const u_char **a, int n, const u_char *tab, u_int endch)
+{
+	u_char tr0[256];	/* Generated weights, filled by SETUP when tab is NULL. */
+	const u_char *tr;	/* Weight table in effect, set by SETUP. */
+	int c;	/* Loop index used by SETUP. */
+
+	SETUP;
+	r_sort_a(a, n, 0, tr, endch);
+
+	return (0);
+}
+
+/*
+ * Sort the n byte-string pointers in a with the stable sort.
+ *
+ * SETUP chooses the weight table (tab, or a generated one when tab is NULL)
+ * and may return -1 with errno set to EINVAL.  Inputs shorter than
+ * THRESHOLD go straight to simplesort(); longer ones are sorted by
+ * r_sort_b() using a temporary array of n pointers.
+ *
+ * Returns 0 on success, or -1 if the temporary array cannot be allocated.
+ */
+int
+sradixsort(const u_char **a, int n, const u_char *tab, u_int endch)
+{
+	const u_char *tr, **ta;
+	int c;
+	u_char tr0[256];
+
+	SETUP;
+	if (n < THRESHOLD) {
+		simplesort(a, n, 0, tr, endch);
+		return (0);
+	}
+
+	/* Size the scratch array by its own element type. */
+	ta = malloc(n * sizeof(*ta));
+	if (ta == NULL)
+		return (-1);
+	r_sort_b(a, ta, n, 0, tr, endch);
+	free(ta);
+
+	return (0);
+}
+
+#define empty(s) (s >= sp) /* True when sp is at (or below) the stack base s. */
+#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si /* Step sp back and load that entry into a, n, i. */
+#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i /* Store a, n, i at sp, then advance sp. */
+#define swap(a, b, t) t = a, a = b, b = t /* Exchange a and b through scratch t. */
+
+/*
+ * Unstable, in-place sort.
+ *
+ * Sorts the n string pointers in a by the weights tr[] of their bytes,
+ * starting at byte offset i; endch is the weight that marks end of string.
+ * Sub-arrays still to be processed are kept on the local stack s, and
+ * sub-arrays shorter than THRESHOLD are handed to simplesort().
+ *
+ * count, nc and bmin are static, so they are shared with the nested call
+ * made when the stack is nearly full: nc is still non-zero on entry to
+ * that call, so it skips the histogram pass and reuses the counts already
+ * gathered.
+ */
+static void
+r_sort_a(a, n, i, tr, endch)
+ const u_char **a;
+ int n, i;
+ const u_char *tr;
+ u_int endch;
+{
+ static int count[256], nc, bmin; /* Histogram state shared across calls. */
+ int c;
+ const u_char **ak, *r;
+ stack s[SIZE], *sp, *sp0, *sp1, temp;
+ int *cp, bigc;
+ const u_char **an, *t, **aj, **top[256];
+
+ /* Set up stack. */
+ sp = s;
+ push(a, n, i);
+ while (!empty(s)) {
+ pop(a, n, i);
+ if (n < THRESHOLD) {
+ simplesort(a, n, i, tr, endch);
+ continue;
+ }
+ an = a + n;
+
+ /* Make character histogram. */
+ if (nc == 0) {
+ bmin = 255; /* First occupied bin, excluding eos. */
+ for (ak = a; ak < an;) {
+ c = tr[(*ak++)[i]]; /* Weight of byte i of the next string. */
+ if (++count[c] == 1 && c != endch) {
+ if (c < bmin)
+ bmin = c;
+ nc++;
+ }
+ }
+ if (sp + nc > s + SIZE) { /* Get more stack. */
+ r_sort_a(a, n, i, tr, endch); /* Fresh stack; counts are reused. */
+ continue;
+ }
+ }
+
+ /*
+ * Special case: if all strings have the same
+ * character at position i, move on to the next
+ * character.
+ */
+ if (nc == 1 && count[bmin] == n) {
+ push(a, n, i+1);
+ nc = count[bmin] = 0; /* Clear the shared state for the next pass. */
+ continue;
+ }
+
+ /*
+ * Set top[]; push incompletely sorted bins onto stack.
+ * top[] = pointers to last out-of-place element in bins.
+ * count[] = counts of elements in bins.
+ * Before permuting: top[c-1] + count[c] = top[c];
+ * during deal: top[c] counts down to top[c-1].
+ */
+ sp0 = sp1 = sp; /* Stack position of biggest bin. */
+ bigc = 2; /* Size of biggest bin. */
+ if (endch == 0) /* Special case: set top[eos]. */
+ top[0] = ak = a + count[0];
+ else {
+ ak = a;
+ top[255] = an;
+ }
+ for (cp = count + bmin; nc > 0; cp++) {
+ while (*cp == 0) /* Find next non-empty pile. */
+ cp++;
+ if (*cp > 1) { /* Bins of one element need no further sorting. */
+ if (*cp > bigc) {
+ bigc = *cp;
+ sp1 = sp;
+ }
+ push(ak, *cp, i+1);
+ }
+ top[cp-count] = ak += *cp;
+ nc--;
+ }
+ swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */
+
+ /*
+ * Permute misplacements home. Already home: everything
+ * before aj, and in bin[c], items from top[c] on.
+ * Inner loop:
+ * r = next element to put in place;
+ * ak = top[r[i]] = location to put the next element.
+ * aj = bottom of 1st disordered bin.
+ * Outer loop:
+ * Once the 1st disordered bin is done, ie. aj >= ak,
+ * aj<-aj + count[c] connects the bins in a linked list;
+ * reset count[c].
+ */
+ for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0)
+ for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);)
+ swap(*ak, r, t);
+ }
+}
+
+/*
+ * Stable sort, requiring additional memory.
+ *
+ * Sorts the n string pointers in a by the weights tr[] of their bytes,
+ * starting at byte offset i; endch is the weight that marks end of string.
+ * ta is scratch space for n pointers: every pass copies the sub-array into
+ * ta and deals it back into a from the last element to the first, filling
+ * each pile from its top downwards, which keeps equal keys in their
+ * original order.
+ *
+ * count, nc and bmin are static so that the nested call made when the work
+ * stack is nearly full can reuse the histogram that was already gathered
+ * (nc is non-zero on entry to that call, so the histogram pass is skipped).
+ */
+static void
+r_sort_b(const u_char **a, const u_char **ta, int n, int i,
+    const u_char *tr, u_int endch)
+{
+	static int count[256], nc, bmin;
+	int c;
+	const u_char **ak, **ai;
+	/* Dimensioned with SIZE so it always agrees with the check below. */
+	stack s[SIZE], *sp, *sp0, *sp1, temp;
+	const u_char **top[256];
+	int *cp, bigc;
+
+	sp = s;
+	push(a, n, i);
+	while (!empty(s)) {
+		pop(a, n, i);
+		if (n < THRESHOLD) {
+			simplesort(a, n, i, tr, endch);
+			continue;
+		}
+
+		/* Make character histogram unless one is already pending. */
+		if (nc == 0) {
+			bmin = 255;	/* First occupied bin, excluding eos. */
+			for (ak = a + n; --ak >= a;) {
+				c = tr[(*ak)[i]];
+				if (++count[c] == 1 && c != endch) {
+					if (c < bmin)
+						bmin = c;
+					nc++;
+				}
+			}
+			if (sp + nc > s + SIZE) {	/* Get more stack. */
+				r_sort_b(a, ta, n, i, tr, endch);
+				continue;
+			}
+		}
+
+		/*
+		 * Set top[] to the end of every pile and push the piles
+		 * that hold more than one element; the end-of-string pile
+		 * is never pushed.
+		 */
+		sp0 = sp1 = sp;	/* Stack position of biggest bin. */
+		bigc = 2;	/* Size of biggest bin. */
+		if (endch == 0) {
+			top[0] = ak = a + count[0];
+			count[0] = 0;
+		} else {
+			ak = a;
+			top[255] = a + n;
+			count[255] = 0;
+		}
+		for (cp = count + bmin; nc > 0; cp++) {
+			while (*cp == 0)	/* Find next non-empty pile. */
+				cp++;
+			if ((c = *cp) > 1) {
+				if (c > bigc) {
+					bigc = c;
+					sp1 = sp;
+				}
+				push(ak, c, i+1);
+			}
+			top[cp-count] = ak += c;
+			*cp = 0;	/* Reset count[]. */
+			nc--;
+		}
+		swap(*sp0, *sp1, temp);	/* Biggest bin goes last. */
+
+		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
+			*--ak = *--ai;
+		for (ak = ta+n; --ak >= ta;)	/* Deal to piles. */
+			*--top[tr[(*ak)[i]]] = *ak;
+	}
+}
+
+/*
+ * Insertion sort for short sub-arrays.
+ *
+ * Orders the n string pointers in a by comparing the tr[] weights of their
+ * bytes from offset b onwards; a weight equal to endch ends the comparison
+ * of a pair.  An element is moved towards the front only while its weight
+ * at the first difference is smaller than its predecessor's.
+ */
+static inline void
+simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
+{
+	const u_char **outer, **inner, *cur, *prev;
+	u_char wt;
+
+	for (outer = a + 1; --n >= 1; outer++) {
+		for (inner = outer; inner > a; inner--) {
+			cur = inner[0] + b;
+			prev = inner[-1] + b;
+			/* Walk both strings while their weights agree. */
+			while ((wt = tr[*cur]) != endch) {
+				if (wt != tr[*prev])
+					break;
+				cur++;
+				prev++;
+			}
+			if (wt >= tr[*prev])
+				break;	/* Already in order. */
+			/* cur is no longer needed, so it serves as scratch. */
+			swap(inner[0], inner[-1], cur);
+		}
+	}
+}
diff --git a/src/sradixsort.3 b/src/sradixsort.3
new file mode 100644
index 0000000..86a95a3
--- /dev/null
+++ b/src/sradixsort.3
@@ -0,0 +1 @@
+.so man3/radixsort.3