diff options
Diffstat (limited to 'src/sort.c')
-rw-r--r-- | src/sort.c | 142 |
1 files changed, 101 insertions, 41 deletions
@@ -1,5 +1,5 @@ /* sort - sort lines of text (with all kinds of options). - Copyright (C) 1988-2012 Free Software Foundation, Inc. + Copyright (C) 1988-2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include <sys/types.h> #include <sys/wait.h> #include <signal.h> +#include <assert.h> #include "system.h" #include "argmatch.h" #include "error.h" @@ -261,6 +262,9 @@ struct merge_node_queue when popping. */ }; +/* Used to implement --unique (-u). */ +static struct line saved_line; + /* FIXME: None of these tables work with multibyte character sets. Also, there are many other bugs when handling multibyte characters. One way to fix this is to rewrite 'sort' to use wide characters @@ -394,11 +398,10 @@ Usage: %s [OPTION]... [FILE]...\n\ program_name, program_name); fputs (_("\ Write sorted concatenation of all FILE(s) to standard output.\n\ -\n\ -"), stdout); - fputs (_("\ -Mandatory arguments to long options are mandatory for short options too.\n\ "), stdout); + + emit_mandatory_arg_note (); + fputs (_("\ Ordering options:\n\ \n\ @@ -910,11 +913,10 @@ create_temp_file (int *pfd, bool survive_fd_exhaustion) static FILE * stream_open (char const *file, char const *how) { - if (!file) - return stdout; + FILE *fp; + if (*how == 'r') { - FILE *fp; if (STREQ (file, "-")) { have_read_stdin = true; @@ -923,9 +925,18 @@ stream_open (char const *file, char const *how) else fp = fopen (file, how); fadvise (fp, FADVISE_SEQUENTIAL); - return fp; } - return fopen (file, how); + else if (*how == 'w') + { + if (file && ftruncate (STDOUT_FILENO, 0) != 0) + error (SORT_FAILURE, errno, _("%s: error truncating"), + quote (file)); + fp = stdout; + } + else + assert (!"unexpected mode passed to stream_open"); + + return fp; } /* Same as stream_open, except always return a non-null value; die on @@ -967,10 +978,14 @@ xfclose (FILE *fp, char const *file) } static void -dup2_or_die (int oldfd, int newfd) +move_fd_or_die (int oldfd, int newfd) { - if (dup2 (oldfd, newfd) < 0) - error (SORT_FAILURE, errno, _("dup2 failed")); + if (oldfd != newfd) + { + if (dup2 (oldfd, newfd) < 0) + error (SORT_FAILURE, errno, _("dup2 failed")); + close (oldfd); + } } /* Fork a child process for piping to and do common cleanup. The @@ -1081,10 +1096,8 @@ maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion) else if (node->pid == 0) { close (pipefds[1]); - dup2_or_die (tempfd, STDOUT_FILENO); - close (tempfd); - dup2_or_die (pipefds[0], STDIN_FILENO); - close (pipefds[0]); + move_fd_or_die (tempfd, STDOUT_FILENO); + move_fd_or_die (pipefds[0], STDIN_FILENO); if (execlp (compress_program, compress_program, (char *) NULL) < 0) error (SORT_FAILURE, errno, _("couldn't execute %s"), @@ -1141,10 +1154,8 @@ open_temp (struct tempnode *temp) case 0: close (pipefds[0]); - dup2_or_die (tempfd, STDIN_FILENO); - close (tempfd); - dup2_or_die (pipefds[1], STDOUT_FILENO); - close (pipefds[1]); + move_fd_or_die (tempfd, STDIN_FILENO); + move_fd_or_die (pipefds[1], STDOUT_FILENO); execlp (compress_program, compress_program, "-d", (char *) NULL); error (SORT_FAILURE, errno, _("couldn't execute %s -d"), @@ -1399,22 +1410,16 @@ specify_nthreads (int oi, char c, char const *s) return nthreads; } - /* Return the default sort size. */ static size_t default_sort_size (void) { - /* Let MEM be available memory or 1/8 of total memory, whichever - is greater. */ - double avail = physmem_available (); - double total = physmem_total (); - double mem = MAX (avail, total / 8); - struct rlimit rlimit; - - /* Let SIZE be MEM, but no more than the maximum object size or - system resource limits. Don't bother to check for values like - RLIM_INFINITY since in practice they are not much less than SIZE_MAX. */ + /* Let SIZE be MEM, but no more than the maximum object size, + total memory, or system resource limits. Don't bother to check + for values like RLIM_INFINITY since in practice they are not much + less than SIZE_MAX. */ size_t size = SIZE_MAX; + struct rlimit rlimit; if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) size = rlimit.rlim_cur; #ifdef RLIMIT_AS @@ -1433,6 +1438,16 @@ default_sort_size (void) size = rlimit.rlim_cur / 16 * 15; #endif + /* Let MEM be available memory or 1/8 of total memory, whichever + is greater. */ + double avail = physmem_available (); + double total = physmem_total (); + double mem = MAX (avail, total / 8); + + /* Leave a 1/4 margin for physical memory. */ + if (total * 0.75 < size) + size = total * 0.75; + /* Return the minimum of MEM and SIZE, but no less than MIN_SORT_SIZE. Avoid the MIN macro here, as it is not quite right when only one argument is floating point. */ @@ -1800,7 +1815,7 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) { /* The current input line is too long to fit in the buffer. - Double the buffer size and try again, keeping it properly + Increase the buffer size and try again, keeping it properly aligned. */ size_t line_alloc = buf->alloc / sizeof (struct line); buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); @@ -3335,13 +3350,11 @@ queue_pop (struct merge_node_queue *queue) static void write_unique (struct line const *line, FILE *tfp, char const *temp_output) { - static struct line saved; - if (unique) { - if (saved.text && ! compare (line, &saved)) + if (saved_line.text && ! compare (line, &saved_line)) return; - saved = *line; + saved_line = *line; } write_line (line, tfp, temp_output); @@ -3636,10 +3649,7 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps, { if (! got_outstat) { - if ((outfile - ? stat (outfile, &outstat) - : fstat (STDOUT_FILENO, &outstat)) - != 0) + if (fstat (STDOUT_FILENO, &outstat) != 0) break; got_outstat = true; } @@ -3666,6 +3676,45 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps, } } +/* Scan the input files to ensure all are accessible. + Otherwise exit with a diagnostic. + + Note this will catch common issues with permissions etc. + but will fail to notice issues where you can open() but not read(), + like when a directory is specified on some systems. + Catching these obscure cases could slow down performance in + common cases. */ + +static void +check_inputs (char *const *files, size_t nfiles) +{ + size_t i; + for (i = 0; i < nfiles; i++) + { + if (STREQ (files[i], "-")) + continue; + + if (euidaccess (files[i], R_OK) != 0) + die (_("cannot read"), files[i]); + } +} + +/* Ensure a specified output file can be created or written to, + and point stdout to it. Do not truncate the file. + Exit with a diagnostic on failure. */ + +static void +check_output (char const *outfile) +{ + if (outfile) + { + int outfd = open (outfile, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO); + if (outfd < 0) + die (_("open failed"), outfile); + move_fd_or_die (outfd, STDOUT_FILENO); + } +} + /* Merge the input FILES. NTEMPS is the number of files at the start of FILES that are temporary; it is zero at the top level. NFILES is the total number of files. Put the output in @@ -3843,6 +3892,7 @@ sort (char *const *files, size_t nfiles, char const *output_file, break; } + saved_line.text = NULL; line = buffer_linelim (&buf); if (buf.eof && !nfiles && !ntemps && !buf.left) { @@ -4243,6 +4293,10 @@ main (int argc, char **argv) char const *optarg1 = argv[optind++]; s = parse_field_count (optarg1 + 1, &key->eword, N_("invalid number after '-'")); + /* When called with a non-NULL message ID, + parse_field_count cannot return NULL. Tell static + analysis tools that dereferencing S is safe. */ + assert (s); if (*s == '.') s = parse_field_count (s + 1, &key->echar, N_("invalid number after '.'")); @@ -4616,6 +4670,12 @@ main (int argc, char **argv) exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); } + /* Check all inputs are accessible, or exit immediately. */ + check_inputs (files, nfiles); + + /* Check output is writable, or exit immediately. */ + check_output (outfile); + if (mergeonly) { struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); |