1 files changed, 73 insertions, 39 deletions
diff --git a/src/wc.c b/src/wc.c
index 4909d9fe..9fbaee7b 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,5 +1,5 @@
 /* wc - print the number of lines, words, and bytes in files
-   Copyright (C) 1985-2014 Free Software Foundation, Inc.
+   Copyright (C) 1985-2015 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -36,6 +36,7 @@
 #include "quotearg.h"
 #include "readtokens0.h"
 #include "safe-read.h"
+#include "stat-size.h"
 #include "xfreopen.h"
 
 #if !defined iswspace && !HAVE_ISWSPACE
@@ -116,9 +117,14 @@ Usage: %s [OPTION]... [FILE]...\n\
               program_name, program_name);
       fputs (_("\
 Print newline, word, and byte counts for each FILE, and a total line if\n\
-more than one FILE is specified.  With no FILE, or when FILE is -,\n\
-read standard input.  A word is a non-zero-length sequence of characters\n\
-delimited by white space.\n\
+more than one FILE is specified.  A word is a non-zero-length sequence of\n\
+characters delimited by white space.\n\
+"), stdout);
+
+      emit_stdin_note ();
+
+      fputs (_("\
+\n\
 The options below may be used to select which counts are printed, always in\n\
 the following order: newline, word, character, byte, maximum line length.\n\
   -c, --bytes            print the byte counts\n\
@@ -129,12 +135,12 @@ the following order: newline, word, character, byte, maximum line length.\n\
       --files0-from=F    read input from the files specified by\n\
                            NUL-terminated names in file F;\n\
                            If F is - then read names from standard input\n\
-  -L, --max-line-length  print the length of the longest line\n\
+  -L, --max-line-length  print the maximum display width\n\
   -w, --words            print the word counts\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
-      emit_ancillary_info ();
+      emit_ancillary_info (PROGRAM_NAME);
     }
   exit (status);
 }
@@ -184,9 +190,10 @@ write_counts (uintmax_t lines,
 
 /* Count words.  FILE_X is the name of the file (or NULL for standard
    input) that is open on descriptor FD.  *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
    Return true if successful.  */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
   bool ok = true;
   char buf[BUFFER_SIZE + 1];
@@ -229,42 +236,43 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 
   if (count_bytes && !count_chars && !print_lines && !count_complicated)
     {
-      off_t current_pos, end_pos;
-
       if (0 < fstatus->failed)
         fstatus->failed = fstat (fd, &fstatus->st);
 
-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-          && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
-          && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+      /* For sized files, seek to one st_blksize before EOF rather than to EOF.
+         This works better for files in proc-like file systems where
+         the size is only approximate.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
         {
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  As in the example above.  */
-          bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+          size_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+            bytes = hi_pos - current_pos;
         }
-      else
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
-          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+          if (bytes_read == SAFE_READ_ERROR)
             {
-              if (bytes_read == SAFE_READ_ERROR)
-                {
-                  error (0, errno, "%s", file);
-                  ok = false;
-                  break;
-                }
-              bytes += bytes_read;
+              error (0, errno, "%s", file);
+              ok = false;
+              break;
             }
+          bytes += bytes_read;
         }
     }
   else if (!count_chars && !count_complicated)
     {
       /* Use a separate loop when counting only lines or lines and bytes --
          but not chars or words.  */
+      bool long_lines = false;
       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          char *p = buf;
-
           if (bytes_read == SAFE_READ_ERROR)
             {
               error (0, errno, "%s", file);
@@ -272,12 +280,38 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
               break;
             }
 
-          while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+          bytes += bytes_read;
+
+          char *p = buf;
+          char *end = p + bytes_read;
+          uintmax_t plines = lines;
+
+          if (! long_lines)
             {
-              ++p;
-              ++lines;
+              /* Avoid function call overhead for shorter lines.  */
+              while (p != end)
+                lines += *p++ == '\n';
             }
-          bytes += bytes_read;
+          else
+            {
+              /* memchr is more efficient with longer lines.  */
+              while ((p = memchr (p, '\n', end - p)))
+                {
+                  ++p;
+                  ++lines;
+                }
+            }
+
+          /* If the average line length in the block is >= 15, then use
+             memchr for the next block, where system specific optimizations
+             may outweigh function call overhead.
+             FIXME: This line length was determined in 2015, on both
+             x86_64 and ppc64, but it's worth re-evaluating in future with
+             newer compilers, CPUs, or memchr() implementations etc.  */
+          if (lines - plines <= bytes_read / 15)
+            long_lines = true;
+          else
+            long_lines = false;
         }
     }
 #if MB_LEN_MAX > 1
@@ -500,7 +534,7 @@ wc_file (char const *file, struct fstatus *fstatus)
       have_read_stdin = true;
       if (O_BINARY && ! isatty (STDIN_FILENO))
         xfreopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+      return wc (STDIN_FILENO, file, fstatus, -1);
     }
   else
     {
@@ -512,7 +546,7 @@ wc_file (char const *file, struct fstatus *fstatus)
         }
       else
         {
-          bool ok = wc (fd, file, fstatus);
+          bool ok = wc (fd, file, fstatus, 0);
           if (close (fd) != 0)
             {
               error (0, errno, "%s", file);
@@ -530,7 +564,7 @@ wc_file (char const *file, struct fstatus *fstatus)
    that happens when we don't know how long the list of file names will be.  */
 
 static struct fstatus *
-get_input_fstatus (int nfiles, char *const *file)
+get_input_fstatus (size_t nfiles, char *const *file)
 {
   struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
 
@@ -542,7 +576,7 @@ get_input_fstatus (int nfiles, char *const *file)
     fstatus[0].failed = 1;
   else
     {
-      int i;
+      size_t i;
 
       for (i = 0; i < nfiles; i++)
         fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
@@ -558,7 +592,7 @@ get_input_fstatus (int nfiles, char *const *file)
    get_input_fstatus optimizes.  */
 
 static int _GL_ATTRIBUTE_PURE
-compute_number_width (int nfiles, struct fstatus const *fstatus)
+compute_number_width (size_t nfiles, struct fstatus const *fstatus)
 {
   int width = 1;
 
@@ -566,7 +600,7 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
     {
       int minimum_width = 1;
       uintmax_t regular_total = 0;
-      int i;
+      size_t i;
 
       for (i = 0; i < nfiles; i++)
         if (! fstatus[i].failed)
@@ -592,7 +626,7 @@ main (int argc, char **argv)
 {
   bool ok;
   int optc;
-  int nfiles;
+  size_t nfiles;
   char **files;
   char *files_from = NULL;
   struct fstatus *fstatus;
@@ -797,5 +831,5 @@ main (int argc, char **argv)
   if (have_read_stdin && close (STDIN_FILENO) != 0)
     error (EXIT_FAILURE, errno, "-");
 
-  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
+  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }