summaryrefslogtreecommitdiff
path: root/src/wc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/wc.c')
-rw-r--r--src/wc.c112
1 files changed, 73 insertions, 39 deletions
diff --git a/src/wc.c b/src/wc.c
index 4909d9fe..9fbaee7b 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,5 +1,5 @@
/* wc - print the number of lines, words, and bytes in files
- Copyright (C) 1985-2014 Free Software Foundation, Inc.
+ Copyright (C) 1985-2015 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -36,6 +36,7 @@
#include "quotearg.h"
#include "readtokens0.h"
#include "safe-read.h"
+#include "stat-size.h"
#include "xfreopen.h"
#if !defined iswspace && !HAVE_ISWSPACE
@@ -116,9 +117,14 @@ Usage: %s [OPTION]... [FILE]...\n\
program_name, program_name);
fputs (_("\
Print newline, word, and byte counts for each FILE, and a total line if\n\
-more than one FILE is specified. With no FILE, or when FILE is -,\n\
-read standard input. A word is a non-zero-length sequence of characters\n\
-delimited by white space.\n\
+more than one FILE is specified. A word is a non-zero-length sequence of\n\
+characters delimited by white space.\n\
+"), stdout);
+
+ emit_stdin_note ();
+
+ fputs (_("\
+\n\
The options below may be used to select which counts are printed, always in\n\
the following order: newline, word, character, byte, maximum line length.\n\
-c, --bytes print the byte counts\n\
@@ -129,12 +135,12 @@ the following order: newline, word, character, byte, maximum line length.\n\
--files0-from=F read input from the files specified by\n\
NUL-terminated names in file F;\n\
If F is - then read names from standard input\n\
- -L, --max-line-length print the length of the longest line\n\
+ -L, --max-line-length print the maximum display width\n\
-w, --words print the word counts\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
- emit_ancillary_info ();
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
@@ -184,9 +190,10 @@ write_counts (uintmax_t lines,
/* Count words. FILE_X is the name of the file (or NULL for standard
input) that is open on descriptor FD. *FSTATUS is its status.
+ CURRENT_POS is the current file offset if known, negative if unknown.
Return true if successful. */
static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
bool ok = true;
char buf[BUFFER_SIZE + 1];
@@ -229,42 +236,43 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
if (count_bytes && !count_chars && !print_lines && !count_complicated)
{
- off_t current_pos, end_pos;
-
if (0 < fstatus->failed)
fstatus->failed = fstat (fd, &fstatus->st);
- if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
- && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
- && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+ /* For sized files, seek to one st_blksize before EOF rather than to EOF.
+ This works better for files in proc-like file systems where
+ the size is only approximate. */
+ if (! fstatus->failed && usable_st_size (&fstatus->st)
+ && 0 <= fstatus->st.st_size)
{
- /* Be careful here. The current position may actually be
- beyond the end of the file. As in the example above. */
- bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+ size_t end_pos = fstatus->st.st_size;
+ off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
+ if (current_pos < 0)
+ current_pos = lseek (fd, 0, SEEK_CUR);
+ if (0 <= current_pos && current_pos < hi_pos
+ && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+ bytes = hi_pos - current_pos;
}
- else
+
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+ while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
- while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+ if (bytes_read == SAFE_READ_ERROR)
{
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
- bytes += bytes_read;
+ error (0, errno, "%s", file);
+ ok = false;
+ break;
}
+ bytes += bytes_read;
}
}
else if (!count_chars && !count_complicated)
{
/* Use a separate loop when counting only lines or lines and bytes --
but not chars or words. */
+ bool long_lines = false;
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- char *p = buf;
-
if (bytes_read == SAFE_READ_ERROR)
{
error (0, errno, "%s", file);
@@ -272,12 +280,38 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
break;
}
- while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+ bytes += bytes_read;
+
+ char *p = buf;
+ char *end = p + bytes_read;
+ uintmax_t plines = lines;
+
+ if (! long_lines)
{
- ++p;
- ++lines;
+ /* Avoid function call overhead for shorter lines. */
+ while (p != end)
+ lines += *p++ == '\n';
}
- bytes += bytes_read;
+ else
+ {
+ /* memchr is more efficient with longer lines. */
+ while ((p = memchr (p, '\n', end - p)))
+ {
+ ++p;
+ ++lines;
+ }
+ }
+
+ /* If the average line length in the block is >= 15, then use
+ memchr for the next block, where system specific optimizations
+ may outweigh function call overhead.
+ FIXME: This line length was determined in 2015, on both
+ x86_64 and ppc64, but it's worth re-evaluating in future with
+ newer compilers, CPUs, or memchr() implementations etc. */
+ if (lines - plines <= bytes_read / 15)
+ long_lines = true;
+ else
+ long_lines = false;
}
}
#if MB_LEN_MAX > 1
@@ -500,7 +534,7 @@ wc_file (char const *file, struct fstatus *fstatus)
have_read_stdin = true;
if (O_BINARY && ! isatty (STDIN_FILENO))
xfreopen (NULL, "rb", stdin);
- return wc (STDIN_FILENO, file, fstatus);
+ return wc (STDIN_FILENO, file, fstatus, -1);
}
else
{
@@ -512,7 +546,7 @@ wc_file (char const *file, struct fstatus *fstatus)
}
else
{
- bool ok = wc (fd, file, fstatus);
+ bool ok = wc (fd, file, fstatus, 0);
if (close (fd) != 0)
{
error (0, errno, "%s", file);
@@ -530,7 +564,7 @@ wc_file (char const *file, struct fstatus *fstatus)
that happens when we don't know how long the list of file names will be. */
static struct fstatus *
-get_input_fstatus (int nfiles, char *const *file)
+get_input_fstatus (size_t nfiles, char *const *file)
{
struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
@@ -542,7 +576,7 @@ get_input_fstatus (int nfiles, char *const *file)
fstatus[0].failed = 1;
else
{
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
@@ -558,7 +592,7 @@ get_input_fstatus (int nfiles, char *const *file)
get_input_fstatus optimizes. */
static int _GL_ATTRIBUTE_PURE
-compute_number_width (int nfiles, struct fstatus const *fstatus)
+compute_number_width (size_t nfiles, struct fstatus const *fstatus)
{
int width = 1;
@@ -566,7 +600,7 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
{
int minimum_width = 1;
uintmax_t regular_total = 0;
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
if (! fstatus[i].failed)
@@ -592,7 +626,7 @@ main (int argc, char **argv)
{
bool ok;
int optc;
- int nfiles;
+ size_t nfiles;
char **files;
char *files_from = NULL;
struct fstatus *fstatus;
@@ -797,5 +831,5 @@ main (int argc, char **argv)
if (have_read_stdin && close (STDIN_FILENO) != 0)
error (EXIT_FAILURE, errno, "-");
- exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}