diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/encoding/csv | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/pkg/encoding/csv')
-rw-r--r-- | src/pkg/encoding/csv/reader.go | 337 | ||||
-rw-r--r-- | src/pkg/encoding/csv/reader_test.go | 284 | ||||
-rw-r--r-- | src/pkg/encoding/csv/writer.go | 127 | ||||
-rw-r--r-- | src/pkg/encoding/csv/writer_test.go | 74 |
4 files changed, 0 insertions, 822 deletions
diff --git a/src/pkg/encoding/csv/reader.go b/src/pkg/encoding/csv/reader.go deleted file mode 100644 index d9432954a..000000000 --- a/src/pkg/encoding/csv/reader.go +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package csv reads and writes comma-separated values (CSV) files. -// -// A csv file contains zero or more records of one or more fields per record. -// Each record is separated by the newline character. The final record may -// optionally be followed by a newline character. -// -// field1,field2,field3 -// -// White space is considered part of a field. -// -// Carriage returns before newline characters are silently removed. -// -// Blank lines are ignored. A line with only whitespace characters (excluding -// the ending newline character) is not considered a blank line. -// -// Fields which start and stop with the quote character " are called -// quoted-fields. The beginning and ending quote are not part of the -// field. -// -// The source: -// -// normal string,"quoted-field" -// -// results in the fields -// -// {`normal string`, `quoted-field`} -// -// Within a quoted-field a quote character followed by a second quote -// character is considered a single quote. -// -// "the ""word"" is true","a ""quoted-field""" -// -// results in -// -// {`the "word" is true`, `a "quoted-field"`} -// -// Newlines and commas may be included in a quoted-field -// -// "Multi-line -// field","comma is ," -// -// results in -// -// {`Multi-line -// field`, `comma is ,`} -package csv - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "unicode" -) - -// A ParseError is returned for parsing errors. -// The first line is 1. The first column is 0. -type ParseError struct { - Line int // Line where the error occurred - Column int // Column (rune index) where the error occurred - Err error // The actual error -} - -func (e *ParseError) Error() string { - return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err) -} - -// These are the errors that can be returned in ParseError.Error -var ( - ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used - ErrBareQuote = errors.New("bare \" in non-quoted-field") - ErrQuote = errors.New("extraneous \" in field") - ErrFieldCount = errors.New("wrong number of fields in line") -) - -// A Reader reads records from a CSV-encoded file. -// -// As returned by NewReader, a Reader expects input conforming to RFC 4180. -// The exported fields can be changed to customize the details before the -// first call to Read or ReadAll. -// -// Comma is the field delimiter. It defaults to ','. -// -// Comment, if not 0, is the comment character. Lines beginning with the -// Comment character are ignored. -// -// If FieldsPerRecord is positive, Read requires each record to -// have the given number of fields. If FieldsPerRecord is 0, Read sets it to -// the number of fields in the first record, so that future records must -// have the same field count. If FieldsPerRecord is negative, no check is -// made and records may have a variable number of fields. -// -// If LazyQuotes is true, a quote may appear in an unquoted field and a -// non-doubled quote may appear in a quoted field. -// -// If TrimLeadingSpace is true, leading white space in a field is ignored. -type Reader struct { - Comma rune // field delimiter (set to ',' by NewReader) - Comment rune // comment character for start of line - FieldsPerRecord int // number of expected fields per record - LazyQuotes bool // allow lazy quotes - TrailingComma bool // ignored; here for backwards compatibility - TrimLeadingSpace bool // trim leading space - line int - column int - r *bufio.Reader - field bytes.Buffer -} - -// NewReader returns a new Reader that reads from r. -func NewReader(r io.Reader) *Reader { - return &Reader{ - Comma: ',', - r: bufio.NewReader(r), - } -} - -// error creates a new ParseError based on err. -func (r *Reader) error(err error) error { - return &ParseError{ - Line: r.line, - Column: r.column, - Err: err, - } -} - -// Read reads one record from r. The record is a slice of strings with each -// string representing one field. -func (r *Reader) Read() (record []string, err error) { - for { - record, err = r.parseRecord() - if record != nil { - break - } - if err != nil { - return nil, err - } - } - - if r.FieldsPerRecord > 0 { - if len(record) != r.FieldsPerRecord { - r.column = 0 // report at start of record - return record, r.error(ErrFieldCount) - } - } else if r.FieldsPerRecord == 0 { - r.FieldsPerRecord = len(record) - } - return record, nil -} - -// ReadAll reads all the remaining records from r. -// Each record is a slice of fields. -// A successful call returns err == nil, not err == EOF. Because ReadAll is -// defined to read until EOF, it does not treat end of file as an error to be -// reported. -func (r *Reader) ReadAll() (records [][]string, err error) { - for { - record, err := r.Read() - if err == io.EOF { - return records, nil - } - if err != nil { - return nil, err - } - records = append(records, record) - } -} - -// readRune reads one rune from r, folding \r\n to \n and keeping track -// of how far into the line we have read. r.column will point to the start -// of this rune, not the end of this rune. -func (r *Reader) readRune() (rune, error) { - r1, _, err := r.r.ReadRune() - - // Handle \r\n here. We make the simplifying assumption that - // anytime \r is followed by \n that it can be folded to \n. - // We will not detect files which contain both \r\n and bare \n. - if r1 == '\r' { - r1, _, err = r.r.ReadRune() - if err == nil { - if r1 != '\n' { - r.r.UnreadRune() - r1 = '\r' - } - } - } - r.column++ - return r1, err -} - -// skip reads runes up to and including the rune delim or until error. -func (r *Reader) skip(delim rune) error { - for { - r1, err := r.readRune() - if err != nil { - return err - } - if r1 == delim { - return nil - } - } -} - -// parseRecord reads and parses a single csv record from r. -func (r *Reader) parseRecord() (fields []string, err error) { - // Each record starts on a new line. We increment our line - // number (lines start at 1, not 0) and set column to -1 - // so as we increment in readRune it points to the character we read. - r.line++ - r.column = -1 - - // Peek at the first rune. If it is an error we are done. - // If we are support comments and it is the comment character - // then skip to the end of line. - - r1, _, err := r.r.ReadRune() - if err != nil { - return nil, err - } - - if r.Comment != 0 && r1 == r.Comment { - return nil, r.skip('\n') - } - r.r.UnreadRune() - - // At this point we have at least one field. - for { - haveField, delim, err := r.parseField() - if haveField { - fields = append(fields, r.field.String()) - } - if delim == '\n' || err == io.EOF { - return fields, err - } else if err != nil { - return nil, err - } - } -} - -// parseField parses the next field in the record. The read field is -// located in r.field. Delim is the first character not part of the field -// (r.Comma or '\n'). -func (r *Reader) parseField() (haveField bool, delim rune, err error) { - r.field.Reset() - - r1, err := r.readRune() - for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) { - r1, err = r.readRune() - } - - if err == io.EOF && r.column != 0 { - return true, 0, err - } - if err != nil { - return false, 0, err - } - - switch r1 { - case r.Comma: - // will check below - - case '\n': - // We are a trailing empty field or a blank line - if r.column == 0 { - return false, r1, nil - } - return true, r1, nil - - case '"': - // quoted field - Quoted: - for { - r1, err = r.readRune() - if err != nil { - if err == io.EOF { - if r.LazyQuotes { - return true, 0, err - } - return false, 0, r.error(ErrQuote) - } - return false, 0, err - } - switch r1 { - case '"': - r1, err = r.readRune() - if err != nil || r1 == r.Comma { - break Quoted - } - if r1 == '\n' { - return true, r1, nil - } - if r1 != '"' { - if !r.LazyQuotes { - r.column-- - return false, 0, r.error(ErrQuote) - } - // accept the bare quote - r.field.WriteRune('"') - } - case '\n': - r.line++ - r.column = -1 - } - r.field.WriteRune(r1) - } - - default: - // unquoted field - for { - r.field.WriteRune(r1) - r1, err = r.readRune() - if err != nil || r1 == r.Comma { - break - } - if r1 == '\n' { - return true, r1, nil - } - if !r.LazyQuotes && r1 == '"' { - return false, 0, r.error(ErrBareQuote) - } - } - } - - if err != nil { - if err == io.EOF { - return true, 0, err - } - return false, 0, err - } - - return true, r1, nil -} diff --git a/src/pkg/encoding/csv/reader_test.go b/src/pkg/encoding/csv/reader_test.go deleted file mode 100644 index 123df06bc..000000000 --- a/src/pkg/encoding/csv/reader_test.go +++ /dev/null @@ -1,284 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package csv - -import ( - "reflect" - "strings" - "testing" -) - -var readTests = []struct { - Name string - Input string - Output [][]string - UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 - - // These fields are copied into the Reader - Comma rune - Comment rune - FieldsPerRecord int - LazyQuotes bool - TrailingComma bool - TrimLeadingSpace bool - - Error string - Line int // Expected error line if != 0 - Column int // Expected error column if line != 0 -}{ - { - Name: "Simple", - Input: "a,b,c\n", - Output: [][]string{{"a", "b", "c"}}, - }, - { - Name: "CRLF", - Input: "a,b\r\nc,d\r\n", - Output: [][]string{{"a", "b"}, {"c", "d"}}, - }, - { - Name: "BareCR", - Input: "a,b\rc,d\r\n", - Output: [][]string{{"a", "b\rc", "d"}}, - }, - { - Name: "RFC4180test", - UseFieldsPerRecord: true, - Input: `#field1,field2,field3 -"aaa","bb -b","ccc" -"a,a","b""bb","ccc" -zzz,yyy,xxx -`, - Output: [][]string{ - {"#field1", "field2", "field3"}, - {"aaa", "bb\nb", "ccc"}, - {"a,a", `b"bb`, "ccc"}, - {"zzz", "yyy", "xxx"}, - }, - }, - { - Name: "NoEOLTest", - Input: "a,b,c", - Output: [][]string{{"a", "b", "c"}}, - }, - { - Name: "Semicolon", - Comma: ';', - Input: "a;b;c\n", - Output: [][]string{{"a", "b", "c"}}, - }, - { - Name: "MultiLine", - Input: `"two -line","one line","three -line -field"`, - Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, - }, - { - Name: "BlankLine", - Input: "a,b,c\n\nd,e,f\n\n", - Output: [][]string{ - {"a", "b", "c"}, - {"d", "e", "f"}, - }, - }, - { - Name: "TrimSpace", - Input: " a, b, c\n", - TrimLeadingSpace: true, - Output: [][]string{{"a", "b", "c"}}, - }, - { - Name: "LeadingSpace", - Input: " a, b, c\n", - Output: [][]string{{" a", " b", " c"}}, - }, - { - Name: "Comment", - Comment: '#', - Input: "#1,2,3\na,b,c\n#comment", - Output: [][]string{{"a", "b", "c"}}, - }, - { - Name: "NoComment", - Input: "#1,2,3\na,b,c", - Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, - }, - { - Name: "LazyQuotes", - LazyQuotes: true, - Input: `a "word","1"2",a","b`, - Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, - }, - { - Name: "BareQuotes", - LazyQuotes: true, - Input: `a "word","1"2",a"`, - Output: [][]string{{`a "word"`, `1"2`, `a"`}}, - }, - { - Name: "BareDoubleQuotes", - LazyQuotes: true, - Input: `a""b,c`, - Output: [][]string{{`a""b`, `c`}}, - }, - { - Name: "BadDoubleQuotes", - Input: `a""b,c`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 1, - }, - { - Name: "TrimQuote", - Input: ` "a"," b",c`, - TrimLeadingSpace: true, - Output: [][]string{{"a", " b", "c"}}, - }, - { - Name: "BadBareQuote", - Input: `a "word","b"`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 2, - }, - { - Name: "BadTrailingQuote", - Input: `"a word",b"`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 10, - }, - { - Name: "ExtraneousQuote", - Input: `"a "word","b"`, - Error: `extraneous " in field`, Line: 1, Column: 3, - }, - { - Name: "BadFieldCount", - UseFieldsPerRecord: true, - Input: "a,b,c\nd,e", - Error: "wrong number of fields", Line: 2, - }, - { - Name: "BadFieldCount1", - UseFieldsPerRecord: true, - FieldsPerRecord: 2, - Input: `a,b,c`, - Error: "wrong number of fields", Line: 1, - }, - { - Name: "FieldCount", - Input: "a,b,c\nd,e", - Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, - }, - { - Name: "TrailingCommaEOF", - Input: "a,b,c,", - Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaEOL", - Input: "a,b,c,\n", - Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaSpaceEOF", - TrimLeadingSpace: true, - Input: "a,b,c, ", - Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaSpaceEOL", - TrimLeadingSpace: true, - Input: "a,b,c, \n", - Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaLine3", - TrimLeadingSpace: true, - Input: "a,b,c\nd,e,f\ng,hi,", - Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, - }, - { - Name: "NotTrailingComma3", - Input: "a,b,c, \n", - Output: [][]string{{"a", "b", "c", " "}}, - }, - { - Name: "CommaFieldTest", - TrailingComma: true, - Input: `x,y,z,w -x,y,z, -x,y,, -x,,, -,,, -"x","y","z","w" -"x","y","z","" -"x","y","","" -"x","","","" -"","","","" -`, - Output: [][]string{ - {"x", "y", "z", "w"}, - {"x", "y", "z", ""}, - {"x", "y", "", ""}, - {"x", "", "", ""}, - {"", "", "", ""}, - {"x", "y", "z", "w"}, - {"x", "y", "z", ""}, - {"x", "y", "", ""}, - {"x", "", "", ""}, - {"", "", "", ""}, - }, - }, - { - Name: "TrailingCommaIneffective1", - TrailingComma: true, - TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", - Output: [][]string{ - {"a", "b", ""}, - {"c", "d", "e"}, - }, - }, - { - Name: "TrailingCommaIneffective2", - TrailingComma: false, - TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", - Output: [][]string{ - {"a", "b", ""}, - {"c", "d", "e"}, - }, - }, -} - -func TestRead(t *testing.T) { - for _, tt := range readTests { - r := NewReader(strings.NewReader(tt.Input)) - r.Comment = tt.Comment - if tt.UseFieldsPerRecord { - r.FieldsPerRecord = tt.FieldsPerRecord - } else { - r.FieldsPerRecord = -1 - } - r.LazyQuotes = tt.LazyQuotes - r.TrailingComma = tt.TrailingComma - r.TrimLeadingSpace = tt.TrimLeadingSpace - if tt.Comma != 0 { - r.Comma = tt.Comma - } - out, err := r.ReadAll() - perr, _ := err.(*ParseError) - if tt.Error != "" { - if err == nil || !strings.Contains(err.Error(), tt.Error) { - t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) - } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { - t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) - } - } else if err != nil { - t.Errorf("%s: unexpected error %v", tt.Name, err) - } else if !reflect.DeepEqual(out, tt.Output) { - t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) - } - } -} diff --git a/src/pkg/encoding/csv/writer.go b/src/pkg/encoding/csv/writer.go deleted file mode 100644 index 1faecb664..000000000 --- a/src/pkg/encoding/csv/writer.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package csv - -import ( - "bufio" - "io" - "strings" - "unicode" - "unicode/utf8" -) - -// A Writer writes records to a CSV encoded file. -// -// As returned by NewWriter, a Writer writes records terminated by a -// newline and uses ',' as the field delimiter. The exported fields can be -// changed to customize the details before the first call to Write or WriteAll. -// -// Comma is the field delimiter. -// -// If UseCRLF is true, the Writer ends each record with \r\n instead of \n. -type Writer struct { - Comma rune // Field delimiter (set to ',' by NewWriter) - UseCRLF bool // True to use \r\n as the line terminator - w *bufio.Writer -} - -// NewWriter returns a new Writer that writes to w. -func NewWriter(w io.Writer) *Writer { - return &Writer{ - Comma: ',', - w: bufio.NewWriter(w), - } -} - -// Writer writes a single CSV record to w along with any necessary quoting. -// A record is a slice of strings with each string being one field. -func (w *Writer) Write(record []string) (err error) { - for n, field := range record { - if n > 0 { - if _, err = w.w.WriteRune(w.Comma); err != nil { - return - } - } - - // If we don't have to have a quoted field then just - // write out the field and continue to the next field. - if !w.fieldNeedsQuotes(field) { - if _, err = w.w.WriteString(field); err != nil { - return - } - continue - } - if err = w.w.WriteByte('"'); err != nil { - return - } - - for _, r1 := range field { - switch r1 { - case '"': - _, err = w.w.WriteString(`""`) - case '\r': - if !w.UseCRLF { - err = w.w.WriteByte('\r') - } - case '\n': - if w.UseCRLF { - _, err = w.w.WriteString("\r\n") - } else { - err = w.w.WriteByte('\n') - } - default: - _, err = w.w.WriteRune(r1) - } - if err != nil { - return - } - } - - if err = w.w.WriteByte('"'); err != nil { - return - } - } - if w.UseCRLF { - _, err = w.w.WriteString("\r\n") - } else { - err = w.w.WriteByte('\n') - } - return -} - -// Flush writes any buffered data to the underlying io.Writer. -// To check if an error occurred during the Flush, call Error. -func (w *Writer) Flush() { - w.w.Flush() -} - -// Error reports any error that has occurred during a previous Write or Flush. -func (w *Writer) Error() error { - _, err := w.w.Write(nil) - return err -} - -// WriteAll writes multiple CSV records to w using Write and then calls Flush. -func (w *Writer) WriteAll(records [][]string) (err error) { - for _, record := range records { - err = w.Write(record) - if err != nil { - return err - } - } - return w.w.Flush() -} - -// fieldNeedsQuotes returns true if our field must be enclosed in quotes. -// Empty fields, files with a Comma, fields with a quote or newline, and -// fields which start with a space must be enclosed in quotes. -func (w *Writer) fieldNeedsQuotes(field string) bool { - if len(field) == 0 || strings.IndexRune(field, w.Comma) >= 0 || strings.IndexAny(field, "\"\r\n") >= 0 { - return true - } - - r1, _ := utf8.DecodeRuneInString(field) - return unicode.IsSpace(r1) -} diff --git a/src/pkg/encoding/csv/writer_test.go b/src/pkg/encoding/csv/writer_test.go deleted file mode 100644 index 22b740c07..000000000 --- a/src/pkg/encoding/csv/writer_test.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package csv - -import ( - "bytes" - "errors" - "testing" -) - -var writeTests = []struct { - Input [][]string - Output string - UseCRLF bool -}{ - {Input: [][]string{{"abc"}}, Output: "abc\n"}, - {Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true}, - {Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"}, - {Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"}, - {Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"}, - {Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"}, - {Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"}, - {Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"}, - {Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"}, - {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"}, - {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true}, - {Input: [][]string{{"abc\rdef"}}, Output: "\"abcdef\"\r\n", UseCRLF: true}, - {Input: [][]string{{"abc\rdef"}}, Output: "\"abc\rdef\"\n", UseCRLF: false}, -} - -func TestWrite(t *testing.T) { - for n, tt := range writeTests { - b := &bytes.Buffer{} - f := NewWriter(b) - f.UseCRLF = tt.UseCRLF - err := f.WriteAll(tt.Input) - if err != nil { - t.Errorf("Unexpected error: %s\n", err) - } - out := b.String() - if out != tt.Output { - t.Errorf("#%d: out=%q want %q", n, out, tt.Output) - } - } -} - -type errorWriter struct{} - -func (e errorWriter) Write(b []byte) (int, error) { - return 0, errors.New("Test") -} - -func TestError(t *testing.T) { - b := &bytes.Buffer{} - f := NewWriter(b) - f.Write([]string{"abc"}) - f.Flush() - err := f.Error() - - if err != nil { - t.Errorf("Unexpected error: %s\n", err) - } - - f = NewWriter(errorWriter{}) - f.Write([]string{"abc"}) - f.Flush() - err = f.Error() - - if err == nil { - t.Error("Error should not be nil") - } -} |