From 4b61f02573aa8402f95846437f23f437731f2257 Mon Sep 17 00:00:00 2001 From: Douglas Parsons Date: Thu, 12 Aug 2021 17:36:28 +0100 Subject: [PATCH] Expand error messages and fix shared strings parse failure (#39) --- date.go | 3 ++- file.go | 20 ++++++++++---------- file_test.go | 4 ++-- rows.go | 18 ++++++++---------- rows_test.go | 19 +++++++++++-------- shared_strings.go | 28 ++++++++++++++-------------- sheets.go | 16 ++++++++-------- styles.go | 7 ++++--- xml.go | 14 +++++++++----- 9 files changed, 68 insertions(+), 61 deletions(-) diff --git a/date.go b/date.go index 9246e90..59d92eb 100644 --- a/date.go +++ b/date.go @@ -1,6 +1,7 @@ package xlsxreader import ( + "fmt" "math" "strconv" "time" @@ -23,7 +24,7 @@ var excelEpoch = time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC) func convertExcelDateToDateString(value string) (string, error) { floatValue, err := strconv.ParseFloat(value, 64) if err != nil { - return "", err + return "", fmt.Errorf("unable to parse date float value: %w", err) } numberOfDays := math.Trunc(floatValue) diff --git a/file.go b/file.go index 98325ed..243a140 100644 --- a/file.go +++ b/file.go @@ -39,14 +39,14 @@ func getFileForName(files []*zip.File, name string) (*zip.File, error) { func readFile(file *zip.File) ([]byte, error) { rc, err := file.Open() if err != nil { - return []byte{}, err + return []byte{}, fmt.Errorf("unable to open file: %w", err) } defer rc.Close() buff := bytes.NewBuffer(nil) _, err = io.Copy(buff, rc) if err != nil { - return []byte{}, err + return []byte{}, fmt.Errorf("unable to copy bytes: %w", err) } return buff.Bytes(), nil } @@ -66,13 +66,13 @@ func (xl *XlsxFileCloser) Close() error { func OpenFile(filename string) (*XlsxFileCloser, error) { zipFile, err := zip.OpenReader(filename) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to open file reader: %w", err) } x := XlsxFile{} if err := x.init(&zipFile.Reader); err != nil { zipFile.Close() - return nil, err + return nil, fmt.Errorf("unable to initialise file: %w", err) } return &XlsxFileCloser{ @@ -105,13 +105,13 @@ func OpenReaderZip(rc *zip.ReadCloser) (*XlsxFileCloser, error) { func NewReader(xlsxBytes []byte) (*XlsxFile, error) { r, err := zip.NewReader(bytes.NewReader(xlsxBytes), int64(len(xlsxBytes))) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to create new reader: %w", err) } x := XlsxFile{} err = x.init(r) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to initialise file: %w", err) } return &x, nil @@ -124,7 +124,7 @@ func NewReaderZip(r *zip.Reader) (*XlsxFile, error) { x := XlsxFile{} if err := x.init(r); err != nil { - return nil, err + return nil, fmt.Errorf("unable to initialise file: %w", err) } return &x, nil @@ -133,17 +133,17 @@ func NewReaderZip(r *zip.Reader) (*XlsxFile, error) { func (x *XlsxFile) init(zipReader *zip.Reader) error { sharedStrings, err := getSharedStrings(zipReader.File) if err != nil { - return err + return fmt.Errorf("unable to get shared strings: %w", err) } sheets, sheetFiles, err := getWorksheets(zipReader.File) if err != nil { - return err + return fmt.Errorf("unable to get worksheets: %w", err) } dateStyles, err := getDateFormatStyles(zipReader.File) if err != nil { - return err + return fmt.Errorf("unable to get date styles: %w", err) } x.sharedStrings = sharedStrings diff --git a/file_test.go b/file_test.go index 7ac2a0f..0858764 100644 --- a/file_test.go +++ b/file_test.go @@ -29,12 +29,12 @@ func TestGettingFileByNameFailure(t *testing.T) { _, err := getFileForName(zipFiles, "OOPS") require.EqualError(t, err, "file not found: OOPS") - } func TestOpeningMissingFile(t *testing.T) { _, err := OpenFile("this_doesnt_exist.zip") - require.EqualError(t, err, "open this_doesnt_exist.zip: no such file or directory") + require.Error(t, err) + require.Contains(t, err.Error(), "open this_doesnt_exist.zip: no such file or directory") } func TestHandlingSpuriousWorkbookLinks(t *testing.T) { diff --git a/rows.go b/rows.go index f7a3ad7..efd1c34 100644 --- a/rows.go +++ b/rows.go @@ -22,14 +22,14 @@ func (rr *rawRow) unmarshalXML(d *xml.Decoder, start xml.StartElement) error { var err error if rr.Index, err = strconv.Atoi(attr.Value); err != nil { - return err + return fmt.Errorf("unable to parse row index: %w", err) } } for { tok, err := d.Token() if err != nil { - return err + return fmt.Errorf("error retrieving xml token: %w", err) } var se xml.StartElement @@ -51,7 +51,7 @@ func (rr *rawRow) unmarshalXML(d *xml.Decoder, start xml.StartElement) error { var rc rawCell if err = rc.unmarshalXML(d, se); err != nil { - return err + return fmt.Errorf("unable to unmarshal cell: %w", err) } rr.RawCells = append(rr.RawCells, rc) @@ -87,7 +87,7 @@ func (rc *rawCell) unmarshalXML(d *xml.Decoder, start xml.StartElement) error { for { tok, err := d.Token() if err != nil { - return err + return fmt.Errorf("error retrieving xml token: %w", err) } var se xml.StartElement @@ -120,7 +120,7 @@ func (rc *rawCell) unmarshalXML(d *xml.Decoder, start xml.StartElement) error { } if err != nil { - return err + return fmt.Errorf("unable to parse cell data: %w", err) } } } @@ -129,7 +129,7 @@ func (rc *rawCell) unmarshalInlineString(d *xml.Decoder, start xml.StartElement) for { tok, err := d.Token() if err != nil { - return err + return fmt.Errorf("error retrieving xml token: %w", err) } var se xml.StartElement @@ -152,7 +152,7 @@ func (rc *rawCell) unmarshalInlineString(d *xml.Decoder, start xml.StartElement) v, err := getCharData(d) if err != nil { - return err + return fmt.Errorf("unable to parse string: %w", err) } rc.InlineString = &v @@ -246,8 +246,7 @@ func (x *XlsxFile) getCellType(r rawCell) CellType { return TypeDateTime case "n", "": return TypeNumerical - case "s", - "inlineStr": + case "s", "inlineStr": return TypeString default: return TypeString @@ -284,7 +283,6 @@ func (x *XlsxFile) readSheetRows(sheet string, ch chan<- Row) { } switch startElement := token.(type) { - case xml.StartElement: if startElement.Name.Local == "row" { row := x.parseRow(decoder, &startElement) diff --git a/rows_test.go b/rows_test.go index b4017cd..adf0b7c 100644 --- a/rows_test.go +++ b/rows_test.go @@ -13,13 +13,15 @@ var testFile = XlsxFile{ dateStyles: map[int]bool{1: true, 3: true}, } -var inlineStr = "The meaning of life" -var dateValue = "43489.25" -var invalidValue = "wat" -var sharedString = "2" -var offsetTooHighSharedString = "32" -var dateString = "2005-06-04" -var boolString = "1" +var ( + inlineStr = "The meaning of life" + dateValue = "43489.25" + invalidValue = "wat" + sharedString = "2" + offsetTooHighSharedString = "32" + dateString = "2005-06-04" + boolString = "1" +) var cellValueTests = []struct { Name string @@ -100,7 +102,8 @@ func TestGettingValueFromRawCell(t *testing.T) { val, err := testFile.getCellValue(test.Cell) if test.Error != "" { - require.EqualError(t, err, test.Error) + require.Error(t, err) + require.Contains(t, err.Error(), test.Error) } else { require.NoError(t, err) require.Equal(t, test.Expected, val) diff --git a/shared_strings.go b/shared_strings.go index 05822a7..3fcc532 100644 --- a/shared_strings.go +++ b/shared_strings.go @@ -4,6 +4,7 @@ import ( "archive/zip" "encoding/xml" "errors" + "fmt" "io" "strconv" "strings" @@ -19,19 +20,19 @@ func (sv *sharedStringsValue) unmarshalXML(d *xml.Decoder, start xml.StartElemen for { tok, err := d.Token() if err != nil { - return err + return fmt.Errorf("error retrieving xml token: %w", err) } var se xml.StartElement switch el := tok.(type) { - case xml.StartElement: - se = el case xml.EndElement: if el == start.End() { return nil } continue + case xml.StartElement: + se = el default: continue } @@ -46,7 +47,7 @@ func (sv *sharedStringsValue) unmarshalXML(d *xml.Decoder, start xml.StartElemen } if err != nil { - return err + return fmt.Errorf("unable to parse string: %w", err) } } } @@ -55,7 +56,7 @@ func (sv *sharedStringsValue) decodeRichText(d *xml.Decoder, start xml.StartElem for { tok, err := d.Token() if err != nil { - return err + return fmt.Errorf("unable to get shared strings value token: %w", err) } var se xml.StartElement @@ -79,7 +80,7 @@ func (sv *sharedStringsValue) decodeRichText(d *xml.Decoder, start xml.StartElem var s string if s, err = getCharData(d); err != nil { - return err + return fmt.Errorf("unable to parse string: %w", err) } sv.RichText = append(sv.RichText, s) @@ -137,12 +138,12 @@ func getSharedStrings(files []*zip.File) ([]string, error) { return []string{}, nil } if err != nil { - return nil, err + return nil, fmt.Errorf("unable to get shared strings file: %w", err) } f, err := ssFile.Open() if err != nil { - return nil, err + return nil, fmt.Errorf("unable to open shared strings file: %w", err) } defer f.Close() @@ -155,12 +156,11 @@ func getSharedStrings(files []*zip.File) ([]string, error) { dec := xml.NewDecoder(f) for { token, err := dec.Token() - switch err { - case nil: - case io.EOF: + if err == io.EOF { return sharedStrings, nil - default: - return nil, err + } + if err != nil { + return nil, fmt.Errorf("error decoding token: %w", err) } startElement, ok := token.(xml.StartElement) @@ -175,7 +175,7 @@ func getSharedStrings(files []*zip.File) ([]string, error) { value.Reset() if err := value.unmarshalXML(dec, startElement); err != nil { - return nil, err + return nil, fmt.Errorf("error unmarshaling shared strings value %+v: %w", startElement, err) } sharedStrings = append(sharedStrings, value.String()) diff --git a/sheets.go b/sheets.go index a3090ff..0bd692f 100644 --- a/sheets.go +++ b/sheets.go @@ -49,32 +49,32 @@ func getFileNameFromRelationships(rels []relationship, s sheet) (string, error) func getWorksheets(files []*zip.File) ([]string, *map[string]*zip.File, error) { wbFile, err := getFileForName(files, "xl/workbook.xml") if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to get workbook file: %w", err) } data, err := readFile(wbFile) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to read workbook file: %w", err) } var wb workbook err = xml.Unmarshal(data, &wb) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to parse workbook file: %w", err) } relsFile, err := getFileForName(files, "xl/_rels/workbook.xml.rels") if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to get relationships file: %w", err) } relsData, err := readFile(relsFile) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to read relationships file: %w", err) } rels := relationships{} err = xml.Unmarshal(relsData, &rels) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to parse relationships file: %w", err) } wsFileMap := map[string]*zip.File{} @@ -83,11 +83,11 @@ func getWorksheets(files []*zip.File) ([]string, *map[string]*zip.File, error) { for i, sheet := range wb.Sheets { sheetFilename, err := getFileNameFromRelationships(rels.Relationships, sheet) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to get file name from relationships: %w", err) } sheetFile, err := getFileForName(files, sheetFilename) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("unable to get file for sheet name %s: %w", sheetFilename, err) } wsFileMap[sheet.Name] = sheetFile diff --git a/styles.go b/styles.go index b6e0c37..f89800c 100644 --- a/styles.go +++ b/styles.go @@ -3,6 +3,7 @@ package xlsxreader import ( "archive/zip" "encoding/xml" + "fmt" "regexp" "strings" ) @@ -70,18 +71,18 @@ func getDateStylesFromStyleSheet(ss *styleSheet) *map[int]bool { func getDateFormatStyles(files []*zip.File) (*map[int]bool, error) { stylesFile, err := getFileForName(files, "xl/styles.xml") if err != nil { - return nil, err + return nil, fmt.Errorf("unable to get styles file: %w", err) } data, err := readFile(stylesFile) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to read styles file: %w", err) } var ss styleSheet err = xml.Unmarshal(data, &ss) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to parse styles file: %w", err) } return getDateStylesFromStyleSheet(&ss), nil diff --git a/xml.go b/xml.go index 772eb11..a675eb5 100644 --- a/xml.go +++ b/xml.go @@ -1,16 +1,20 @@ package xlsxreader -import "encoding/xml" +import ( + "encoding/xml" + "fmt" +) func getCharData(d *xml.Decoder) (string, error) { - rawToken, err := d.RawToken() + tok, err := d.Token() if err != nil { - return "", err + return "", fmt.Errorf("unable to get raw token: %w", err) } - cdata, ok := rawToken.(xml.CharData) + cdata, ok := tok.(xml.CharData) if !ok { - return "", xml.UnmarshalError("expected chardata to be present, but none was found") + // Valid for no chardata to be present + return "", nil } return string(cdata), nil