From f97a0f2b23ef0935170c99cea7f73d4d62009e20 Mon Sep 17 00:00:00 2001 From: Herman Slatman Date: Mon, 23 Oct 2023 15:03:26 +0200 Subject: [PATCH 1/3] Make the file reader utility aware of BOMs Fixes #1042 --- command/certificate/inspect.go | 11 +- utils/read.go | 8 +- utils/utfbom/LICENSE | 201 +++++++++++++++++++++++++++ utils/utfbom/README.md | 66 +++++++++ utils/utfbom/utfbom.go | 195 ++++++++++++++++++++++++++ utils/utfbom/utfbom_test.go | 245 +++++++++++++++++++++++++++++++++ 6 files changed, 722 insertions(+), 4 deletions(-) create mode 100644 utils/utfbom/LICENSE create mode 100644 utils/utfbom/README.md create mode 100644 utils/utfbom/utfbom.go create mode 100644 utils/utfbom/utfbom_test.go diff --git a/command/certificate/inspect.go b/command/certificate/inspect.go index 9fdf1350e..677820b00 100644 --- a/command/certificate/inspect.go +++ b/command/certificate/inspect.go @@ -235,17 +235,22 @@ func inspectAction(ctx *cli.Context) error { break } if bundle && block.Type != "CERTIFICATE" { - return errors.Errorf("certificate bundle %s contains an unexpected PEM block of type %s\n\n expected type: CERTIFICATE", + return errors.Errorf("certificate bundle %q contains an unexpected PEM block of type %q\n\n expected type: CERTIFICATE", crtFile, block.Type) } blocks = append(blocks, block) } } else { if block = derToPemBlock(crtBytes); block == nil { - return errors.Errorf("%s contains an invalid PEM block", crtFile) + return errors.Errorf("%q contains an invalid PEM block", crtFile) } blocks = append(blocks, block) } + + // prevent index out of range errors + if len(blocks) == 0 { + return fmt.Errorf("%q does not contain valid PEM blocks", crtFile) + } } // Keep the first one if !bundle @@ -259,7 +264,7 @@ func inspectAction(ctx *cli.Context) error { case "CERTIFICATE REQUEST", "NEW CERTIFICATE REQUEST": // only one is supported return inspectCertificateRequest(ctx, blocks[0]) default: - return errors.Errorf("Invalid PEM type in %s. Expected [CERTIFICATE|CERTIFICATE REQUEST] but got %s)", crtFile, block.Type) + return errors.Errorf("Invalid PEM type in %q. Expected [CERTIFICATE|CERTIFICATE REQUEST] but got %q)", crtFile, block.Type) } } diff --git a/utils/read.go b/utils/read.go index 10c3e0962..296346c80 100644 --- a/utils/read.go +++ b/utils/read.go @@ -9,6 +9,7 @@ import ( "unicode" "github.com/pkg/errors" + "github.com/smallstep/cli/utils/utfbom" "go.step.sm/cli-utils/errs" "go.step.sm/cli-utils/ui" @@ -91,7 +92,12 @@ func ReadFile(name string) (b []byte, err error) { name = "/dev/stdin" b, err = io.ReadAll(stdin) } else { - b, err = os.ReadFile(name) + var contents []byte + contents, err = os.ReadFile(name) + if err != nil { + return nil, errs.FileError(err, name) + } + b, err = io.ReadAll(utfbom.SkipOnly(bytes.NewReader(contents))) } if err != nil { return nil, errs.FileError(err, name) diff --git a/utils/utfbom/LICENSE b/utils/utfbom/LICENSE new file mode 100644 index 000000000..6279cb87f --- /dev/null +++ b/utils/utfbom/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2018-2020, Dmitrij Koniajev (dimchansky@gmail.com) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/utils/utfbom/README.md b/utils/utfbom/README.md new file mode 100644 index 000000000..8ece28008 --- /dev/null +++ b/utils/utfbom/README.md @@ -0,0 +1,66 @@ +# utfbom [![Godoc](https://godoc.org/github.com/dimchansky/utfbom?status.png)](https://godoc.org/github.com/dimchansky/utfbom) [![License](https://img.shields.io/:license-apache-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Build Status](https://travis-ci.org/dimchansky/utfbom.svg?branch=master)](https://travis-ci.org/dimchansky/utfbom) [![Go Report Card](https://goreportcard.com/badge/github.com/dimchansky/utfbom)](https://goreportcard.com/report/github.com/dimchansky/utfbom) [![Coverage Status](https://coveralls.io/repos/github/dimchansky/utfbom/badge.svg?branch=master)](https://coveralls.io/github/dimchansky/utfbom?branch=master) + +The package utfbom implements the detection of the BOM (Unicode Byte Order Mark) and removing as necessary. It can also return the encoding detected by the BOM. + +## Installation + + go get -u github.com/dimchansky/utfbom + +## Example + +```go +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + + "github.com/dimchansky/utfbom" +) + +func main() { + trySkip([]byte("\xEF\xBB\xBFhello")) + trySkip([]byte("hello")) +} + +func trySkip(byteData []byte) { + fmt.Println("Input:", byteData) + + // just skip BOM + output, err := ioutil.ReadAll(utfbom.SkipOnly(bytes.NewReader(byteData))) + if err != nil { + fmt.Println(err) + return + } + fmt.Println("ReadAll with BOM skipping", output) + + // skip BOM and detect encoding + sr, enc := utfbom.Skip(bytes.NewReader(byteData)) + fmt.Printf("Detected encoding: %s\n", enc) + output, err = ioutil.ReadAll(sr) + if err != nil { + fmt.Println(err) + return + } + fmt.Println("ReadAll with BOM detection and skipping", output) + fmt.Println() +} +``` + +Output: + +``` +$ go run main.go +Input: [239 187 191 104 101 108 108 111] +ReadAll with BOM skipping [104 101 108 108 111] +Detected encoding: UTF8 +ReadAll with BOM detection and skipping [104 101 108 108 111] + +Input: [104 101 108 108 111] +ReadAll with BOM skipping [104 101 108 108 111] +Detected encoding: Unknown +ReadAll with BOM detection and skipping [104 101 108 108 111] +``` + + diff --git a/utils/utfbom/utfbom.go b/utils/utfbom/utfbom.go new file mode 100644 index 000000000..0eeac8f8c --- /dev/null +++ b/utils/utfbom/utfbom.go @@ -0,0 +1,195 @@ +// Package utfbom implements the detection of the BOM (Unicode Byte Order Mark) and removing as necessary. +// It wraps an io.Reader object, creating another object (Reader) that also implements the io.Reader +// interface but provides automatic BOM checking and removing as necessary. +// +// This package was copied from https://github.com/dimchansky/utfbom. Only minor changes +// were made to not depend on the io/ioutil package. +package utfbom + +import ( + "errors" + "io" +) + +// Encoding is type alias for detected UTF encoding. +type Encoding int + +// Constants to identify detected UTF encodings. +const ( + // Unknown encoding, returned when no BOM was detected + Unknown Encoding = iota + + // UTF8, BOM bytes: EF BB BF + UTF8 + + // UTF-16, big-endian, BOM bytes: FE FF + UTF16BigEndian + + // UTF-16, little-endian, BOM bytes: FF FE + UTF16LittleEndian + + // UTF-32, big-endian, BOM bytes: 00 00 FE FF + UTF32BigEndian + + // UTF-32, little-endian, BOM bytes: FF FE 00 00 + UTF32LittleEndian +) + +// String returns a user-friendly string representation of the encoding. Satisfies fmt.Stringer interface. +func (e Encoding) String() string { + switch e { + case UTF8: + return "UTF8" + case UTF16BigEndian: + return "UTF16BigEndian" + case UTF16LittleEndian: + return "UTF16LittleEndian" + case UTF32BigEndian: + return "UTF32BigEndian" + case UTF32LittleEndian: + return "UTF32LittleEndian" + default: + return "Unknown" + } +} + +const maxConsecutiveEmptyReads = 100 + +// Skip creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. +// It also returns the encoding detected by the BOM. +// If the detected encoding is not needed, you can call the SkipOnly function. +func Skip(rd io.Reader) (*Reader, Encoding) { + // Is it already a Reader? + b, ok := rd.(*Reader) + if ok { + return b, Unknown + } + + enc, left, err := detectUtf(rd) + return &Reader{ + rd: rd, + buf: left, + err: err, + }, enc +} + +// SkipOnly creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. +func SkipOnly(rd io.Reader) *Reader { + r, _ := Skip(rd) + return r +} + +// Reader implements automatic BOM (Unicode Byte Order Mark) checking and +// removing as necessary for an io.Reader object. +type Reader struct { + rd io.Reader // reader provided by the client + buf []byte // buffered data + err error // last error +} + +// Read is an implementation of io.Reader interface. +// The bytes are taken from the underlying Reader, but it checks for BOMs, removing them as necessary. +func (r *Reader) Read(p []byte) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + + if r.buf == nil { + if r.err != nil { + return 0, r.readErr() + } + + return r.rd.Read(p) + } + + // copy as much as we can + n = copy(p, r.buf) + r.buf = nilIfEmpty(r.buf[n:]) + return n, nil +} + +func (r *Reader) readErr() error { + err := r.err + r.err = nil + return err +} + +var errNegativeRead = errors.New("utfbom: reader returned negative count from Read") + +func detectUtf(rd io.Reader) (enc Encoding, buf []byte, err error) { + buf, err = readBOM(rd) + + if len(buf) >= 4 { + if isUTF32BigEndianBOM4(buf) { + return UTF32BigEndian, nilIfEmpty(buf[4:]), err + } + if isUTF32LittleEndianBOM4(buf) { + return UTF32LittleEndian, nilIfEmpty(buf[4:]), err + } + } + + if len(buf) > 2 && isUTF8BOM3(buf) { + return UTF8, nilIfEmpty(buf[3:]), err + } + + if (err != nil && err != io.EOF) || (len(buf) < 2) { + return Unknown, nilIfEmpty(buf), err + } + + if isUTF16BigEndianBOM2(buf) { + return UTF16BigEndian, nilIfEmpty(buf[2:]), err + } + if isUTF16LittleEndianBOM2(buf) { + return UTF16LittleEndian, nilIfEmpty(buf[2:]), err + } + + return Unknown, nilIfEmpty(buf), err +} + +func readBOM(rd io.Reader) (buf []byte, err error) { + const maxBOMSize = 4 + var bom [maxBOMSize]byte // used to read BOM + + // read as many bytes as possible + for nEmpty, n := 0, 0; err == nil && len(buf) < maxBOMSize; buf = bom[:len(buf)+n] { + if n, err = rd.Read(bom[len(buf):]); n < 0 { + panic(errNegativeRead) + } + if n > 0 { + nEmpty = 0 + } else { + nEmpty++ + if nEmpty >= maxConsecutiveEmptyReads { + err = io.ErrNoProgress + } + } + } + return +} + +func isUTF32BigEndianBOM4(buf []byte) bool { + return buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF +} + +func isUTF32LittleEndianBOM4(buf []byte) bool { + return buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00 +} + +func isUTF8BOM3(buf []byte) bool { + return buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF +} + +func isUTF16BigEndianBOM2(buf []byte) bool { + return buf[0] == 0xFE && buf[1] == 0xFF +} + +func isUTF16LittleEndianBOM2(buf []byte) bool { + return buf[0] == 0xFF && buf[1] == 0xFE +} + +func nilIfEmpty(buf []byte) (res []byte) { + if len(buf) > 0 { + res = buf + } + return +} diff --git a/utils/utfbom/utfbom_test.go b/utils/utfbom/utfbom_test.go new file mode 100644 index 000000000..16bfae54f --- /dev/null +++ b/utils/utfbom/utfbom_test.go @@ -0,0 +1,245 @@ +package utfbom + +import ( + "io" + "reflect" + "testing" + "testing/iotest" + "time" +) + +var testCases = []struct { + name string + input []byte + inputError error + encoding Encoding + output []byte +}{ + {"1", []byte{}, nil, Unknown, []byte{}}, + {"2", []byte("hello"), nil, Unknown, []byte("hello")}, + {"3", []byte("\xEF\xBB\xBF"), nil, UTF8, []byte{}}, + {"4", []byte("\xEF\xBB\xBFhello"), nil, UTF8, []byte("hello")}, + {"5", []byte("\xFE\xFF"), nil, UTF16BigEndian, []byte{}}, + {"6", []byte("\xFF\xFE"), nil, UTF16LittleEndian, []byte{}}, + {"7", []byte("\x00\x00\xFE\xFF"), nil, UTF32BigEndian, []byte{}}, + {"8", []byte("\xFF\xFE\x00\x00"), nil, UTF32LittleEndian, []byte{}}, + {"5", []byte("\xFE\xFF\x00\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F"), nil, + UTF16BigEndian, []byte{0x00, 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F}}, + {"6", []byte("\xFF\xFE\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00"), nil, + UTF16LittleEndian, []byte{0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00}}, + {"7", []byte("\x00\x00\xFE\xFF\x00\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F"), nil, + UTF32BigEndian, + []byte{0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F}}, + {"8", []byte("\xFF\xFE\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F\x00\x00\x00"), nil, + UTF32LittleEndian, + []byte{0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00}}, + {"9", []byte("\xEF"), nil, Unknown, []byte("\xEF")}, + {"10", []byte("\xEF\xBB"), nil, Unknown, []byte("\xEF\xBB")}, + {"11", []byte("\xEF\xBB\xBF"), io.ErrClosedPipe, UTF8, []byte{}}, + {"12", []byte("\xFE\xFF"), io.ErrClosedPipe, Unknown, []byte("\xFE\xFF")}, + {"13", []byte("\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFE")}, + {"14", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFF\xFE")}, + {"15", []byte("\x00\x00\xFE\xFF"), io.ErrClosedPipe, UTF32BigEndian, []byte{}}, + {"16", []byte("\x00\x00\xFE"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00, 0xFE}}, + {"17", []byte("\x00\x00"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00}}, + {"18", []byte("\x00"), io.ErrClosedPipe, Unknown, []byte{0x00}}, + {"19", []byte("\xFF\xFE\x00\x00"), io.ErrClosedPipe, UTF32LittleEndian, []byte{}}, + {"20", []byte("\xFF\xFE\x00"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE, 0x00}}, + {"21", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE}}, + {"22", []byte("\xFF"), io.ErrClosedPipe, Unknown, []byte{0xFF}}, + {"23", []byte("\x68\x65"), nil, Unknown, []byte{0x68, 0x65}}, +} + +type sliceReader struct { + input []byte + inputError error +} + +func (r *sliceReader) Read(p []byte) (n int, err error) { + if len(p) == 0 { + return + } + + if err = r.getError(); err != nil { + return + } + + n = copy(p, r.input) + r.input = r.input[n:] + err = r.getError() + return +} + +func (r *sliceReader) getError() (err error) { + if len(r.input) == 0 { + if r.inputError == nil { + err = io.EOF + } else { + err = r.inputError + } + } + return +} + +var readMakers = []struct { + name string + fn func(io.Reader) io.Reader +}{ + {"full", func(r io.Reader) io.Reader { return r }}, + {"byte", iotest.OneByteReader}, +} + +func TestSkip(t *testing.T) { + for _, tc := range testCases { + for _, readMaker := range readMakers { + r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) + + sr, enc := Skip(r) + if enc != tc.encoding { + t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, tc.encoding, enc) + } + + output, err := io.ReadAll(sr) + if !reflect.DeepEqual(output, tc.output) { + t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) + } + if err != tc.inputError { + t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) + } + } + } +} + +func TestSkipSkip(t *testing.T) { + for _, tc := range testCases { + for _, readMaker := range readMakers { + r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) + + sr0, _ := Skip(r) + sr, enc := Skip(sr0) + if enc != Unknown { + t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, Unknown, enc) + } + + output, err := io.ReadAll(sr) + if !reflect.DeepEqual(output, tc.output) { + t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) + } + if err != tc.inputError { + t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) + } + } + } +} + +func TestSkipOnly(t *testing.T) { + for _, tc := range testCases { + for _, readMaker := range readMakers { + r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) + + sr := SkipOnly(r) + + output, err := io.ReadAll(sr) + if !reflect.DeepEqual(output, tc.output) { + t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) + } + if err != tc.inputError { + t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) + } + } + } +} + +type zeroReader struct{} + +func (zeroReader) Read(p []byte) (int, error) { + return 0, nil +} + +type readerEncoding struct { + Rd *Reader + Enc Encoding +} + +func TestSkipZeroReader(t *testing.T) { + var z zeroReader + + c := make(chan readerEncoding) + go func() { + r, enc := Skip(z) + c <- readerEncoding{r, enc} + }() + + select { + case re := <-c: + if re.Enc != Unknown { + t.Error("Unknown encoding expected") + } else { + var b [1]byte + n, err := re.Rd.Read(b[:]) + if n != 0 { + t.Error("unexpected bytes count:", n) + } + if err != io.ErrNoProgress { + t.Error("unexpected error:", err) + } + } + case <-time.After(time.Second): + t.Error("test timed out (endless loop in Skip?)") + } +} + +func TestSkipOnlyZeroReader(t *testing.T) { + var z zeroReader + + c := make(chan *Reader) + go func() { + r := SkipOnly(z) + c <- r + }() + + select { + case r := <-c: + var b [1]byte + n, err := r.Read(b[:]) + if n != 0 { + t.Error("unexpected bytes count:", n) + } + if err != io.ErrNoProgress { + t.Error("unexpected error:", err) + } + case <-time.After(time.Second): + t.Error("test timed out (endless loop in Skip?)") + } +} + +func TestReader_ReadEmpty(t *testing.T) { + for _, tc := range testCases { + for _, readMaker := range readMakers { + r := readMaker.fn(&sliceReader{tc.input, tc.inputError}) + + sr := SkipOnly(r) + + n, err := sr.Read(nil) + if n != 0 { + t.Fatalf("test %v reader=%s: expected to read zero bytes, but got %v", tc.name, readMaker.name, n) + } + if err != nil { + t.Fatalf("test %v reader=%s: expected to get error, but got %+#v", tc.name, readMaker.name, err) + } + } + } +} + +func TestEncoding_String(t *testing.T) { + for e := Unknown; e <= UTF32LittleEndian; e++ { + s := e.String() + if s == "" { + t.Errorf("no string for %#v", e) + } + } + s := Encoding(999).String() + if s != "Unknown" { + t.Errorf("wrong string '%s' for invalid encoding", s) + } +} From deda87eb509492a97afb3bc93d4d33bba8022c95 Mon Sep 17 00:00:00 2001 From: Herman Slatman Date: Mon, 23 Oct 2023 15:23:04 +0200 Subject: [PATCH 2/3] Fix linting issues --- utils/utfbom/utfbom.go | 8 ++++---- utils/utfbom/utfbom_test.go | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/utils/utfbom/utfbom.go b/utils/utfbom/utfbom.go index 0eeac8f8c..93a144fd2 100644 --- a/utils/utfbom/utfbom.go +++ b/utils/utfbom/utfbom.go @@ -3,7 +3,7 @@ // interface but provides automatic BOM checking and removing as necessary. // // This package was copied from https://github.com/dimchansky/utfbom. Only minor changes -// were made to not depend on the io/ioutil package. +// were made to not depend on the io/ioutil package and to make our linters pass. package utfbom import ( @@ -132,7 +132,7 @@ func detectUtf(rd io.Reader) (enc Encoding, buf []byte, err error) { return UTF8, nilIfEmpty(buf[3:]), err } - if (err != nil && err != io.EOF) || (len(buf) < 2) { + if (err != nil && !errors.Is(err, io.EOF)) || (len(buf) < 2) { return Unknown, nilIfEmpty(buf), err } @@ -151,9 +151,9 @@ func readBOM(rd io.Reader) (buf []byte, err error) { var bom [maxBOMSize]byte // used to read BOM // read as many bytes as possible - for nEmpty, n := 0, 0; err == nil && len(buf) < maxBOMSize; buf = bom[:len(buf)+n] { + for nEmpty, n := 0, 0; err == nil && len(buf) < maxBOMSize; buf = bom[:len(buf)+n] { //nolint:wastedassign // copied code if n, err = rd.Read(bom[len(buf):]); n < 0 { - panic(errNegativeRead) + return nil, errNegativeRead } if n > 0 { nEmpty = 0 diff --git a/utils/utfbom/utfbom_test.go b/utils/utfbom/utfbom_test.go index 16bfae54f..9f80027c2 100644 --- a/utils/utfbom/utfbom_test.go +++ b/utils/utfbom/utfbom_test.go @@ -1,6 +1,7 @@ package utfbom import ( + "errors" "io" "reflect" "testing" @@ -103,7 +104,7 @@ func TestSkip(t *testing.T) { if !reflect.DeepEqual(output, tc.output) { t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) } - if err != tc.inputError { + if !errors.Is(err, tc.inputError) { t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) } } @@ -125,7 +126,7 @@ func TestSkipSkip(t *testing.T) { if !reflect.DeepEqual(output, tc.output) { t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) } - if err != tc.inputError { + if !errors.Is(err, tc.inputError) { t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) } } @@ -143,7 +144,7 @@ func TestSkipOnly(t *testing.T) { if !reflect.DeepEqual(output, tc.output) { t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output) } - if err != tc.inputError { + if !errors.Is(err, tc.inputError) { t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err) } } @@ -180,7 +181,7 @@ func TestSkipZeroReader(t *testing.T) { if n != 0 { t.Error("unexpected bytes count:", n) } - if err != io.ErrNoProgress { + if !errors.Is(err, io.ErrNoProgress) { t.Error("unexpected error:", err) } } @@ -205,7 +206,7 @@ func TestSkipOnlyZeroReader(t *testing.T) { if n != 0 { t.Error("unexpected bytes count:", n) } - if err != io.ErrNoProgress { + if !errors.Is(err, io.ErrNoProgress) { t.Error("unexpected error:", err) } case <-time.After(time.Second): From 92f807f1fdee9fb05a1eab3b41e555efe3c57f7c Mon Sep 17 00:00:00 2001 From: Herman Slatman Date: Tue, 24 Oct 2023 23:57:48 +0200 Subject: [PATCH 3/3] Make the `utfbom` package internal --- utils/{ => internal}/utfbom/LICENSE | 0 utils/{ => internal}/utfbom/README.md | 0 utils/{ => internal}/utfbom/utfbom.go | 0 utils/{ => internal}/utfbom/utfbom_test.go | 0 utils/read.go | 2 +- 5 files changed, 1 insertion(+), 1 deletion(-) rename utils/{ => internal}/utfbom/LICENSE (100%) rename utils/{ => internal}/utfbom/README.md (100%) rename utils/{ => internal}/utfbom/utfbom.go (100%) rename utils/{ => internal}/utfbom/utfbom_test.go (100%) diff --git a/utils/utfbom/LICENSE b/utils/internal/utfbom/LICENSE similarity index 100% rename from utils/utfbom/LICENSE rename to utils/internal/utfbom/LICENSE diff --git a/utils/utfbom/README.md b/utils/internal/utfbom/README.md similarity index 100% rename from utils/utfbom/README.md rename to utils/internal/utfbom/README.md diff --git a/utils/utfbom/utfbom.go b/utils/internal/utfbom/utfbom.go similarity index 100% rename from utils/utfbom/utfbom.go rename to utils/internal/utfbom/utfbom.go diff --git a/utils/utfbom/utfbom_test.go b/utils/internal/utfbom/utfbom_test.go similarity index 100% rename from utils/utfbom/utfbom_test.go rename to utils/internal/utfbom/utfbom_test.go diff --git a/utils/read.go b/utils/read.go index 296346c80..869c7a2cb 100644 --- a/utils/read.go +++ b/utils/read.go @@ -9,7 +9,7 @@ import ( "unicode" "github.com/pkg/errors" - "github.com/smallstep/cli/utils/utfbom" + "github.com/smallstep/cli/utils/internal/utfbom" "go.step.sm/cli-utils/errs" "go.step.sm/cli-utils/ui"