diff --git a/ffjson/encoder.go b/ffjson/encoder.go index 3e82d61..64bffd4 100644 --- a/ffjson/encoder.go +++ b/ffjson/encoder.go @@ -29,9 +29,10 @@ import ( // It allows to encode many objects to a single writer. // This should not be used by more than one goroutine at the time. type Encoder struct { - buf fflib.Buffer - w io.Writer - enc *json.Encoder + buf fflib.Buffer + w io.Writer + enc *json.Encoder + escapeHTML bool } // SetEscapeHTML specifies whether problematic HTML characters @@ -42,13 +43,14 @@ type Encoder struct { // In non-HTML settings where the escaping interferes with the readability // of the output, SetEscapeHTML(false) disables this behavior. func (enc *Encoder) SetEscapeHTML(on bool) { + enc.escapeHTML = on enc.enc.SetEscapeHTML(on) } // NewEncoder returns a reusable Encoder. // Output will be written to the supplied writer. func NewEncoder(w io.Writer) *Encoder { - return &Encoder{w: w, enc: json.NewEncoder(w)} + return &Encoder{w: w, enc: json.NewEncoder(w), escapeHTML: true} } // Encode the data in the supplied value to the stream @@ -59,7 +61,7 @@ func (e *Encoder) Encode(v interface{}) error { f, ok := v.(marshalerFaster) if ok { e.buf.Reset() - err := f.MarshalJSONBuf(&e.buf) + err := f.MarshalJSONBuf(&e.buf, e.escapeHTML) if err != nil { return err } diff --git a/ffjson/marshal.go b/ffjson/marshal.go index ff0685e..f1ab6ea 100644 --- a/ffjson/marshal.go +++ b/ffjson/marshal.go @@ -25,7 +25,7 @@ import ( ) type marshalerFaster interface { - MarshalJSONBuf(buf fflib.EncodingBuffer) error + MarshalJSONBuf(buf fflib.EncodingBuffer, escapeHTML bool) error } type unmarshalFaster interface { @@ -43,7 +43,7 @@ func Marshal(v interface{}) ([]byte, error) { f, ok := v.(marshalerFaster) if ok { buf := fflib.Buffer{} - err := f.MarshalJSONBuf(&buf) + err := f.MarshalJSONBuf(&buf, true) b := buf.Bytes() if err != nil { if len(b) > 0 { diff --git a/fflib/v1/jsonstring.go b/fflib/v1/jsonstring.go index 513b45d..509aaab 100644 --- 
a/fflib/v1/jsonstring.go +++ b/fflib/v1/jsonstring.go @@ -38,25 +38,19 @@ type JsonStringWriter interface { stringWriter } -func WriteJsonString(buf JsonStringWriter, s string) { - WriteJson(buf, []byte(s)) +func WriteJsonString(buf JsonStringWriter, s string, escapeHTML bool) { + WriteJson(buf, []byte(s), escapeHTML) } /** * Function ported from encoding/json: func (e *encodeState) string(s string) (int, error) */ -func WriteJson(buf JsonStringWriter, s []byte) { +func WriteJson(buf JsonStringWriter, s []byte, escapeHTML bool) { buf.WriteByte('"') start := 0 for i := 0; i < len(s); { if b := s[i]; b < utf8.RuneSelf { - /* - if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { - i++ - continue - } - */ - if lt[b] == true { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { i++ continue } @@ -74,11 +68,15 @@ func WriteJson(buf JsonStringWriter, s []byte) { case '\r': buf.WriteByte('\\') buf.WriteByte('r') + case '\t': + buf.WriteByte('\\') + buf.WriteByte('t') default: - // This encodes bytes < 0x20 except for \n and \r, - // as well as < and >. The latter are escaped because they - // can lead to security holes when user-controlled strings - // are rendered into JSON and served to some browsers. + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. 
buf.WriteString(`\u00`) buf.WriteByte(hex[b>>4]) buf.WriteByte(hex[b&0xF]) diff --git a/fflib/v1/jsonstring_test.go b/fflib/v1/jsonstring_test.go index 476d336..5f05a6d 100644 --- a/fflib/v1/jsonstring_test.go +++ b/fflib/v1/jsonstring_test.go @@ -24,15 +24,28 @@ import ( func TestWriteJsonString(t *testing.T) { var buf bytes.Buffer - WriteJsonString(&buf, "foo") + WriteJsonString(&buf, "foo", true) if string(buf.Bytes()) != `"foo"` { t.Fatalf("Expected: %v\nGot: %v", `"foo"`, string(buf.Bytes())) } buf.Reset() - WriteJsonString(&buf, `f"oo`) + WriteJsonString(&buf, `f"oo`, true) if string(buf.Bytes()) != `"f\"oo"` { t.Fatalf("Expected: %v\nGot: %v", `"f\"oo"`, string(buf.Bytes())) } + + buf.Reset() + WriteJsonString(&buf, `&foo<bar>`, true) + if string(buf.Bytes()) != `"\u0026foo\u003cbar\u003e"` { + t.Fatalf("Expected: %v\nGot: %v", `\u0026foo\u003cbar\u003e`, string(buf.Bytes())) + } + + buf.Reset() + WriteJsonString(&buf, `&foo`, false) + if string(buf.Bytes()) != `"&foo"` { + t.Fatalf("Expected: %v\nGot: %v", `"&foo"`, string(buf.Bytes())) + } + // TODO(pquerna): all them important tests. 
} diff --git a/fflib/v1/lexer.go b/fflib/v1/lexer.go index 5589292..9f20a44 100644 --- a/fflib/v1/lexer.go +++ b/fflib/v1/lexer.go @@ -267,7 +267,7 @@ func (ffl *FFLexer) lexString() FFTok { return FFTok_error } - WriteJson(ffl.Output, ffl.buf.Bytes()) + WriteJson(ffl.Output, ffl.buf.Bytes(), false) return FFTok_string } else { @@ -548,7 +548,7 @@ func (ffl *FFLexer) scanField(start FFTok, capture bool) ([]byte, error) { //TODO(pquerna): so, other users expect this to be a quoted string :( if capture { ffl.buf.Reset() - WriteJson(&ffl.buf, ffl.Output.Bytes()) + WriteJson(&ffl.buf, ffl.Output.Bytes(), false) return ffl.buf.Bytes(), nil } else { return nil, nil diff --git a/fflib/v1/tables.go b/fflib/v1/tables.go new file mode 100644 index 0000000..9694c03 --- /dev/null +++ b/fflib/v1/tables.go @@ -0,0 +1,220 @@ +// this file is copied from the Go std library for ffjson internal use + +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package v1 + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). 
+var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +// htmlSafeSet holds the value true if the ASCII character with the given +// array position can be safely represented inside a JSON string, embedded +// inside of HTML