Skip to content

Commit

Permalink
support more charset
Browse files Browse the repository at this point in the history
  • Loading branch information
hippasus committed Mar 15, 2014
1 parent 99e3344 commit a8f6f4b
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 30 deletions.
30 changes: 0 additions & 30 deletions ISO-8859-1.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)

Expand Down Expand Up @@ -41,16 +40,6 @@ func (cs *charsetISO88591er) Read(p []byte) (int, error) {
return 0, errors.New("Use ReadByte()")
}

// isCharset reports whether charset equals one of names once both sides
// have been lower-cased. An empty or nil names list never matches.
func isCharset(charset string, names []string) bool {
	want := strings.ToLower(charset)
	for i := 0; i < len(names); i++ {
		if strings.ToLower(names[i]) != want {
			continue
		}
		return true
	}
	return false
}

func isCharsetISO88591(charset string) bool {
// http://www.iana.org/assignments/character-sets
// (last updated 2010-11-04)
Expand All @@ -70,22 +59,3 @@ func isCharsetISO88591(charset string) bool {
}
return isCharset(charset, names)
}

// isCharsetUTF8 reports whether charset names UTF-8. The empty string is
// accepted as well: it is listed as the default.
func isCharsetUTF8(charset string) bool {
	return isCharset(charset, []string{"UTF-8", ""})
}

// charsetReader picks a reader for input based on the declared charset.
//
// UTF-8 (including the empty default) returns input untouched, ISO-8859-1
// returns the reader built by newCharsetISO88591, and any other charset
// yields a nil reader together with an error naming the charset.
func charsetReader(charset string, input io.Reader) (io.Reader, error) {
	if isCharsetUTF8(charset) {
		return input, nil
	}
	if isCharsetISO88591(charset) {
		return newCharsetISO88591(input), nil
	}
	return nil, errors.New("CharsetReader: unexpected charset: " + charset)
}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ and it seems to work fine with them.
If anyone has any problems with feeds being parsed incorrectly, please let me know so that
I can debug and improve the package.

Dependencies:
```bash
go get github.com/axgle/mahonia
```

Example usage:
```go
package main
Expand Down
47 changes: 47 additions & 0 deletions charset-reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package rss

import (
"errors"
"io"
"strings"

"github.com/axgle/mahonia"
)

// charsetReader picks a reader for input based on the declared charset.
//
// UTF-8 (including the empty default) returns input untouched and
// ISO-8859-1 returns the reader built by newCharsetISO88591. Every other
// charset is handed to mahonia: when mahonia.NewDecoder yields a decoder,
// input is wrapped with decoder.NewReader; when it yields nil, a nil reader
// is returned together with an error naming the charset.
func charsetReader(charset string, input io.Reader) (io.Reader, error) {
	if isCharsetUTF8(charset) {
		return input, nil
	}
	if isCharsetISO88591(charset) {
		return newCharsetISO88591(input), nil
	}

	// Only consult mahonia after the built-in charsets have been ruled out,
	// matching the original evaluation order.
	if decoder := mahonia.NewDecoder(charset); decoder != nil {
		return decoder.NewReader(input), nil
	}

	return nil, errors.New("CharsetReader: unexpected charset: " + charset)
}

// isCharset reports whether charset matches any entry of names, ignoring
// letter case. An empty or nil names list never matches.
//
// The comparison uses strings.EqualFold, so no lower-cased copies of the
// inputs are allocated. For ASCII charset labels the result is the same as
// comparing lower-cased strings.
func isCharset(charset string, names []string) bool {
	for _, name := range names {
		if strings.EqualFold(charset, name) {
			return true
		}
	}
	return false
}

// isCharsetUTF8 reports whether charset should be treated as UTF-8.
func isCharsetUTF8(charset string) bool {
	utf8Names := []string{
		"UTF-8",
		"", // an empty charset is the default
	}
	matched := isCharset(charset, utf8Names)
	return matched
}

0 comments on commit a8f6f4b

Please sign in to comment.