Improved feed update, proper handling of duplicate items.
This may cause problems with feeds that do not provide item IDs. If this
becomes an issue (feel free to submit one), I'll add automatic
generation of unique IDs.
SlyMarbo committed Apr 23, 2013
1 parent 7fa8fbc commit 0099aa7
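
Note: the "automatic generation of unique IDs" mentioned above is not part of this commit. Purely as a sketch of one possible approach, a stable fallback ID could be derived by hashing item fields that rarely change between fetches. The helper below is hypothetical: fallbackID does not exist in the library, and a Link field on items is assumed.

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// fallbackID is a hypothetical helper, not part of this repository: it
// derives a stable ID for an item that lacks one by hashing fields that
// are unlikely to change between fetches.
func fallbackID(title, link string) string {
	sum := sha256.Sum256([]byte(title + "\x00" + link))
	return hex.EncodeToString(sum[:])
}

func main() {
	fmt.Println(fallbackID("Example item", "http://example.com/item"))
}
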
Showing 4 changed files with 52 additions and 22 deletions.
11 changes: 11 additions & 0 deletions atom.go
@@ -50,7 +50,18 @@ func parseAtom(data []byte, read *db) (*Feed, error) {
		next.ID = item.ID
		next.Read = false

		if next.ID == "" {
			fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title)
			continue
		}

		if _, ok := out.ItemMap[next.ID]; ok {
			fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title)
			continue
		}

		out.Items = append(out.Items, next)
		out.ItemMap[next.ID] = struct{}{}
		out.Unread++
	}

11 changes: 11 additions & 0 deletions rss 1.0.go
@@ -82,7 +82,18 @@ func parseRSS1(data []byte, read *db) (*Feed, error) {
		next.ID = item.ID
		next.Read = false

		if next.ID == "" {
			fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title)
			continue
		}

		if _, ok := out.ItemMap[next.ID]; ok {
			fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title)
			continue
		}

		out.Items = append(out.Items, next)
		out.ItemMap[next.ID] = struct{}{}
		out.Unread++
	}

12 changes: 12 additions & 0 deletions rss 2.0.go
@@ -60,6 +60,7 @@ func parseRSS2(data []byte, read *db) (*Feed, error) {
	}

	out.Items = make([]*Item, 0, len(channel.Items))
	out.ItemMap = make(map[string]struct{})

	// Process items.
	for _, item := range channel.Items {
@@ -82,7 +83,18 @@ func parseRSS2(data []byte, read *db) (*Feed, error) {
		next.ID = item.ID
		next.Read = false

		if next.ID == "" {
			fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title)
			continue
		}

		if _, ok := out.ItemMap[next.ID]; ok {
			fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title)
			continue
		}

		out.Items = append(out.Items, next)
		out.ItemMap[next.ID] = struct{}{}
		out.Unread++
	}

40 changes: 18 additions & 22 deletions rss.go
@@ -12,6 +12,7 @@ import (

// Parse RSS or Atom data.
func Parse(data []byte) (*Feed, error) {

	if strings.Contains(string(data), "<rss") {
		return parseRSS2(data, database)
	} else if strings.Contains(string(data), "xmlns=\"http://purl.org/rss/1.0/\"") {
@@ -37,15 +38,15 @@ func Fetch(url string) (*Feed, error) {
	}

	out, err := Parse(body)
	if err != nil {
		return nil, err
	}
	if out.Link == "" {
		out.Link = url
	}
	return out, nil
	if err != nil {
		return nil, err
	}

	if out.Link == "" {
		out.Link = url
	}

	return out, nil
}

// Feed is the top-level structure.
@@ -55,6 +56,7 @@ type Feed struct {
	Link        string
	Image       *Image
	Items       []*Item
	ItemMap     map[string]struct{}
	Refresh     time.Time
	Unread      uint32
}
@@ -71,6 +73,10 @@ func (f *Feed) Update() error {
		return errors.New("Error: feed has no URL.")
	}

	if f.ItemMap == nil {
		return errors.New("Error: Feed has no ItemMap.")
	}

	update, err := Fetch(f.Link)
	if err != nil {
		return err
@@ -80,21 +86,11 @@ func (f *Feed) Update() error {
	f.Title = update.Title
	f.Description = update.Description

	// Find the offset between items.
	offset := 0
	for _, item := range f.Items {
		if item.ID == update.Items[0].ID {
			break
		}
		offset++
	}

	for i, item := range update.Items {
		if i+offset >= len(f.Items) {
	for _, item := range update.Items {
		if _, ok := f.ItemMap[item.ID]; !ok {
			f.Items = append(f.Items, item)
			f.ItemMap[item.ID] = struct{}{}
			f.Unread++
		} else if f.Items[i+offset].ID != item.ID {
			return errors.New("Error: offsets don't match.")
		}
	}

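For context, here is a minimal usage sketch of the new map-based update path. It assumes the import path github.com/SlyMarbo/rss and relies only on exported names that appear in this diff (Fetch, Update, Items, Unread, Title, Read); error handling is kept short.

package main

import (
	"fmt"
	"log"

	"github.com/SlyMarbo/rss" // assumed import path for this repository
)

func main() {
	// Fetch downloads and parses the feed, populating Items and ItemMap.
	feed, err := rss.Fetch("http://example.com/feed.xml")
	if err != nil {
		log.Fatal(err)
	}

	// Update re-fetches the feed and, with this commit, appends only
	// items whose IDs are not already present in feed.ItemMap.
	if err := feed.Update(); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("%d unread items\n", feed.Unread)
	for _, item := range feed.Items {
		if !item.Read {
			fmt.Println(item.Title)
		}
	}
}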
