From 0099aa70f1a17ea11833885ed52ec05f49ef59f2 Mon Sep 17 00:00:00 2001 From: Jamie Hall Date: Tue, 23 Apr 2013 14:59:15 +0100 Subject: [PATCH] Improved feed update, proper handling of duplicate items. This may cause issues with feeds which do not provide item IDs. If this becomes an issue (feel free to submit one), I'll add automatic generation of unique IDs. --- atom.go | 11 +++++++++++ rss 1.0.go | 11 +++++++++++ rss 2.0.go | 12 ++++++++++++ rss.go | 40 ++++++++++++++++++---------------------- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/atom.go b/atom.go index ac61be1..1048b3a 100644 --- a/atom.go +++ b/atom.go @@ -50,7 +50,18 @@ func parseAtom(data []byte, read *db) (*Feed, error) { next.ID = item.ID next.Read = false + if next.ID == "" { + fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title) + continue + } + + if _, ok := out.ItemMap[next.ID]; ok { + fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title) + continue + } + out.Items = append(out.Items, next) + out.ItemMap[next.ID] = struct{}{} out.Unread++ } diff --git a/rss 1.0.go b/rss 1.0.go index 2e86749..19d43de 100644 --- a/rss 1.0.go +++ b/rss 1.0.go @@ -82,7 +82,18 @@ func parseRSS1(data []byte, read *db) (*Feed, error) { next.ID = item.ID next.Read = false + if next.ID == "" { + fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title) + continue + } + + if _, ok := out.ItemMap[next.ID]; ok { + fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title) + continue + } + out.Items = append(out.Items, next) + out.ItemMap[next.ID] = struct{}{} out.Unread++ } diff --git a/rss 2.0.go b/rss 2.0.go index 15e8a9c..a5f377a 100644 --- a/rss 2.0.go +++ b/rss 2.0.go @@ -60,6 +60,7 @@ func parseRSS2(data []byte, read *db) (*Feed, error) { } out.Items = make([]*Item, 0, len(channel.Items)) + out.ItemMap = make(map[string]struct{}) // Process items. for _, item := range channel.Items { @@ -82,7 +83,18 @@ func parseRSS2(data []byte, read *db) (*Feed, error) { next.ID = item.ID next.Read = false + if next.ID == "" { + fmt.Printf("Warning: Item %q has no ID and will be ignored.\n", next.Title) + continue + } + + if _, ok := out.ItemMap[next.ID]; ok { + fmt.Printf("Warning: Item %q has duplicate ID.\n", next.Title) + continue + } + out.Items = append(out.Items, next) + out.ItemMap[next.ID] = struct{}{} out.Unread++ } diff --git a/rss.go b/rss.go index b396097..d74f050 100644 --- a/rss.go +++ b/rss.go @@ -12,6 +12,7 @@ import ( // Parse RSS or Atom data. func Parse(data []byte) (*Feed, error) { + if strings.Contains(string(data), "= len(f.Items) { + for _, item := range update.Items { + if _, ok := f.ItemMap[item.ID]; !ok { f.Items = append(f.Items, item) + f.ItemMap[item.ID] = struct{}{} f.Unread++ - } else if f.Items[i+offset].ID != item.ID { - return errors.New("Error: offsets don't match.") } }