-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Build stage: compile a static (CGO-free) binary from the vendored module.
FROM golang:1.13-buster AS build
COPY . /src/github.com/arussellsaw/news
RUN cd /src/github.com/arussellsaw/news && CGO_ENABLED=0 go build -o news -mod=vendor

# Runtime stage: minimal image; ca-certificates is required for outbound HTTPS
# (the app fetches feeds and images over TLS).
FROM alpine:latest AS final
RUN apk --no-cache add ca-certificates

WORKDIR /app

# Binary plus the static assets and templates it serves at runtime.
COPY --from=build /src/github.com/arussellsaw/news/news /app/
COPY --from=build /src/github.com/arussellsaw/news/static /app/static
COPY --from=build /src/github.com/arussellsaw/news/tmpl /app/tmpl
EXPOSE 8080
# Exec form so the binary runs as PID 1 and receives SIGTERM directly;
# the original shell form (`ENTRYPOINT /app/news`) wraps it in /bin/sh,
# which does not forward signals, breaking graceful shutdown.
ENTRYPOINT ["/app/news"]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# All targets are actions, not output files: declare them phony so a stray
# file named `build`, `deploy`, or `push` can never suppress the recipe.
.PHONY: build deploy push

build:
	docker build -t gcr.io/russellsaw/news .

# Prerequisites run first: the image is rebuilt and pushed before deploying.
deploy: build push
	gcloud beta run deploy news --image gcr.io/russellsaw/news:latest

push:
	docker push gcr.io/russellsaw/news
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package main | ||
|
||
import "sync" | ||
|
||
// Cache stores extracted article text and lead-image URLs keyed by
// article URL.
type Cache interface {
	// Get returns the cached text and image URL for url; the bool
	// reports whether an entry was present.
	Get(url string) (string, string, bool, error)
	// Set stores the extracted text and image URL for url.
	Set(url, text, image string) error
}
|
||
// memoryCache is a process-local Cache implementation backed by a map.
// NOTE(review): no initialisation of m is visible in this file; unless a
// constructor elsewhere allocates the map, Set will panic on the nil map —
// confirm against the rest of the package.
type memoryCache struct {
	mu sync.RWMutex         // guards m
	m  map[string][2]string // url -> {extracted text, image URL}
}
|
||
func (m *memoryCache) Get(url string) (string, string, bool, error) { | ||
m.mu.RLock() | ||
defer m.mu.RUnlock() | ||
v, ok := m.m[url] | ||
return v[0], v[1], ok, nil | ||
} | ||
|
||
func (m *memoryCache) Set(url, text, image string) error { | ||
m.mu.Lock() | ||
defer m.mu.Unlock() | ||
m.m[url] = [2]string{text, image} | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package main | ||
|
||
import ( | ||
"bytes" | ||
dither "github.com/esimov/dithergo" | ||
"github.com/nfnt/resize" | ||
"image" | ||
_ "image/gif" | ||
"image/jpeg" | ||
_ "image/png" | ||
"log" | ||
"net/http" | ||
"strconv" | ||
) | ||
|
||
func handleDitherImage(w http.ResponseWriter, r *http.Request) { | ||
q := r.URL.Query() | ||
url := q.Get("url") | ||
res, err := http.Get(url) | ||
if err != nil { | ||
http.Error(w, err.Error(), 500) | ||
log.Println("getting", err, url) | ||
return | ||
} | ||
|
||
width, err := strconv.ParseInt(q.Get("w"), 10, 64) | ||
if err != nil { | ||
http.Error(w, err.Error(), 500) | ||
log.Println(err, url) | ||
return | ||
} | ||
|
||
img, _, err := image.Decode(res.Body) | ||
if err != nil { | ||
http.Error(w, err.Error(), 500) | ||
log.Println("decode: ", err, url) | ||
return | ||
} | ||
|
||
newImage := resize.Resize(uint(width), 0, img, resize.Lanczos3) | ||
|
||
d := dither.Dither{ | ||
Type: "FloydSteinberg", | ||
Settings: dither.Settings{ | ||
Filter: [][]float32{ | ||
{0.0, 0.0, 0.0, 7.0 / 48.0, 5.0 / 48.0}, | ||
{3.0 / 48.0, 5.0 / 48.0, 7.0 / 48.0, 5.0 / 48.0, 3.0 / 48.0}, | ||
{1.0 / 48.0, 3.0 / 48.0, 5.0 / 48.0, 3.0 / 48.0, 1.0 / 48.0}, | ||
}, | ||
}, | ||
} | ||
dithered := d.Monochrome(newImage, 1.18) | ||
|
||
buffer := new(bytes.Buffer) | ||
if err := jpeg.Encode(buffer, dithered, nil); err != nil { | ||
log.Println("unable to encode image.", url) | ||
} | ||
|
||
w.Header().Set("Content-Type", "image/jpeg") | ||
w.Header().Set("Content-Length", strconv.Itoa(len(buffer.Bytes()))) | ||
if _, err := w.Write(buffer.Bytes()); err != nil { | ||
log.Println("unable to write image.", url) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
package main | ||
|
||
import ( | ||
"github.com/arussellsaw/news/pkg/goose" | ||
"github.com/mmcdole/gofeed" | ||
"golang.org/x/net/html" | ||
"golang.org/x/sync/errgroup" | ||
"sort" | ||
"strings" | ||
"time" | ||
) | ||
|
||
// Source describes one news outlet whose RSS feed is aggregated.
type Source struct {
	Name    string // human-readable outlet name
	URL     string // outlet homepage
	FeedURL string // RSS/Atom feed to poll
}
|
||
// Article is a single normalised news item assembled from a feed entry.
type Article struct {
	Title       string
	Description string
	Content     string // extracted body text
	ImageURL    string // lead image, if one was found
	Link        string
	Author      string
	Source      Source
	Timestamp   time.Time
	TS          string // Timestamp pre-formatted as "15:04 02-01-2006" for templates
	Layout      Layout // presumably set later by layout code — not populated in this file
}
|
||
// sources is the fixed set of feeds aggregated by getArticles.
var sources = []Source{
	{
		Name:    "Vox",
		URL:     "https://vox.com",
		FeedURL: "https://www.vox.com/rss/index.xml",
	},
	{
		Name:    "The Verge",
		URL:     "https://theverge.com",
		FeedURL: "https://www.theverge.com/rss/index.xml",
	},
	{
		Name:    "Polygon",
		URL:     "https://polygon.com",
		FeedURL: "https://www.polygon.com/rss/index.xml",
	},
	{
		Name:    "TechCrunch",
		URL:     "https://techcrunch.com",
		FeedURL: "http://feeds.feedburner.com/TechCrunch/",
	},
	{
		Name:    "BBC News",
		URL:     "https://bbc.co.uk/news",
		FeedURL: "http://feeds.bbci.co.uk/news/rss.xml",
	},
	{
		Name:    "The Guardian",
		URL:     "https://theguardian.co.uk",
		FeedURL: "https://www.theguardian.com/uk/rss",
	},
}
|
||
func getArticles() ([]Article, error) { | ||
eg := errgroup.Group{} | ||
articles := make(chan Article, 1024^2) | ||
for _, s := range sources { | ||
s := s | ||
eg.Go(func() error { | ||
fp := gofeed.NewParser() | ||
feed, err := fp.ParseURL(s.FeedURL) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
g := errgroup.Group{} | ||
for _, item := range feed.Items { | ||
item := item | ||
g.Go(func() error { | ||
var imageURL string | ||
if item.Image != nil { | ||
imageURL = item.Image.URL | ||
} | ||
var content string | ||
if len(item.Content) < 100 { | ||
text, image, ok, err := cache.Get(item.Link) | ||
if err != nil { | ||
return err | ||
} | ||
if ok { | ||
content = text | ||
imageURL = image | ||
} else { | ||
g := goose.New() | ||
article, err := g.ExtractFromURL(item.Link) | ||
if err != nil { | ||
return err | ||
} | ||
content = article.CleanedText | ||
imageURL = article.TopImage | ||
err = cache.Set(item.Link, content, imageURL) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
} else { | ||
doc, err := html.Parse(strings.NewReader(item.Content)) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var f func(n *html.Node) | ||
f = func(n *html.Node) { | ||
switch n.Type { | ||
case html.ElementNode: | ||
if n.Data == "img" { | ||
for _, a := range n.Attr { | ||
if a.Key == "src" { | ||
if imageURL == "" { | ||
imageURL = a.Val | ||
} | ||
} | ||
} | ||
} | ||
case html.TextNode: | ||
if n.Parent.Data == "p" { | ||
attrs := n.Parent.Attr | ||
for _, a := range attrs { | ||
if a.Key == "class" && matchClass(a.Val, []string{"twite", "top-stories"}) { | ||
goto recurse | ||
} | ||
} | ||
if item.Description == "" { | ||
item.Description = n.Data | ||
} | ||
content += n.Data + " " | ||
} | ||
} | ||
recurse: | ||
for c := n.FirstChild; c != nil; c = c.NextSibling { | ||
f(c) | ||
} | ||
} | ||
f(doc) | ||
} | ||
|
||
var author string | ||
if item.Author != nil { | ||
author = item.Author.Name | ||
} | ||
|
||
articles <- Article{ | ||
Title: item.Title, | ||
Description: item.Description, | ||
Content: content, | ||
ImageURL: imageURL, | ||
Link: item.Link, | ||
Author: author, | ||
Source: s, | ||
Timestamp: *item.PublishedParsed, | ||
TS: item.PublishedParsed.Format("15:04 02-01-2006"), | ||
} | ||
return nil | ||
}) | ||
} | ||
return g.Wait() | ||
}) | ||
} | ||
err := eg.Wait() | ||
if err != nil { | ||
return nil, err | ||
} | ||
close(articles) | ||
out := []Article{} | ||
for a := range articles { | ||
out = append(out, a) | ||
} | ||
sort.Slice(out, func(i, j int) bool { | ||
return out[i].Timestamp.Before(out[j].Timestamp) | ||
}) | ||
return out, nil | ||
} | ||
|
||
// matchClass reports whether the class attribute value contains any of the
// excluded substrings.
func matchClass(class string, exclude []string) bool {
	for _, substr := range exclude {
		if !strings.Contains(class, substr) {
			continue
		}
		return true
	}
	return false
}