Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Russell-Saw committed Apr 13, 2020
0 parents commit d6fb9cc
Show file tree
Hide file tree
Showing 275 changed files with 219,910 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions .idea/news.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions .idea/watcherTasks.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build stage: compile a static binary with vendored modules.
FROM golang:1.13-buster AS build
COPY . /src/github.com/arussellsaw/news
# CGO_ENABLED=0 yields a statically linked binary that runs on musl-based alpine.
RUN cd /src/github.com/arussellsaw/news && CGO_ENABLED=0 go build -o news -mod=vendor

# Final stage: minimal runtime image with CA certs for outbound HTTPS.
FROM alpine:latest AS final
RUN apk --no-cache add ca-certificates

WORKDIR /app

COPY --from=build /src/github.com/arussellsaw/news/news /app/
COPY --from=build /src/github.com/arussellsaw/news/static /app/static
COPY --from=build /src/github.com/arussellsaw/news/tmpl /app/tmpl
EXPOSE 8080
# Exec form so the binary runs as PID 1 and receives SIGTERM directly;
# the shell form (`ENTRYPOINT /app/news`) interposes /bin/sh and swallows signals.
ENTRYPOINT ["/app/news"]
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Build the container image locally.
build:
docker build -t gcr.io/russellsaw/news .

# Build and push the image, then deploy it to Cloud Run.
deploy: build push
gcloud beta run deploy news --image gcr.io/russellsaw/news:latest

# Push the image to Google Container Registry.
push:
docker push gcr.io/russellsaw/news
27 changes: 27 additions & 0 deletions cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package main

import "sync"

// Cache stores extracted article text and a top-image URL, keyed by the
// article's link URL.
type Cache interface {
	// Get returns the cached (text, imageURL) for url and whether an
	// entry was present.
	Get(url string) (string, string, bool, error)
	// Set stores the (text, image) pair for url.
	Set(url, text, image string) error
}

// memoryCache is an in-process Cache implementation guarded by a RWMutex.
// Each entry is [2]string{text, imageURL}. The zero value is ready to use:
// Set lazily initializes the underlying map.
type memoryCache struct {
	mu sync.RWMutex
	m  map[string][2]string
}

// Get returns the cached text and image URL for url and whether an entry
// exists. The error is always nil; it exists to satisfy Cache.
func (m *memoryCache) Get(url string) (string, string, bool, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	v, ok := m.m[url]
	return v[0], v[1], ok, nil
}

// Set stores the text and image URL for url. The error is always nil.
func (m *memoryCache) Set(url, text, image string) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	// Lazily initialize so a zero-value memoryCache is usable; writing
	// to a nil map would otherwise panic.
	if m.m == nil {
		m.m = make(map[string][2]string)
	}
	m.m[url] = [2]string{text, image}
	return nil
}
64 changes: 64 additions & 0 deletions dither.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package main

import (
"bytes"
dither "github.com/esimov/dithergo"
"github.com/nfnt/resize"
"image"
_ "image/gif"
"image/jpeg"
_ "image/png"
"log"
"net/http"
"strconv"
)

// handleDitherImage fetches the image at query param "url", resizes it to
// "w" pixels wide (height scaled to preserve aspect ratio), applies
// monochrome dithering, and responds with the result as a JPEG.
func handleDitherImage(w http.ResponseWriter, r *http.Request) {
	q := r.URL.Query()
	url := q.Get("url")

	// Parse the width before fetching so a bad request doesn't cost an
	// upstream round trip (and, previously, a leaked response body).
	width, err := strconv.ParseInt(q.Get("w"), 10, 64)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		log.Println(err, url)
		return
	}

	res, err := http.Get(url)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		log.Println("getting", err, url)
		return
	}
	// Always close the body so the transport can reuse the connection;
	// the original handler leaked it on every request.
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		http.Error(w, "upstream status: "+res.Status, http.StatusBadGateway)
		log.Println("upstream status", res.Status, url)
		return
	}

	img, _, err := image.Decode(res.Body)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		log.Println("decode: ", err, url)
		return
	}

	// Width fixed, height 0 means resize keeps the aspect ratio.
	newImage := resize.Resize(uint(width), 0, img, resize.Lanczos3)

	// NOTE(review): Type says "FloydSteinberg" but the /48 weights look
	// like a Jarvis-Judice-Ninke-style kernel — confirm which is intended.
	d := dither.Dither{
		Type: "FloydSteinberg",
		Settings: dither.Settings{
			Filter: [][]float32{
				{0.0, 0.0, 0.0, 7.0 / 48.0, 5.0 / 48.0},
				{3.0 / 48.0, 5.0 / 48.0, 7.0 / 48.0, 5.0 / 48.0, 3.0 / 48.0},
				{1.0 / 48.0, 3.0 / 48.0, 5.0 / 48.0, 3.0 / 48.0, 1.0 / 48.0},
			},
		},
	}
	dithered := d.Monochrome(newImage, 1.18)

	// Encode to a buffer first so we know the Content-Length and can
	// report an error before any body bytes are written.
	buffer := new(bytes.Buffer)
	if err := jpeg.Encode(buffer, dithered, nil); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		log.Println("unable to encode image.", url)
		return
	}

	w.Header().Set("Content-Type", "image/jpeg")
	w.Header().Set("Content-Length", strconv.Itoa(buffer.Len()))
	if _, err := w.Write(buffer.Bytes()); err != nil {
		log.Println("unable to write image.", url)
	}
}
192 changes: 192 additions & 0 deletions feeds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package main

import (
"github.com/arussellsaw/news/pkg/goose"
"github.com/mmcdole/gofeed"
"golang.org/x/net/html"
"golang.org/x/sync/errgroup"
"sort"
"strings"
"time"
)

// Source describes a news publication to aggregate.
type Source struct {
	Name    string // human-readable publication name
	URL     string // publication homepage
	FeedURL string // RSS/Atom feed to poll
}

// Article is a single normalized news item extracted from a feed entry.
type Article struct {
	Title       string
	Description string // short summary; may be derived from the first <p> of Content
	Content     string // cleaned article body text
	ImageURL    string // top/lead image, if one was found
	Link        string // canonical URL of the article
	Author      string
	Source      Source    // the feed this article came from
	Timestamp   time.Time // published time, used for sorting
	TS          string    // Timestamp pre-formatted as "15:04 02-01-2006" for templates
	Layout      Layout    // presentation layout; type declared elsewhere in the package
}

// sources is the static list of feeds polled by getArticles.
var sources = []Source{
	{
		Name:    "Vox",
		URL:     "https://vox.com",
		FeedURL: "https://www.vox.com/rss/index.xml",
	},
	{
		Name:    "The Verge",
		URL:     "https://theverge.com",
		FeedURL: "https://www.theverge.com/rss/index.xml",
	},
	{
		Name:    "Polygon",
		URL:     "https://polygon.com",
		FeedURL: "https://www.polygon.com/rss/index.xml",
	},
	{
		Name:    "TechCrunch",
		URL:     "https://techcrunch.com",
		FeedURL: "http://feeds.feedburner.com/TechCrunch/",
	},
	{
		Name:    "BBC News",
		URL:     "https://bbc.co.uk/news",
		FeedURL: "http://feeds.bbci.co.uk/news/rss.xml",
	},
	{
		Name:    "The Guardian",
		URL:     "https://theguardian.co.uk",
		FeedURL: "https://www.theguardian.com/uk/rss",
	},
}

// getArticles fetches every source feed concurrently, extracts full text and
// a lead image for each item (via the cache or goose), and returns all
// articles sorted by ascending publish time. The first error from any feed
// or item aborts the whole fetch.
func getArticles() ([]Article, error) {
	eg := errgroup.Group{}
	// BUG FIX: the original buffer size was 1024^2, but ^ is XOR in Go
	// (1024^2 == 1026), not exponentiation. A too-small buffer can block
	// item goroutines forever since nothing drains the channel until all
	// goroutines finish. Use the intended 1024*1024.
	articles := make(chan Article, 1024*1024)
	for _, s := range sources {
		s := s // capture per-iteration copy for the closure (pre-1.22 semantics)
		eg.Go(func() error {
			fp := gofeed.NewParser()
			feed, err := fp.ParseURL(s.FeedURL)
			if err != nil {
				return err
			}

			g := errgroup.Group{}
			for _, item := range feed.Items {
				item := item
				g.Go(func() error {
					var imageURL string
					if item.Image != nil {
						imageURL = item.Image.URL
					}
					var content string
					if len(item.Content) < 100 {
						// Feed content is too short to be the full article:
						// pull the page text from cache, or extract and cache it.
						text, image, ok, err := cache.Get(item.Link)
						if err != nil {
							return err
						}
						if ok {
							content = text
							imageURL = image
						} else {
							g := goose.New()
							article, err := g.ExtractFromURL(item.Link)
							if err != nil {
								return err
							}
							content = article.CleanedText
							imageURL = article.TopImage
							err = cache.Set(item.Link, content, imageURL)
							if err != nil {
								return err
							}
						}
					} else {
						// Full content is embedded in the feed: walk the HTML,
						// collecting <p> text and the first <img> src.
						doc, err := html.Parse(strings.NewReader(item.Content))
						if err != nil {
							return err
						}

						var f func(n *html.Node)
						f = func(n *html.Node) {
							switch n.Type {
							case html.ElementNode:
								if n.Data == "img" {
									for _, a := range n.Attr {
										if a.Key == "src" {
											// Keep only the first image found.
											if imageURL == "" {
												imageURL = a.Val
											}
										}
									}
								}
							case html.TextNode:
								if n.Parent.Data == "p" {
									attrs := n.Parent.Attr
									for _, a := range attrs {
										// Skip boilerplate paragraphs (share
										// widgets, "top stories" promos).
										if a.Key == "class" && matchClass(a.Val, []string{"twite", "top-stories"}) {
											goto recurse
										}
									}
									if item.Description == "" {
										item.Description = n.Data
									}
									content += n.Data + " "
								}
							}
						recurse:
							for c := n.FirstChild; c != nil; c = c.NextSibling {
								f(c)
							}
						}
						f(doc)
					}

					var author string
					if item.Author != nil {
						author = item.Author.Name
					}

					// BUG FIX: PublishedParsed is nil when the feed item has
					// no parseable date; dereferencing it would panic. Fall
					// back to the zero time so such items sort first.
					var published time.Time
					if item.PublishedParsed != nil {
						published = *item.PublishedParsed
					}

					articles <- Article{
						Title:       item.Title,
						Description: item.Description,
						Content:     content,
						ImageURL:    imageURL,
						Link:        item.Link,
						Author:      author,
						Source:      s,
						Timestamp:   published,
						TS:          published.Format("15:04 02-01-2006"),
					}
					return nil
				})
			}
			return g.Wait()
		})
	}
	err := eg.Wait()
	if err != nil {
		return nil, err
	}
	close(articles)
	out := []Article{}
	for a := range articles {
		out = append(out, a)
	}
	sort.Slice(out, func(i, j int) bool {
		return out[i].Timestamp.Before(out[j].Timestamp)
	})
	return out, nil
}

// matchClass reports whether class contains any of the substrings in
// exclude. An empty or nil exclude list never matches.
func matchClass(class string, exclude []string) bool {
	matched := false
	for i := 0; i < len(exclude) && !matched; i++ {
		matched = strings.Contains(class, exclude[i])
	}
	return matched
}
Loading

0 comments on commit d6fb9cc

Please sign in to comment.