Skip to content

Commit

Permalink
forgot to commit for a long time
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Russell-Saw committed Dec 13, 2020
1 parent a90a009 commit 548fca7
Show file tree
Hide file tree
Showing 1,091 changed files with 134,499 additions and 72,689 deletions.
9 changes: 8 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@ COPY . /src/github.com/arussellsaw/news
RUN cd /src/github.com/arussellsaw/news && CGO_ENABLED=0 go build -o news -mod=vendor

FROM alpine:latest AS final
RUN apk --no-cache add ca-certificates
RUN apk --no-cache add ca-certificates nodejs nodejs-npm


COPY --from=build /src/github.com/arussellsaw/news/readability-server /app/readability-server

WORKDIR /app/readability-server

RUN npm install

WORKDIR /app

Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@ build:
deploy: build push
gcloud beta run deploy news --image gcr.io/russellsaw/news:latest

deploy-worker: build push
gcloud beta run deploy news-background --image gcr.io/russellsaw/news:latest

push:
docker push gcr.io/russellsaw/news
105 changes: 105 additions & 0 deletions cmd/articles/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package main

import (
secretmanager "cloud.google.com/go/secretmanager/apiv1beta1"
"context"
"fmt"
"github.com/arussellsaw/news/dao"
"github.com/arussellsaw/news/domain"
"github.com/arussellsaw/news/pkg/util"
"github.com/monzo/slog"
"github.com/pacedotdev/firesearch-sdk/clients/go/firesearch"
secrets "google.golang.org/genproto/googleapis/cloud/secretmanager/v1beta1"
"os"
"time"
)

// main re-indexes every document of the Firestore "articles" collection into
// the Firesearch "news/search/articles" index.
//
// It initialises the dao package, reads the Firesearch API key from Secret
// Manager, then walks the collection one document at a time and pushes each
// article's title and concatenated text content to the index. A failed index
// call is logged and the run continues; a failed read or decode ends the run.
func main() {
	ctx := context.Background()

	// The second assignment wins: output always goes through the colour
	// logger on stdout.
	var logger slog.Logger
	logger = util.ContextParamLogger{Logger: &util.StackDriverLogger{}}
	logger = util.ColourLogger{Writer: os.Stdout}
	slog.SetDefaultLogger(logger)

	err := dao.Init(ctx)
	if err != nil {
		slog.Critical(ctx, "Error setting up dao: %s", err)
		return
	}

	sm, err := secretmanager.NewClient(ctx)
	if err != nil {
		panic(err)
	}
	defer sm.Close()

	res, err := sm.AccessSecretVersion(
		ctx,
		&secrets.AccessSecretVersionRequest{Name: fmt.Sprintf(
			"projects/266969078315/secrets/%s/versions/latest",
			"FIRESEARCH_API_KEY",
		)},
	)
	if err != nil {
		panic(err)
	}

	// Use the raw secret bytes: Payload.String() is the protobuf debug
	// rendering of the whole message rather than the secret itself. The key
	// is deliberately never written to the logs.
	apiKey := string(res.GetPayload().GetData())
	client := firesearch.NewClient("https://firesearch-3phpehgkya-ew.a.run.app/api", apiKey)
	indexService := firesearch.NewIndexService(client)

	c := dao.Client()
	docs := c.Collection("articles").Documents(ctx)
	// Release the iterator's resources on every exit path.
	defer docs.Stop()

	// n counts articles that were read and decoded successfully.
	n := 0
	for {
		start := time.Now()
		doc, err := docs.Next()
		if err != nil {
			// Next also reports the end of the collection through err
			// (iterator.Done), so this is the normal exit path as well as
			// the failure path. Telling the two apart needs the
			// google.golang.org/api/iterator package, which this file does
			// not import.
			slog.Critical(ctx, "error reading article: %s", err)
			slog.Info(ctx, "done %v articles", n)
			return
		}
		if doc == nil {
			slog.Info(ctx, "done %v articles", n)
			return
		}
		a := domain.Article{}
		err = doc.DataTo(&a)
		if err != nil {
			slog.Critical(ctx, "error reading article: %s", err)
			return
		}
		n++

		// Only "text" elements are searchable content.
		// NOTE(review): elements are joined with no separator, so words at
		// element boundaries run together in the index - confirm intended.
		contentStr := ""
		for _, e := range a.Content {
			if e.Type != "text" {
				continue
			}
			contentStr += e.Value
		}
		_, err = indexService.PutDoc(ctx, firesearch.PutDocRequest{
			IndexPath: "news/search/articles",
			Doc: firesearch.Doc{
				ID: a.ID,
				SearchFields: []firesearch.SearchField{
					{
						Key:   "title",
						Value: a.Title,
						Store: true,
					},
					{
						Key:   "content",
						Value: contentStr,
						Store: true,
					},
				},
			},
		})
		if err != nil {
			// Best effort: one failed document must not abort the re-index.
			slog.Error(ctx, "Error indexing: %s", err)
		}
		slog.Info(ctx, "Article: %s %s", time.Since(start), a.Link)
	}
}
68 changes: 68 additions & 0 deletions cmd/search/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package main

import (
secretmanager "cloud.google.com/go/secretmanager/apiv1beta1"
"context"
"flag"
"fmt"
"github.com/arussellsaw/news/pkg/util"
"github.com/monzo/slog"
"github.com/pacedotdev/firesearch-sdk/clients/go/firesearch"
secrets "google.golang.org/genproto/googleapis/cloud/secretmanager/v1beta1"
"os"
)

// main runs a single text query against the Firesearch
// "news/search/articles" index and prints up to five hits to stdout.
//
// Flags:
//
//	-q  the query text
//
// Each hit is printed as a tab-indented "ID: title:" followed by its
// highlight snippets.
func main() {
	// Parse flags before any network call so that -h and flag errors do not
	// need Secret Manager access.
	query := flag.String("q", "", "query")
	flag.Parse()

	ctx := context.Background()

	// The second assignment wins: output always goes through the colour
	// logger on stdout.
	var logger slog.Logger
	logger = util.ContextParamLogger{Logger: &util.StackDriverLogger{}}
	logger = util.ColourLogger{Writer: os.Stdout}
	slog.SetDefaultLogger(logger)

	sm, err := secretmanager.NewClient(ctx)
	if err != nil {
		panic(err)
	}
	defer sm.Close()

	res, err := sm.AccessSecretVersion(
		ctx,
		&secrets.AccessSecretVersionRequest{Name: fmt.Sprintf(
			"projects/266969078315/secrets/%s/versions/latest",
			"FIRESEARCH_API_KEY",
		)},
	)
	if err != nil {
		panic(err)
	}

	// Use the raw secret bytes: Payload.String() is the protobuf debug
	// rendering of the whole message rather than the secret itself. The key
	// is deliberately never written to the logs.
	apiKey := string(res.GetPayload().GetData())
	client := firesearch.NewClient("https://firesearch-3phpehgkya-ew.a.run.app/api", apiKey)
	indexService := firesearch.NewIndexService(client)

	searchResults, err := indexService.Search(ctx, firesearch.SearchRequest{
		Query: firesearch.SearchQuery{
			IndexPath: "news/search/articles",
			Limit:     5,
			Text:      *query,
			Select:    []string{"title", "content"},
		},
	})
	if err != nil {
		slog.Critical(ctx, "Error searching: %s", err)
		return
	}
	for _, hit := range searchResults.Hits {
		title, ok := hit.FieldValue("title")
		if !ok {
			title = "Untitled"
		}
		fmt.Printf("\t%s: %s:", hit.ID, title)
		for _, highlight := range hit.Highlights {
			fmt.Print(" " + highlight.Text)
		}
		fmt.Println()
	}
}
70 changes: 66 additions & 4 deletions dao/dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package dao

import (
"context"
"github.com/monzo/slog"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
"sync"
Expand Down Expand Up @@ -42,6 +43,10 @@ type storedEdition struct {
Metadata map[string]string
}

// Client returns the package-level Firestore client used by this package's
// queries, so callers can run queries the dao package does not wrap.
// NOTE(review): presumably nil until Init has been called - confirm.
func Client() *firestore.Client {
return client
}

func GetEditionForTime(ctx context.Context, t time.Time, allowRecent bool) (*domain.Edition, error) {
iter := client.Collection("editions").
Where("EndTime", ">", t).
Expand Down Expand Up @@ -325,14 +330,32 @@ func GetAllSources(ctx context.Context) ([]domain.Source, error) {
}

func GetArticlesForOwner(ctx context.Context, ownerID string, start, end time.Time) ([]domain.Article, []domain.Source, error) {
sources, err := GetSources(ctx, ownerID)
if err != nil {
return nil, nil, err
var (
sources []domain.Source
err error
)
if ownerID != "" {
sources, err = GetSources(ctx, ownerID)
if err != nil {
return nil, nil, err
}
} else {
sources = domain.GetSources()
}
g := errgroup.Group{}
articles := make(chan domain.Article, 1024)
articles := make(chan domain.Article)
for _, s := range sources {
s := s
if as, ok := c.Get(s.FeedURL); ok {
as := as
g.Go(func() error {
for _, a := range as {
articles <- a
}
return nil
})
continue
}
g.Go(func() error {
docs, err := client.Collection("articles").
Where("Source.FeedURL", "==", s.FeedURL).
Expand All @@ -343,14 +366,18 @@ func GetArticlesForOwner(ctx context.Context, ownerID string, start, end time.Ti
if err != nil {
return err
}
toCache := []domain.Article{}
for _, doc := range docs {
a := domain.Article{}
err = doc.DataTo(&a)
if err != nil {
return err
}
toCache = append(toCache, a)
slog.Debug(ctx, "Article loaded: %s %s", a.Title, a.Source.FeedURL)
articles <- a
}
c.Set(s.FeedURL, toCache)
return nil
})
}
Expand All @@ -367,3 +394,38 @@ func GetArticlesForOwner(ctx context.Context, ownerID string, start, end time.Ti
}
return out, sources, nil
}

// c is the package-wide article cache, keyed by feed URL.
var c = feedCache{
	c:       make(map[string][]domain.Article),
	expires: make(map[string]time.Time),
	ttl:     20 * time.Minute,
}

// feedCache is an in-memory cache of the articles loaded for a feed URL.
//
// Every entry carries its own deadline, set to ttl after the most recent Set
// for that key. Expiry is checked on read and expired entries are swept on
// write, so no background goroutines are needed and refreshing a key can
// never be undone by a timer belonging to an older value.
type feedCache struct {
	mu sync.RWMutex
	// c holds the cached articles per feed URL.
	c map[string][]domain.Article
	// expires holds the deadline of each entry in c.
	expires map[string]time.Time

	// ttl is how long an entry stays valid after it is set.
	ttl time.Duration
}

// Get returns the articles cached for url and true, or nil and false when
// there is no entry or the entry has passed its deadline.
func (c *feedCache) Get(url string) ([]domain.Article, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	as, ok := c.c[url]
	if !ok {
		return nil, false
	}
	if deadline, tracked := c.expires[url]; tracked && !time.Now().Before(deadline) {
		return nil, false
	}
	return as, true
}

// Set stores as under url, valid for ttl from now, replacing any previous
// entry and its deadline. It also evicts every entry whose deadline has
// passed, so feeds that are no longer requested do not stay in memory.
func (c *feedCache) Set(url string, as []domain.Article) {
	now := time.Now()
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.expires == nil {
		c.expires = make(map[string]time.Time)
	}
	for key, deadline := range c.expires {
		if !now.Before(deadline) {
			delete(c.c, key)
			delete(c.expires, key)
		}
	}
	c.c[url] = as
	c.expires[url] = now.Add(c.ttl)
}

// Delete removes the entry for url, if any.
func (c *feedCache) Delete(url string) {
	c.mu.Lock()
	delete(c.c, url)
	delete(c.expires, url)
	c.mu.Unlock()
}
51 changes: 39 additions & 12 deletions domain/article.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
package domain

import (
"bytes"
"compress/gzip"
"context"
"github.com/monzo/slog"
"html/template"
"io/ioutil"
"time"
)

type Article struct {
ID string
Title string
Description string
Content []Element
HTMLContent template.HTML
ImageURL string
Link string
Author string
Source Source
Timestamp time.Time
TS string
Layout Layout
ID string
Title string
Description string
CompressedContent []byte
Content []Element
ImageURL string
Link string
Author string
Source Source
Timestamp time.Time
TS string
Layout Layout

decompressed []byte
}

type Element struct {
Expand Down Expand Up @@ -50,3 +57,23 @@ func (a *Article) Trim(size int) {
a.Content = append(a.Content, e)
}
}

// RawHTML returns the article's HTML body, gunzipping CompressedContent on
// first use and caching the decompressed bytes on the Article.
//
// It returns an empty value when the article has no compressed content or
// when the content is not a valid gzip stream. The bytes are converted to
// template.HTML as-is, without any escaping.
func (a *Article) RawHTML() template.HTML {
	if len(a.decompressed) != 0 {
		return template.HTML(a.decompressed)
	}
	if len(a.CompressedContent) == 0 {
		// Nothing stored for this article.
		return ""
	}
	ctx := context.Background()
	slog.Debug(ctx, "Decompressing %s", a.ID)
	r, err := gzip.NewReader(bytes.NewReader(a.CompressedContent))
	if err != nil {
		// NewReader returns a nil reader on failure; reading from it would
		// panic, so stop here.
		slog.Error(ctx, "Error decompressing %s: %s", a.ID, err)
		return ""
	}
	defer r.Close()
	buf, err := ioutil.ReadAll(r)
	if err != nil {
		// Best effort: keep whatever was decoded before the stream broke.
		slog.Error(ctx, "Error decompressing %s: %s", a.ID, err)
	}
	a.decompressed = buf
	return template.HTML(buf)
}

// SetHTMLContent gzip-compresses body and stores the result in
// CompressedContent. It also drops any previously cached decompressed bytes
// so that a later RawHTML call reflects the new body instead of the old one.
func (a *Article) SetHTMLContent(body string) {
	buf := new(bytes.Buffer)
	w := gzip.NewWriter(buf)
	// Writes into a bytes.Buffer do not return errors, so the results are
	// intentionally not checked. Close flushes all pending data.
	w.Write([]byte(body))
	w.Close()
	a.CompressedContent = buf.Bytes()
	a.decompressed = nil
}
6 changes: 0 additions & 6 deletions domain/layout.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,6 @@ func LayoutArticles(aa []Article) []Article {
bySize := make([][]Article, 6)
for _, a := range aa {
s := 0
for _, c := range a.Content {
if c.Type != "text" {
continue
}
s += len(c.Value)
}
switch {
case s < 200:
bySize[0] = append(bySize[0], a)
Expand Down
Loading

0 comments on commit 548fca7

Please sign in to comment.