-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.go
90 lines (78 loc) · 2.11 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package main
import (
	"fmt"
	"log/slog"
	"os"
	"strings"

	"apify/actor/example/store"
	"github.com/PuerkitoBio/goquery"
	"github.com/geziyor/geziyor"
	"github.com/geziyor/geziyor/client"
	"github.com/geziyor/geziyor/export"
)
// xlog is the logger used by the actor. It writes text-formatted records to
// standard output with the text handler's default options.
var xlog = slog.New(slog.NewTextHandler(os.Stdout, nil))
type (
	// KVStoreExporter is an exporter that accumulates scraped records in
	// memory. Export stores the collected records under the "data" key.
	KVStoreExporter map[string]any

	// AutherNText is one scraped record: an author and the associated text.
	AutherNText struct {
		Author string `json:"author"`
		Text   string `json:"text"`
	}
)

// Export drains parsedData until the channel is closed and stores the
// collected records as a []AutherNText under the "data" key of the receiver.
//
// Every item is expected to be a map[string]any holding string values under
// the "author" and "text" keys. Items of any other shape are skipped rather
// than triggering a type-assertion panic, and the channel is always drained
// to the end so the sending side is never left waiting on this exporter.
//
// It returns nil when every item was well formed, and otherwise an error
// reporting how many items were skipped; the well-formed records are stored
// in either case.
func (kv *KVStoreExporter) Export(parsedData chan any) error {
	// Writing to a nil map panics, so allocate one if the caller passed a
	// zero-value exporter. An existing map is kept as is.
	if *kv == nil {
		*kv = make(KVStoreExporter)
	}

	// Deliberately an empty, non-nil slice: if the value is later encoded as
	// JSON, a run without records becomes [] rather than null.
	data := []AutherNText{}
	skipped := 0
	for d := range parsedData {
		e, ok := d.(map[string]any)
		if !ok {
			skipped++
			continue
		}
		author, okAuthor := e["author"].(string)
		text, okText := e["text"].(string)
		if !okAuthor || !okText {
			skipped++
			continue
		}
		data = append(data, AutherNText{Author: author, Text: text})
	}
	(*kv)["data"] = data

	if skipped > 0 {
		return fmt.Errorf("skipped %d malformed export item(s)", skipped)
	}
	return nil
}
// main is the actor entry point. It reads the "INPUT" record from the store
// returned by store.KVStoreDefault, scrapes starting from the "url" field of
// that input, and puts the exporter contents back into the same store under
// the "data" key. Failures are logged and end the run early.
func main() {
	xlog.Info("Example actor written in Go.")

	// Get default KV store
	kv := store.KVStoreDefault()

	// Get input
	input, err := kv.Get("INPUT")
	if err != nil {
		xlog.Error("failed to get input from kv store", "error", err)
		return
	}
	xlog.Info("input from kv store", "input", input)

	// The checked assertion keeps a missing or non-string "url" from
	// panicking; a blank value is rejected as well.
	url, ok := input["url"].(string)
	url = strings.TrimSpace(url)
	if !ok || url == "" {
		xlog.Error("no url in input", "url", url)
		// Nothing to scrape without a start URL.
		return
	}

	// Scrape data
	kvExporter := KVStoreExporter{}
	scrape([]string{url}, &kvExporter)

	xlog.Info("saving scrapped data to kv store", "data", kvExporter)
	if err := kv.Put("data", kvExporter); err != nil {
		xlog.Error("error while add value to store", "data", kvExporter, "error", err)
	}
	xlog.Info("actor is done")
}
// scrape builds a geziyor crawler that starts from urls, parses responses
// with quotesParse, and registers two exporters: a JSON exporter and the
// supplied exporter. It starts the crawler and, once Start returns, logs the
// supplied exporter.
//
// NOTE(review): confirm that geziyor delivers every exported item to each
// registered exporter rather than dividing items between them.
// NOTE(review): this logs through the default slog logger, not the file's
// xlog logger.
func scrape(urls []string, exporter export.Exporter) {
	exporters := []export.Exporter{&export.JSON{}, exporter}
	opts := &geziyor.Options{
		StartURLs: urls,
		ParseFunc: quotesParse,
		Exporters: exporters,
	}

	crawler := geziyor.NewGeziyor(opts)
	crawler.Start()

	slog.Info("exportor content", "data", exporter)
}
// quotesParse is the parse callback for a fetched page. For every
// "div.quote" element it sends a map with the "text" (from "span.text") and
// "author" (from "small.author") values to g.Exports. If the page has an
// "li.next > a" link with an href, it requests that URL with quotesParse as
// the callback again.
func quotesParse(g *geziyor.Geziyor, r *client.Response) {
	quotes := r.HTMLDoc.Find("div.quote")
	quotes.Each(func(_ int, quote *goquery.Selection) {
		item := map[string]any{
			"text":   quote.Find("span.text").Text(),
			"author": quote.Find("small.author").Text(),
		}
		g.Exports <- item
	})

	nextHref, found := r.HTMLDoc.Find("li.next > a").Attr("href")
	if !found {
		// No link to a following page.
		return
	}
	g.Get(r.JoinURL(nextHref), quotesParse)
}