-
Notifications
You must be signed in to change notification settings - Fork 0
/
trending.go
78 lines (65 loc) · 1.56 KB
/
trending.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package main
import (
"context"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
"github.com/chromedp/chromedp/runner"
"log"
"strconv"
"strings"
)
// TrendingPaper pairs a scraped paper identifier with the number of tweets
// reported for it.
type TrendingPaper struct {
	// Id holds the paper element's "id" attribute value.
	Id string
	// TweetCount holds the integer parsed from the paper's tweet-count text.
	TweetCount int
}
// RequestTrendingPapersOnArxiv loads the page described by requestBody in a
// headless Chrome instance and extracts the trending papers from its HTML.
//
// Paper ids are taken from the "id" attribute of every ".apaper" element and
// tweet counts from the leading integer of every ".tweetcount" element. The
// two lists are paired by position and truncated to the shorter one; the
// result is nil when either list is empty.
//
// Failing to start Chrome, to run the page tasks, or to parse the returned
// HTML terminates the process through log.Fatal. Problems while shutting
// Chrome down or while parsing a single tweet count are only logged.
func RequestTrendingPapersOnArxiv() []TrendingPaper {
	// create context
	ctxt, cancel := context.WithCancel(context.Background())
	defer cancel()

	// create chrome instance
	c, err := chromedp.New(ctxt, chromedp.WithRunnerOptions(
		runner.Flag("headless", true),
		runner.Flag("disable-gpu", true)))
	if err != nil {
		log.Fatal(err)
	}

	// run task list
	var res string
	runErr := c.Run(ctxt, requestBody(&res))

	// Shut Chrome down before acting on runErr: log.Fatal exits without
	// running deferred calls, so relying on the deferred cancel alone could
	// leave the browser process behind. Shutdown problems are not fatal
	// because the page content (if any) has already been captured.
	if err := c.Shutdown(ctxt); err != nil {
		log.Printf("shutting down chrome: %v", err)
	} else if err := c.Wait(); err != nil {
		log.Printf("waiting for chrome to exit: %v", err)
	}
	if runErr != nil {
		log.Fatal(runErr)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(res))
	if err != nil {
		log.Fatal(err)
	}

	var ids []string
	doc.Find(".apaper").Each(func(_ int, s *goquery.Selection) {
		// A missing id attribute is deliberately kept as "" so that positions
		// still line up with the tweet counts collected below.
		id, _ := s.Attr("id")
		ids = append(ids, id)
	})

	var tweetCounts []int
	doc.Find(".tweetcount").Each(func(_ int, s *goquery.Selection) {
		tweetCounts = append(tweetCounts, parseTweetCount(s.Text()))
	})

	if len(ids) != len(tweetCounts) {
		// Pairing is positional, so a mismatch means some papers may be
		// matched with the wrong count.
		log.Printf("found %d papers but %d tweet counts; pairing by position may be misaligned",
			len(ids), len(tweetCounts))
	}

	return pairTrendingPapers(ids, tweetCounts)
}

// parseTweetCount returns the integer at the start of text, ignoring any
// surrounding whitespace (for example 12 for " 12 tweets"). When text has no
// leading integer it logs the problem and returns 0, so the caller can keep
// the entry's position in its list.
func parseTweetCount(text string) int {
	fields := strings.Fields(text)
	if len(fields) == 0 {
		log.Printf("parsing tweet count: text is empty")
		return 0
	}
	count, err := strconv.Atoi(fields[0])
	if err != nil {
		log.Printf("parsing tweet count %q: %v", fields[0], err)
		return 0
	}
	return count
}

// pairTrendingPapers zips ids and counts by position into TrendingPaper
// values, stopping at the end of the shorter slice.
func pairTrendingPapers(ids []string, counts []int) []TrendingPaper {
	n := len(ids)
	if len(counts) < n {
		n = len(counts)
	}
	if n == 0 {
		// Return nil rather than an empty slice so an empty result looks the
		// same to callers as a never-appended slice.
		return nil
	}

	papers := make([]TrendingPaper, 0, n)
	for i := 0; i < n; i++ {
		papers = append(papers, TrendingPaper{Id: ids[i], TweetCount: counts[i]})
	}
	return papers
}
// requestBody assembles the chromedp tasks that open the arxiv-sanity
// "toptwtr" page and copy the inner HTML of its body element into *res.
func requestBody(res *string) chromedp.Tasks {
	const pageURL = "http://www.arxiv-sanity.com/toptwtr"

	openPage := chromedp.Navigate(pageURL)
	captureBody := chromedp.InnerHTML("//body", res)

	return chromedp.Tasks{openPage, captureBody}
}